xso/
minidom_compat.rs

1//! Implementations of traits from this crate for minidom types
2
3// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
4//
5// This Source Code Form is subject to the terms of the Mozilla Public
6// License, v. 2.0. If a copy of the MPL was not distributed with this
7// file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
9use alloc::{
10    borrow::{Cow, ToOwned},
11    boxed::Box,
12    vec::IntoIter,
13};
14use core::marker::PhantomData;
15
16use minidom::{Element, Node};
17
18use rxml::{
19    parser::EventMetrics,
20    writer::{SimpleNamespaces, TrackNamespace},
21    AttrMap, Event, Name, NameStr, Namespace, NcName, NcNameStr,
22};
23
24use crate::{
25    error::{Error, FromEventsError},
26    rxml_util::{EventToItem, Item},
27    AsXml, FromEventsBuilder, FromXml,
28};
29
30/// State machine for converting a minidom Element into rxml events.
31enum IntoEventsInner {
32    /// Element header: the element is still intact and we need to generate
33    /// the [`rxml::Event::StartElement`] event from the namespace, name, and
34    /// attributes.
35    Header(Element),
36
37    /// Content: The contents of the element are streamed as events.
38    Nodes {
39        /// Remaining child nodes (text and/or children) to emit.
40        remaining: IntoIter<Node>,
41
42        /// When emitting a child element, this is a nested [`IntoEvents`]
43        /// instance for that child element.
44        nested: Option<Box<IntoEvents>>,
45    },
46
47    /// End of iteration: this state generates an end-of-iterator state.
48    ///
49    /// Note that the [`rxml::Event::EndElement`] event for the element itself
50    /// is generated by the iterator already in the `Nodes` state, when
51    /// `nested` is None and `remaining` returns `None` from its `next()`
52    /// implementation.
53    Fin,
54}
55
56/// Create the parts for a [`rxml::Event::StartElement`] from a
57/// [`minidom::Element`].
58///
59/// Note that this copies the attribute data as well as namespace and name.
60/// This is due to limitations in the [`minidom::Element`] API.
61// NOTE to developers: The limitations are not fully trivial to overcome:
62// the attributes use a BTreeMap internally, which does not offer a `drain`
63// iterator.
64pub fn make_start_ev_parts(el: &Element) -> Result<(rxml::QName, AttrMap), Error> {
65    let name = NcName::try_from(el.name())?;
66    let namespace = Namespace::from(el.ns());
67
68    let mut attrs = AttrMap::new();
69    for (name, value) in el.attrs() {
70        let name = Name::try_from(name)?;
71        let (prefix, name) = name.split_name()?;
72        let namespace = if let Some(prefix) = prefix {
73            if prefix == "xml" {
74                Namespace::XML
75            } else {
76                let ns = match el.prefixes.get(&Some(prefix.into())) {
77                    Some(v) => v,
78                    None => {
79                        panic!("undeclared xml namespace prefix in minidom::Element")
80                    }
81                };
82                Namespace::from(ns.to_owned())
83            }
84        } else {
85            Namespace::NONE
86        };
87
88        attrs.insert(namespace, name, value.to_owned());
89    }
90
91    Ok(((namespace, name), attrs))
92}
93
94impl IntoEventsInner {
95    fn next(&mut self) -> Result<Option<Event>, Error> {
96        match self {
97            IntoEventsInner::Header(ref mut el) => {
98                let (qname, attrs) = make_start_ev_parts(el)?;
99                let event = Event::StartElement(EventMetrics::zero(), qname, attrs);
100
101                *self = IntoEventsInner::Nodes {
102                    remaining: el.take_nodes().into_iter(),
103                    nested: None,
104                };
105                Ok(Some(event))
106            }
107            IntoEventsInner::Nodes {
108                ref mut nested,
109                ref mut remaining,
110            } => {
111                loop {
112                    if let Some(nested) = nested.as_mut() {
113                        if let Some(ev) = nested.next() {
114                            return Some(ev).transpose();
115                        }
116                    }
117                    match remaining.next() {
118                        Some(Node::Text(text)) => {
119                            return Ok(Some(Event::Text(EventMetrics::zero(), text)));
120                        }
121                        Some(Node::Element(el)) => {
122                            *nested = Some(Box::new(IntoEvents::new(el)));
123                            // fallthrough to next loop iteration
124                        }
125                        None => {
126                            // end of element, switch state and emit EndElement
127                            *self = IntoEventsInner::Fin;
128                            return Ok(Some(Event::EndElement(EventMetrics::zero())));
129                        }
130                    }
131                }
132            }
133            IntoEventsInner::Fin => Ok(None),
134        }
135    }
136}
137
138/// Convert a [`minidom::Element`] into [`rxml::Event`]s.
139///
140/// This can be constructed from the
141/// [`IntoXml::into_event_iter`][`crate::IntoXml::into_event_iter`]
142/// implementation on [`minidom::Element`].
143struct IntoEvents(IntoEventsInner);
144
145impl IntoEvents {
146    fn new(el: Element) -> Self {
147        IntoEvents(IntoEventsInner::Header(el))
148    }
149}
150
151impl Iterator for IntoEvents {
152    type Item = Result<Event, Error>;
153
154    fn next(&mut self) -> Option<Self::Item> {
155        self.0.next().transpose()
156    }
157}
158
159enum AsXmlState<'a> {
160    /// Element header: we need to generate the
161    /// [`Item::ElementHeadStart`] item from the namespace and name.
162    Header { element: &'a Element },
163
164    /// Element header: we now generate the attributes.
165    Attributes {
166        /// The element (needed for the contents later and to access the
167        /// prefix mapping).
168        element: &'a Element,
169
170        /// Attribute iterator.
171        attributes: minidom::element::Attrs<'a>,
172    },
173
174    /// Content: The contents of the element are streamed as events.
175    Nodes {
176        /// Remaining child nodes (text and/or children) to emit.
177        nodes: minidom::element::Nodes<'a>,
178
179        /// When emitting a child element, this is a nested [`IntoEvents`]
180        /// instance for that child element.
181        nested: Option<Box<ElementAsXml<'a>>>,
182    },
183}
184
185/// Convert a [`minidom::Element`] to [`Item`][`crate::rxml_util::Item`]s.
186///
187/// This can be constructed from the
188/// [`AsXml::as_xml_iter`][`crate::AsXml::as_xml_iter`]
189/// implementation on [`minidom::Element`].
190pub struct ElementAsXml<'a>(Option<AsXmlState<'a>>);
191
192impl<'a> Iterator for ElementAsXml<'a> {
193    type Item = Result<Item<'a>, Error>;
194
195    fn next(&mut self) -> Option<Self::Item> {
196        match self.0 {
197            None => None,
198            Some(AsXmlState::Header { ref element }) => {
199                let item = Item::ElementHeadStart(
200                    Namespace::from(element.ns()),
201                    Cow::Borrowed(match <&NcNameStr>::try_from(element.name()) {
202                        Ok(v) => v,
203                        Err(e) => {
204                            self.0 = None;
205                            return Some(Err(e.into()));
206                        }
207                    }),
208                );
209                self.0 = Some(AsXmlState::Attributes {
210                    element,
211                    attributes: element.attrs(),
212                });
213                Some(Ok(item))
214            }
215            Some(AsXmlState::Attributes {
216                ref mut attributes,
217                ref element,
218            }) => {
219                if let Some((name, value)) = attributes.next() {
220                    let name = match <&NameStr>::try_from(name) {
221                        Ok(v) => v,
222                        Err(e) => {
223                            self.0 = None;
224                            return Some(Err(e.into()));
225                        }
226                    };
227                    let (prefix, name) = match name.split_name() {
228                        Ok(v) => v,
229                        Err(e) => {
230                            self.0 = None;
231                            return Some(Err(e.into()));
232                        }
233                    };
234                    let namespace = if let Some(prefix) = prefix {
235                        if prefix == "xml" {
236                            Namespace::XML
237                        } else {
238                            let ns = match element.prefixes.get(&Some(prefix.as_str().to_owned())) {
239                                Some(v) => v,
240                                None => {
241                                    panic!("undeclared xml namespace prefix in minidom::Element")
242                                }
243                            };
244                            Namespace::from(ns.to_owned())
245                        }
246                    } else {
247                        Namespace::NONE
248                    };
249                    Some(Ok(Item::Attribute(
250                        namespace,
251                        Cow::Borrowed(name),
252                        Cow::Borrowed(value),
253                    )))
254                } else {
255                    self.0 = Some(AsXmlState::Nodes {
256                        nodes: element.nodes(),
257                        nested: None,
258                    });
259                    Some(Ok(Item::ElementHeadEnd))
260                }
261            }
262            Some(AsXmlState::Nodes {
263                ref mut nodes,
264                ref mut nested,
265            }) => {
266                if let Some(nested) = nested.as_mut() {
267                    if let Some(next) = nested.next() {
268                        return Some(next);
269                    }
270                }
271                *nested = None;
272                match nodes.next() {
273                    None => {
274                        self.0 = None;
275                        Some(Ok(Item::ElementFoot))
276                    }
277                    Some(minidom::Node::Text(ref text)) => {
278                        Some(Ok(Item::Text(Cow::Borrowed(text))))
279                    }
280                    Some(minidom::Node::Element(ref element)) => {
281                        let mut iter = match element.as_xml_iter() {
282                            Ok(v) => v,
283                            Err(e) => {
284                                self.0 = None;
285                                return Some(Err(e.into()));
286                            }
287                        };
288                        let item = iter.next().unwrap();
289                        *nested = Some(Box::new(iter));
290                        Some(item)
291                    }
292                }
293            }
294        }
295    }
296}
297
298impl AsXml for minidom::Element {
299    type ItemIter<'a> = ElementAsXml<'a>;
300
301    fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, Error> {
302        Ok(ElementAsXml(Some(AsXmlState::Header { element: self })))
303    }
304}
305
306/// Construct a [`minidom::Element`] from [`rxml::Event`]s
307///
308/// This can be constructed from the
309/// [`FromXml::from_events`][`crate::FromXml::from_events`]
310/// implementation on [`minidom::Element`].
311pub struct ElementFromEvents {
312    inner: Option<Element>,
313    nested: Option<Box<ElementFromEvents>>,
314}
315
316impl ElementFromEvents {
317    /// Construct a new builder from an element header.
318    ///
319    /// Unlike the [`FromXml::from_events`] implementation on
320    /// [`minidom::Element`], this is contractually infallible. Using this may
321    /// thus save you an `unwrap()` call.
322    pub fn new(qname: rxml::QName, attrs: rxml::AttrMap) -> Self {
323        let mut prefixes = SimpleNamespaces::new();
324        let mut builder = Element::builder(qname.1, qname.0);
325        for ((namespace, name), value) in attrs.into_iter() {
326            if namespace.is_none() {
327                builder = builder.attr(name, value);
328            } else {
329                let (is_new, prefix) = prefixes.declare_with_auto_prefix(namespace.clone());
330                let name = prefix.with_suffix(&name);
331                if is_new {
332                    builder = builder
333                        .prefix(
334                            Some(prefix.as_str().to_owned()),
335                            namespace.as_str().to_owned(),
336                        )
337                        .unwrap();
338                }
339                builder = builder.attr(name, value);
340            }
341        }
342
343        let element = builder.build();
344        Self {
345            inner: Some(element),
346            nested: None,
347        }
348    }
349}
350
351impl FromEventsBuilder for ElementFromEvents {
352    type Output = minidom::Element;
353
354    fn feed(&mut self, ev: Event) -> Result<Option<Self::Output>, Error> {
355        let inner = self
356            .inner
357            .as_mut()
358            .expect("feed() called after it finished");
359        if let Some(nested) = self.nested.as_mut() {
360            match nested.feed(ev)? {
361                Some(v) => {
362                    inner.append_child(v);
363                    self.nested = None;
364                    return Ok(None);
365                }
366                None => return Ok(None),
367            }
368        }
369        match ev {
370            Event::XmlDeclaration(_, _) => Ok(None),
371            Event::StartElement(_, qname, attrs) => {
372                let nested = match Element::from_events(qname, attrs) {
373                    Ok(v) => v,
374                    Err(FromEventsError::Invalid(e)) => return Err(e),
375                    Err(FromEventsError::Mismatch { .. }) => {
376                        unreachable!("<Element as FromXml>::from_events should accept everything!")
377                    }
378                };
379                self.nested = Some(Box::new(nested));
380                Ok(None)
381            }
382            Event::Text(_, text) => {
383                inner.append_text_node(text);
384                Ok(None)
385            }
386            Event::EndElement(_) => Ok(Some(self.inner.take().unwrap())),
387        }
388    }
389}
390
391impl FromXml for Element {
392    type Builder = ElementFromEvents;
393
394    fn from_events(
395        qname: rxml::QName,
396        attrs: rxml::AttrMap,
397    ) -> Result<Self::Builder, FromEventsError> {
398        Ok(Self::Builder::new(qname, attrs))
399    }
400}
401
402/// Helper struct to streamingly parse a struct which implements conversion
403/// from [`minidom::Element`].
404pub struct FromEventsViaElement<T> {
405    inner: ElementFromEvents,
406    // needed here because we need to keep the type `T` around until
407    // `FromEventsBuilder` is done and it must always be the same type, so we
408    // have to nail it down in the struct's type, and to do that we need to
409    // bind it to a field. that's what PhantomData is for.
410    _phantom: PhantomData<T>,
411}
412
413impl<E, T: TryFrom<minidom::Element, Error = E>> FromEventsViaElement<T>
414where
415    Error: From<E>,
416{
417    /// Create a new streaming parser for `T`.
418    pub fn new(qname: rxml::QName, attrs: rxml::AttrMap) -> Result<Self, FromEventsError> {
419        Ok(Self {
420            _phantom: PhantomData,
421            inner: Element::from_events(qname, attrs)?,
422        })
423    }
424}
425
426impl<E, T: TryFrom<minidom::Element, Error = E>> FromEventsBuilder for FromEventsViaElement<T>
427where
428    Error: From<E>,
429{
430    type Output = T;
431
432    fn feed(&mut self, ev: Event) -> Result<Option<Self::Output>, Error> {
433        match self.inner.feed(ev) {
434            Ok(Some(v)) => Ok(Some(v.try_into()?)),
435            Ok(None) => Ok(None),
436            Err(e) => Err(e),
437        }
438    }
439}
440
441/// Helper struct to stream a struct which implements conversion
442/// to [`minidom::Element`].
443pub struct AsItemsViaElement<'x> {
444    iter: EventToItem<IntoEvents>,
445    lifetime_binding: PhantomData<Item<'x>>,
446}
447
448impl<'x> AsItemsViaElement<'x> {
449    /// Create a new streaming parser for `T`.
450    pub fn new<E, T>(value: T) -> Result<Self, crate::error::Error>
451    where
452        Error: From<E>,
453        minidom::Element: TryFrom<T, Error = E>,
454    {
455        let element: minidom::Element = value.try_into()?;
456        Ok(Self {
457            iter: EventToItem::new(IntoEvents::new(element)),
458            lifetime_binding: PhantomData,
459        })
460    }
461}
462
463impl<'x> Iterator for AsItemsViaElement<'x> {
464    type Item = Result<Item<'x>, Error>;
465
466    fn next(&mut self) -> Option<Self::Item> {
467        self.iter.next().map(|x| x.map(Item::into_owned))
468    }
469}
470
#[cfg(test)]
mod tests {
    use super::*;

    /// Passing an element through `crate::transform` must yield an element
    /// which compares equal to the input.
    #[test]
    fn transform_element_is_equivalent() {
        const DOCUMENT: &str = "<foo xmlns='urn:a' a='b' c='d'><child a='x'/><child a='y'>some text</child><child xmlns='urn:b'><nested-child/></child></foo>";

        let original: Element = DOCUMENT.parse().unwrap();
        let roundtripped: Element = crate::transform(&original).unwrap();
        assert_eq!(original, roundtripped);
    }
}