xso/
minidom_compat.rs

1//! Implementations of traits from this crate for minidom types
2
3// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
4//
5// This Source Code Form is subject to the terms of the Mozilla Public
6// License, v. 2.0. If a copy of the MPL was not distributed with this
7// file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
9use alloc::{
10    borrow::{Cow, ToOwned},
11    boxed::Box,
12    vec::IntoIter,
13};
14use core::marker::PhantomData;
15
16use minidom::{Element, Node};
17
18use rxml::{
19    parser::EventMetrics,
20    writer::{SimpleNamespaces, TrackNamespace},
21    AttrMap, Event, Name, NameStr, Namespace, NcName, NcNameStr,
22};
23
24use crate::{
25    error::{Error, FromEventsError},
26    rxml_util::{EventToItem, Item},
27    AsXml, Context, FromEventsBuilder, FromXml,
28};
29
/// State machine for converting a minidom Element into rxml events.
///
/// The state only ever moves forward in declaration order; it never goes
/// back to an earlier variant.
///
/// See [`IntoEvents`] for when this is still used.
enum IntoEventsInner {
    /// Element header: the element is still intact and we need to generate
    /// the [`rxml::Event::StartElement`] event from the namespace, name, and
    /// attributes.
    Header(Element),

    /// Content: The contents of the element are streamed as events.
    Nodes {
        /// Remaining child nodes (text and/or children) to emit.
        remaining: IntoIter<Node>,

        /// When emitting a child element, this is a nested [`IntoEvents`]
        /// instance for that child element.
        nested: Option<Box<IntoEvents>>,
    },

    /// End of iteration: in this state, `next()` only ever returns
    /// `Ok(None)`.
    ///
    /// Note that the [`rxml::Event::EndElement`] event for the element itself
    /// is generated by the iterator already in the `Nodes` state, when
    /// `nested` is None and `remaining` returns `None` from its `next()`
    /// implementation.
    Fin,
}
57
58/// Create the parts for a [`rxml::Event::StartElement`] from a
59/// [`minidom::Element`].
60///
61/// Note that this copies the attribute data as well as namespace and name.
62/// This is due to limitations in the [`minidom::Element`] API.
63// NOTE to developers: The limitations are not fully trivial to overcome:
64// the attributes use a BTreeMap internally, which does not offer a `drain`
65// iterator.
66#[deprecated(
67    since = "0.1.3",
68    note = "obsolete since the transition to AsXml. no replacement."
69)]
70// NOTE: instead of deleting this, make it non-pub to be able to continue to
71// use it in IntoEventsInner.
72pub fn make_start_ev_parts(el: &Element) -> Result<(rxml::QName, AttrMap), Error> {
73    let name = NcName::try_from(el.name())?;
74    let namespace = Namespace::from(el.ns());
75
76    let mut attrs = AttrMap::new();
77    for (name, value) in el.attrs() {
78        let name = Name::try_from(name)?;
79        let (prefix, name) = name.split_name()?;
80        let namespace = if let Some(prefix) = prefix {
81            if prefix == "xml" {
82                Namespace::XML
83            } else {
84                let ns = match el.prefixes.get(&Some(prefix.into())) {
85                    Some(v) => v,
86                    None => {
87                        panic!("undeclared xml namespace prefix in minidom::Element")
88                    }
89                };
90                Namespace::from(ns.to_owned())
91            }
92        } else {
93            Namespace::NONE
94        };
95
96        attrs.insert(namespace, name, value.to_owned());
97    }
98
99    Ok(((namespace, name), attrs))
100}
101
102impl IntoEventsInner {
103    fn next(&mut self) -> Result<Option<Event>, Error> {
104        match self {
105            IntoEventsInner::Header(ref mut el) => {
106                #[allow(deprecated)]
107                let (qname, attrs) = make_start_ev_parts(el)?;
108                let event = Event::StartElement(EventMetrics::zero(), qname, attrs);
109
110                *self = IntoEventsInner::Nodes {
111                    remaining: el.take_nodes().into_iter(),
112                    nested: None,
113                };
114                Ok(Some(event))
115            }
116            IntoEventsInner::Nodes {
117                ref mut nested,
118                ref mut remaining,
119            } => {
120                loop {
121                    if let Some(nested) = nested.as_mut() {
122                        if let Some(ev) = nested.next() {
123                            return Some(ev).transpose();
124                        }
125                    }
126                    match remaining.next() {
127                        Some(Node::Text(text)) => {
128                            return Ok(Some(Event::Text(EventMetrics::zero(), text)));
129                        }
130                        Some(Node::Element(el)) => {
131                            *nested = Some(Box::new(IntoEvents::new(el)));
132                            // fallthrough to next loop iteration
133                        }
134                        None => {
135                            // end of element, switch state and emit EndElement
136                            *self = IntoEventsInner::Fin;
137                            return Ok(Some(Event::EndElement(EventMetrics::zero())));
138                        }
139                    }
140                }
141            }
142            IntoEventsInner::Fin => Ok(None),
143        }
144    }
145}
146
/// Convert a [`minidom::Element`] into [`rxml::Event`]s.
///
/// This takes the element by value and is a thin wrapper around the
/// [`IntoEventsInner`] state machine.
///
/// This is a helper struct for [`FromEventsViaElement`]. We cannot use
/// [`ElementAsXml`] there for lifetime reasons: The [`FromXml`] trait does
/// not allow a lifetime parameter on the `Builder` type or passing arbitrary
/// data into the builder.
struct IntoEvents(IntoEventsInner);
154
155impl IntoEvents {
156    fn new(el: Element) -> Self {
157        IntoEvents(IntoEventsInner::Header(el))
158    }
159}
160
161impl Iterator for IntoEvents {
162    type Item = Result<Event, Error>;
163
164    fn next(&mut self) -> Option<Self::Item> {
165        self.0.next().transpose()
166    }
167}
168
/// Iteration state of [`ElementAsXml`].
///
/// All variants only borrow from the element being serialised.
enum AsXmlState<'a> {
    /// Element header: we need to generate the
    /// [`Item::ElementHeadStart`] item from the namespace and name.
    Header { element: &'a Element },

    /// Element header: we now generate the attributes.
    Attributes {
        /// The element (needed for the contents later and to access the
        /// prefix mapping).
        element: &'a Element,

        /// Attribute iterator.
        attributes: minidom::element::Attrs<'a>,
    },

    /// Content: The contents of the element are streamed as events.
    Nodes {
        /// Remaining child nodes (text and/or children) to emit.
        nodes: minidom::element::Nodes<'a>,

        /// When emitting a child element, this is a nested [`ElementAsXml`]
        /// instance for that child element.
        nested: Option<Box<ElementAsXml<'a>>>,
    },
}
194
/// Convert a [`minidom::Element`] to [`Item`][`crate::rxml_util::Item`]s.
///
/// This can be constructed from the
/// [`AsXml::as_xml_iter`][`crate::AsXml::as_xml_iter`]
/// implementation on [`minidom::Element`].
///
/// The wrapped state is `None` once the iterator has finished, either
/// because the element was emitted completely or because an error occurred.
pub struct ElementAsXml<'a>(Option<AsXmlState<'a>>);
201
202impl<'a> Iterator for ElementAsXml<'a> {
203    type Item = Result<Item<'a>, Error>;
204
205    fn next(&mut self) -> Option<Self::Item> {
206        match self.0 {
207            None => None,
208            Some(AsXmlState::Header { element }) => {
209                let item = Item::ElementHeadStart(
210                    Namespace::from(element.ns()),
211                    Cow::Borrowed(match <&NcNameStr>::try_from(element.name()) {
212                        Ok(v) => v,
213                        Err(e) => {
214                            self.0 = None;
215                            return Some(Err(e.into()));
216                        }
217                    }),
218                );
219                self.0 = Some(AsXmlState::Attributes {
220                    element,
221                    attributes: element.attrs(),
222                });
223                Some(Ok(item))
224            }
225            Some(AsXmlState::Attributes {
226                ref mut attributes,
227                element,
228            }) => {
229                if let Some((name, value)) = attributes.next() {
230                    let name = match <&NameStr>::try_from(name) {
231                        Ok(v) => v,
232                        Err(e) => {
233                            self.0 = None;
234                            return Some(Err(e.into()));
235                        }
236                    };
237                    let (prefix, name) = match name.split_name() {
238                        Ok(v) => v,
239                        Err(e) => {
240                            self.0 = None;
241                            return Some(Err(e.into()));
242                        }
243                    };
244                    let namespace = if let Some(prefix) = prefix {
245                        if prefix == "xml" {
246                            Namespace::XML
247                        } else {
248                            let ns = match element.prefixes.get(&Some(prefix.as_str().to_owned())) {
249                                Some(v) => v,
250                                None => {
251                                    panic!("undeclared xml namespace prefix in minidom::Element")
252                                }
253                            };
254                            Namespace::from(ns.to_owned())
255                        }
256                    } else {
257                        Namespace::NONE
258                    };
259                    Some(Ok(Item::Attribute(
260                        namespace,
261                        Cow::Borrowed(name),
262                        Cow::Borrowed(value),
263                    )))
264                } else {
265                    self.0 = Some(AsXmlState::Nodes {
266                        nodes: element.nodes(),
267                        nested: None,
268                    });
269                    Some(Ok(Item::ElementHeadEnd))
270                }
271            }
272            Some(AsXmlState::Nodes {
273                ref mut nodes,
274                ref mut nested,
275            }) => {
276                if let Some(nested) = nested.as_mut() {
277                    if let Some(next) = nested.next() {
278                        return Some(next);
279                    }
280                }
281                *nested = None;
282                match nodes.next() {
283                    None => {
284                        self.0 = None;
285                        Some(Ok(Item::ElementFoot))
286                    }
287                    Some(minidom::Node::Text(ref text)) => {
288                        Some(Ok(Item::Text(Cow::Borrowed(text))))
289                    }
290                    Some(minidom::Node::Element(ref element)) => {
291                        let mut iter = match element.as_xml_iter() {
292                            Ok(v) => v,
293                            Err(e) => {
294                                self.0 = None;
295                                return Some(Err(e));
296                            }
297                        };
298                        let item = iter.next().unwrap();
299                        *nested = Some(Box::new(iter));
300                        Some(item)
301                    }
302                }
303            }
304        }
305    }
306}
307
308impl AsXml for minidom::Element {
309    type ItemIter<'a> = ElementAsXml<'a>;
310
311    fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, Error> {
312        Ok(ElementAsXml(Some(AsXmlState::Header { element: self })))
313    }
314}
315
/// Construct a [`minidom::Element`] from [`rxml::Event`]s
///
/// This can be constructed from the
/// [`FromXml::from_events`][`crate::FromXml::from_events`]
/// implementation on [`minidom::Element`].
pub struct ElementFromEvents {
    /// The element under construction. This is taken out (leaving `None`)
    /// when the end of the element is fed to the builder.
    inner: Option<Element>,

    /// Builder for the child element which is currently being parsed, if
    /// any. While this is set, all events are forwarded to it.
    nested: Option<Box<ElementFromEvents>>,
}
325
326impl ElementFromEvents {
327    /// Construct a new builder from an element header.
328    ///
329    /// Unlike the [`FromXml::from_events`] implementation on
330    /// [`minidom::Element`], this is contractually infallible. Using this may
331    /// thus save you an `unwrap()` call.
332    pub fn new(qname: rxml::QName, attrs: rxml::AttrMap) -> Self {
333        let mut prefixes = SimpleNamespaces::new();
334        let mut builder = Element::builder(qname.1, qname.0);
335        for ((namespace, name), value) in attrs.into_iter() {
336            if namespace.is_none() {
337                builder = builder.attr(name, value);
338            } else {
339                let (is_new, prefix) = prefixes.declare_with_auto_prefix(namespace.clone());
340                let name = prefix.with_suffix(&name);
341                if is_new {
342                    builder = builder
343                        .prefix(
344                            Some(prefix.as_str().to_owned()),
345                            namespace.as_str().to_owned(),
346                        )
347                        .unwrap();
348                }
349                builder = builder.attr(name, value);
350            }
351        }
352
353        let element = builder.build();
354        Self {
355            inner: Some(element),
356            nested: None,
357        }
358    }
359}
360
361impl FromEventsBuilder for ElementFromEvents {
362    type Output = minidom::Element;
363
364    fn feed(&mut self, ev: Event, ctx: &Context<'_>) -> Result<Option<Self::Output>, Error> {
365        let inner = self
366            .inner
367            .as_mut()
368            .expect("feed() called after it finished");
369        if let Some(nested) = self.nested.as_mut() {
370            match nested.feed(ev, ctx)? {
371                Some(v) => {
372                    inner.append_child(v);
373                    self.nested = None;
374                    return Ok(None);
375                }
376                None => return Ok(None),
377            }
378        }
379        match ev {
380            Event::XmlDeclaration(_, _) => Ok(None),
381            Event::StartElement(_, qname, attrs) => {
382                let nested = match Element::from_events(qname, attrs, ctx) {
383                    Ok(v) => v,
384                    Err(FromEventsError::Invalid(e)) => return Err(e),
385                    Err(FromEventsError::Mismatch { .. }) => {
386                        unreachable!("<Element as FromXml>::from_events should accept everything!")
387                    }
388                };
389                self.nested = Some(Box::new(nested));
390                Ok(None)
391            }
392            Event::Text(_, text) => {
393                inner.append_text_node(text);
394                Ok(None)
395            }
396            Event::EndElement(_) => Ok(Some(self.inner.take().unwrap())),
397        }
398    }
399}
400
401impl FromXml for Element {
402    type Builder = ElementFromEvents;
403
404    fn from_events(
405        qname: rxml::QName,
406        attrs: rxml::AttrMap,
407        _ctx: &Context<'_>,
408    ) -> Result<Self::Builder, FromEventsError> {
409        Ok(Self::Builder::new(qname, attrs))
410    }
411}
412
/// Helper struct to streamingly parse a struct which implements conversion
/// from [`minidom::Element`].
///
/// The events are first collected into a [`minidom::Element`], which is then
/// converted into `T` once the element is complete.
pub struct FromEventsViaElement<T> {
    /// Builder which collects the events into an element.
    inner: ElementFromEvents,
    // needed here because we need to keep the type `T` around until
    // `FromEventsBuilder` is done and it must always be the same type, so we
    // have to nail it down in the struct's type, and to do that we need to
    // bind it to a field. that's what PhantomData is for.
    _phantom: PhantomData<T>,
}
423
424impl<E, T: TryFrom<minidom::Element, Error = E>> FromEventsViaElement<T>
425where
426    Error: From<E>,
427{
428    /// Create a new streaming parser for `T`.
429    pub fn new(qname: rxml::QName, attrs: rxml::AttrMap) -> Result<Self, FromEventsError> {
430        Ok(Self {
431            _phantom: PhantomData,
432            inner: Element::from_events(
433                qname,
434                attrs,
435                // FromEventsViaElement does not support passing through
436                // `xml:lang` inheritance, so we don't pass any context.
437                &Context::empty(),
438            )?,
439        })
440    }
441}
442
443impl<E, T: TryFrom<minidom::Element, Error = E>> FromEventsBuilder for FromEventsViaElement<T>
444where
445    Error: From<E>,
446{
447    type Output = T;
448
449    fn feed(&mut self, ev: Event, ctx: &Context<'_>) -> Result<Option<Self::Output>, Error> {
450        match self.inner.feed(ev, ctx) {
451            Ok(Some(v)) => Ok(Some(v.try_into()?)),
452            Ok(None) => Ok(None),
453            Err(e) => Err(e),
454        }
455    }
456}
457
/// Helper struct to stream a struct which implements conversion
/// to [`minidom::Element`].
///
/// The iterator owns the converted element; the lifetime parameter is not
/// tied to any borrowed data.
// NOTE to developers: The 'x lifetime looks unnecessary, but in fact we need
// it because of the `AsXml` signature. It requires the iterator type to have
// a lifetime parameter which matches the Item's lifetimes. Even though it
// would strictly be valid to return something with 'static lifetime when any
// other lifetime 'a is required, rustc doesn't let us do that.
pub struct AsItemsViaElement<'x> {
    /// Adapter which turns the events generated from the owned element into
    /// items.
    iter: EventToItem<IntoEvents>,

    /// Binds the otherwise unused `'x` lifetime to the struct.
    lifetime_binding: PhantomData<Item<'x>>,
}
469
470impl AsItemsViaElement<'_> {
471    /// Create a new streaming parser for `T`.
472    pub fn new<E, T>(value: T) -> Result<Self, crate::error::Error>
473    where
474        Error: From<E>,
475        minidom::Element: TryFrom<T, Error = E>,
476    {
477        let element: minidom::Element = value.try_into()?;
478        Ok(Self {
479            iter: EventToItem::new(IntoEvents::new(element)),
480            lifetime_binding: PhantomData,
481        })
482    }
483}
484
485impl<'x> Iterator for AsItemsViaElement<'x> {
486    type Item = Result<Item<'x>, Error>;
487
488    fn next(&mut self) -> Option<Self::Item> {
489        self.iter.next().map(|x| x.map(Item::into_owned))
490    }
491}
492
#[cfg(test)]
mod tests {
    use super::*;

    /// Serialising an element and parsing the result back must yield an
    /// element equal to the original.
    #[test]
    fn transform_element_is_equivalent() {
        let source = "<foo xmlns='urn:a' a='b' c='d'><child a='x'/><child a='y'>some text</child><child xmlns='urn:b'><nested-child/></child></foo>";
        let original: Element = source.parse().unwrap();
        let roundtripped: Element = crate::transform(&original).unwrap();
        assert_eq!(original, roundtripped);
    }
}