xso/
minidom_compat.rs

1//! Implementations of traits from this crate for minidom types
2
3// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
4//
5// This Source Code Form is subject to the terms of the Mozilla Public
6// License, v. 2.0. If a copy of the MPL was not distributed with this
7// file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
9use alloc::{
10    borrow::{Cow, ToOwned},
11    boxed::Box,
12    vec::IntoIter,
13};
14use core::marker::PhantomData;
15
16use minidom::{Element, Node};
17
18use rxml::{parser::EventMetrics, AttrMap, Event, Namespace, NcName, NcNameStr};
19
20use crate::{
21    error::{Error, FromEventsError},
22    rxml_util::{EventToItem, Item},
23    AsXml, Context, FromEventsBuilder, FromXml,
24};
25
/// State machine for converting a minidom Element into rxml events.
///
/// The states are entered in declaration order: `Header`, then `Nodes`,
/// then `Fin`. The transitions themselves live in the `next` method of
/// this type.
///
/// See [`IntoEvents`] for when this is still used.
enum IntoEventsInner {
    /// Element header: the element is still intact and we need to generate
    /// the [`rxml::Event::StartElement`] event from the namespace, name, and
    /// attributes.
    Header(Element),

    /// Content: The contents of the element are streamed as events.
    Nodes {
        /// Remaining child nodes (text and/or children) to emit.
        remaining: IntoIter<Node>,

        /// When emitting a child element, this is a nested [`IntoEvents`]
        /// instance for that child element.
        ///
        /// Boxed because the type is recursive: [`IntoEvents`] wraps an
        /// [`IntoEventsInner`].
        nested: Option<Box<IntoEvents>>,
    },

    /// End of iteration: this state generates an end-of-iterator state.
    ///
    /// Note that the [`rxml::Event::EndElement`] event for the element itself
    /// is generated by the iterator already in the `Nodes` state, when
    /// `nested` is None and `remaining` returns `None` from its `next()`
    /// implementation.
    Fin,
}
53
54/// Create the parts for a [`rxml::Event::StartElement`] from a
55/// [`minidom::Element`].
56///
57/// Note that this copies the attribute data as well as namespace and name.
58/// This is due to limitations in the [`minidom::Element`] API.
59// NOTE to developers: The limitations are not fully trivial to overcome:
60// the attributes use a BTreeMap internally, which does not offer a `drain`
61// iterator.
62#[deprecated(
63    since = "0.1.3",
64    note = "obsolete since the transition to AsXml. no replacement."
65)]
66// NOTE: instead of deleting this, make it non-pub to be able to continue to
67// use it in IntoEventsInner.
68pub fn make_start_ev_parts(el: &Element) -> Result<(rxml::QName, AttrMap), Error> {
69    let name = NcName::try_from(el.name())?;
70    let namespace = Namespace::from(el.ns());
71
72    let mut attrs = AttrMap::new();
73    for ((namespace, name), value) in el.attrs() {
74        attrs.insert(namespace.clone(), name.clone(), value.to_owned());
75    }
76
77    Ok(((namespace, name), attrs))
78}
79
80impl IntoEventsInner {
81    fn next(&mut self) -> Result<Option<Event>, Error> {
82        match self {
83            IntoEventsInner::Header(ref mut el) => {
84                #[allow(deprecated)]
85                let (qname, attrs) = make_start_ev_parts(el)?;
86                let event = Event::StartElement(EventMetrics::zero(), qname, attrs);
87
88                *self = IntoEventsInner::Nodes {
89                    remaining: el.take_nodes().into_iter(),
90                    nested: None,
91                };
92                Ok(Some(event))
93            }
94            IntoEventsInner::Nodes {
95                ref mut nested,
96                ref mut remaining,
97            } => {
98                loop {
99                    if let Some(nested) = nested.as_mut() {
100                        if let Some(ev) = nested.next() {
101                            return Some(ev).transpose();
102                        }
103                    }
104                    match remaining.next() {
105                        Some(Node::Text(text)) => {
106                            return Ok(Some(Event::Text(EventMetrics::zero(), text)));
107                        }
108                        Some(Node::Element(el)) => {
109                            *nested = Some(Box::new(IntoEvents::new(el)));
110                            // fallthrough to next loop iteration
111                        }
112                        None => {
113                            // end of element, switch state and emit EndElement
114                            *self = IntoEventsInner::Fin;
115                            return Ok(Some(Event::EndElement(EventMetrics::zero())));
116                        }
117                    }
118                }
119            }
120            IntoEventsInner::Fin => Ok(None),
121        }
122    }
123}
124
125/// Convert a [`minidom::Element`] into [`rxml::Event`]s.
126///
127/// This is a helper struct for [`FromEventsViaElement`]. We cannot use
128/// [`ElementAsXml`] there for lifetime reasons: The [`FromXml`] trait does
129/// not allow a lifetime parameter on the `Builder` type or passing arbitrary
130/// data into the builder.
131struct IntoEvents(IntoEventsInner);
132
133impl IntoEvents {
134    fn new(el: Element) -> Self {
135        IntoEvents(IntoEventsInner::Header(el))
136    }
137}
138
139impl Iterator for IntoEvents {
140    type Item = Result<Event, Error>;
141
142    fn next(&mut self) -> Option<Self::Item> {
143        self.0.next().transpose()
144    }
145}
146
147enum AsXmlState<'a> {
148    /// Element header: we need to generate the
149    /// [`Item::ElementHeadStart`] item from the namespace and name.
150    Header { element: &'a Element },
151
152    /// Element header: we now generate the attributes.
153    Attributes {
154        /// The element (needed for the contents later and to access the
155        /// prefix mapping).
156        element: &'a Element,
157
158        /// Attribute iterator.
159        attributes: rxml::xml_map::Iter<'a, std::string::String>,
160    },
161
162    /// Content: The contents of the element are streamed as events.
163    Nodes {
164        /// Remaining child nodes (text and/or children) to emit.
165        nodes: minidom::element::Nodes<'a>,
166
167        /// When emitting a child element, this is a nested [`ElementAsXml`]
168        /// instance for that child element.
169        nested: Option<Box<ElementAsXml<'a>>>,
170    },
171}
172
173/// Convert a [`minidom::Element`] to [`Item`][`crate::rxml_util::Item`]s.
174///
175/// This can be constructed from the
176/// [`AsXml::as_xml_iter`][`crate::AsXml::as_xml_iter`]
177/// implementation on [`minidom::Element`].
178pub struct ElementAsXml<'a>(Option<AsXmlState<'a>>);
179
180impl<'a> Iterator for ElementAsXml<'a> {
181    type Item = Result<Item<'a>, Error>;
182
183    fn next(&mut self) -> Option<Self::Item> {
184        match self.0 {
185            None => None,
186            Some(AsXmlState::Header { element }) => {
187                let item = Item::ElementHeadStart(
188                    Namespace::from(element.ns()),
189                    Cow::Borrowed(match <&NcNameStr>::try_from(element.name()) {
190                        Ok(v) => v,
191                        Err(e) => {
192                            self.0 = None;
193                            return Some(Err(e.into()));
194                        }
195                    }),
196                );
197                self.0 = Some(AsXmlState::Attributes {
198                    element,
199                    attributes: element.attrs().iter(),
200                });
201                Some(Ok(item))
202            }
203            Some(AsXmlState::Attributes {
204                ref mut attributes,
205                element,
206            }) => {
207                if let Some(((namespace, name), value)) = attributes.next() {
208                    Some(Ok(Item::Attribute(
209                        namespace.clone(),
210                        Cow::Borrowed(name),
211                        Cow::Borrowed(value),
212                    )))
213                } else {
214                    self.0 = Some(AsXmlState::Nodes {
215                        nodes: element.nodes(),
216                        nested: None,
217                    });
218                    Some(Ok(Item::ElementHeadEnd))
219                }
220            }
221            Some(AsXmlState::Nodes {
222                ref mut nodes,
223                ref mut nested,
224            }) => {
225                if let Some(nested) = nested.as_mut() {
226                    if let Some(next) = nested.next() {
227                        return Some(next);
228                    }
229                }
230                *nested = None;
231                match nodes.next() {
232                    None => {
233                        self.0 = None;
234                        Some(Ok(Item::ElementFoot))
235                    }
236                    Some(minidom::Node::Text(ref text)) => {
237                        Some(Ok(Item::Text(Cow::Borrowed(text))))
238                    }
239                    Some(minidom::Node::Element(ref element)) => {
240                        let mut iter = match element.as_xml_iter() {
241                            Ok(v) => v,
242                            Err(e) => {
243                                self.0 = None;
244                                return Some(Err(e));
245                            }
246                        };
247                        let item = iter.next().unwrap();
248                        *nested = Some(Box::new(iter));
249                        Some(item)
250                    }
251                }
252            }
253        }
254    }
255}
256
257impl AsXml for minidom::Element {
258    type ItemIter<'a> = ElementAsXml<'a>;
259
260    fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, Error> {
261        Ok(ElementAsXml(Some(AsXmlState::Header { element: self })))
262    }
263}
264
265/// Construct a [`minidom::Element`] from [`rxml::Event`]s
266///
267/// This can be constructed from the
268/// [`FromXml::from_events`][`crate::FromXml::from_events`]
269/// implementation on [`minidom::Element`].
270pub struct ElementFromEvents {
271    inner: Option<Element>,
272    nested: Option<Box<ElementFromEvents>>,
273}
274
275impl ElementFromEvents {
276    /// Construct a new builder from an element header.
277    ///
278    /// Unlike the [`FromXml::from_events`] implementation on
279    /// [`minidom::Element`], this is contractually infallible. Using this may
280    /// thus save you an `unwrap()` call.
281    pub fn new(qname: rxml::QName, attrs: rxml::AttrMap) -> Self {
282        let mut builder = Element::builder(qname.1, qname.0);
283        for ((namespace, name), value) in attrs.into_iter() {
284            builder = builder.attr_ns(namespace, name, value);
285        }
286
287        let element = builder.build();
288        Self {
289            inner: Some(element),
290            nested: None,
291        }
292    }
293}
294
295impl FromEventsBuilder for ElementFromEvents {
296    type Output = minidom::Element;
297
298    fn feed(&mut self, ev: Event, ctx: &Context<'_>) -> Result<Option<Self::Output>, Error> {
299        let inner = self
300            .inner
301            .as_mut()
302            .expect("feed() called after it finished");
303        if let Some(nested) = self.nested.as_mut() {
304            match nested.feed(ev, ctx)? {
305                Some(v) => {
306                    inner.append_child(v);
307                    self.nested = None;
308                    return Ok(None);
309                }
310                None => return Ok(None),
311            }
312        }
313        match ev {
314            Event::XmlDeclaration(_, _) => Ok(None),
315            Event::StartElement(_, qname, attrs) => {
316                let nested = match Element::from_events(qname, attrs, ctx) {
317                    Ok(v) => v,
318                    Err(FromEventsError::Invalid(e)) => return Err(e),
319                    Err(FromEventsError::Mismatch { .. }) => {
320                        unreachable!("<Element as FromXml>::from_events should accept everything!")
321                    }
322                };
323                self.nested = Some(Box::new(nested));
324                Ok(None)
325            }
326            Event::Text(_, text) => {
327                inner.append_text_node(text);
328                Ok(None)
329            }
330            Event::EndElement(_) => Ok(Some(self.inner.take().unwrap())),
331        }
332    }
333}
334
335impl FromXml for Element {
336    type Builder = ElementFromEvents;
337
338    fn from_events(
339        qname: rxml::QName,
340        attrs: rxml::AttrMap,
341        _ctx: &Context<'_>,
342    ) -> Result<Self::Builder, FromEventsError> {
343        Ok(Self::Builder::new(qname, attrs))
344    }
345}
346
347/// Helper struct to streamingly parse a struct which implements conversion
348/// from [`minidom::Element`].
349pub struct FromEventsViaElement<T> {
350    inner: ElementFromEvents,
351    // needed here because we need to keep the type `T` around until
352    // `FromEventsBuilder` is done and it must always be the same type, so we
353    // have to nail it down in the struct's type, and to do that we need to
354    // bind it to a field. that's what PhantomData is for.
355    _phantom: PhantomData<T>,
356}
357
358impl<E, T: TryFrom<minidom::Element, Error = E>> FromEventsViaElement<T>
359where
360    Error: From<E>,
361{
362    /// Create a new streaming parser for `T`.
363    pub fn new(qname: rxml::QName, attrs: rxml::AttrMap) -> Result<Self, FromEventsError> {
364        Ok(Self {
365            _phantom: PhantomData,
366            inner: Element::from_events(
367                qname,
368                attrs,
369                // FromEventsViaElement does not support passing through
370                // `xml:lang` inheritance, so we don't pass any context.
371                &Context::empty(),
372            )?,
373        })
374    }
375}
376
377impl<E, T: TryFrom<minidom::Element, Error = E>> FromEventsBuilder for FromEventsViaElement<T>
378where
379    Error: From<E>,
380{
381    type Output = T;
382
383    fn feed(&mut self, ev: Event, ctx: &Context<'_>) -> Result<Option<Self::Output>, Error> {
384        match self.inner.feed(ev, ctx) {
385            Ok(Some(v)) => Ok(Some(v.try_into()?)),
386            Ok(None) => Ok(None),
387            Err(e) => Err(e),
388        }
389    }
390}
391
392/// Helper struct to stream a struct which implements conversion
393/// to [`minidom::Element`].
394// NOTE to developers: The 'x lifetime looks unnecessary, but in fact we need
395// it because of the `AsXml` signature. It requires the iterator type to have
396// a lifetime parameter which matches the Item's lifetimes. Even though it
397// would strictly be valid to return something with 'static lifetime when any
398// other lifetime 'a is required, rustc doesn't let us do that.
399pub struct AsItemsViaElement<'x> {
400    iter: EventToItem<IntoEvents>,
401    lifetime_binding: PhantomData<Item<'x>>,
402}
403
404impl AsItemsViaElement<'_> {
405    /// Create a new streaming parser for `T`.
406    pub fn new<E, T>(value: T) -> Result<Self, crate::error::Error>
407    where
408        Error: From<E>,
409        minidom::Element: TryFrom<T, Error = E>,
410    {
411        let element: minidom::Element = value.try_into()?;
412        Ok(Self {
413            iter: EventToItem::new(IntoEvents::new(element)),
414            lifetime_binding: PhantomData,
415        })
416    }
417}
418
419impl<'x> Iterator for AsItemsViaElement<'x> {
420    type Item = Result<Item<'x>, Error>;
421
422    fn next(&mut self) -> Option<Self::Item> {
423        self.iter.next().map(|x| x.map(Item::into_owned))
424    }
425}
426
#[cfg(test)]
mod tests {
    use super::*;

    /// Passing an element through `crate::transform` must yield an equal
    /// element. The input has attributes, text, and nested children in a
    /// different namespace.
    #[test]
    fn transform_element_is_equivalent() {
        let original: Element = "<foo xmlns='urn:a' a='b' c='d'><child a='x'/><child a='y'>some text</child><child xmlns='urn:b'><nested-child/></child></foo>".parse().unwrap();
        let roundtripped: Element = crate::transform(&original).unwrap();
        assert_eq!(original, roundtripped);
    }
}