Skip to main content

xso/
minidom_compat.rs

1//! Implementations of traits from this crate for minidom types
2
3// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
4//
5// This Source Code Form is subject to the terms of the Mozilla Public
6// License, v. 2.0. If a copy of the MPL was not distributed with this
7// file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
9use alloc::{
10    borrow::{Cow, ToOwned},
11    boxed::Box,
12    string::String,
13    vec::IntoIter,
14};
15use core::marker::PhantomData;
16
17use minidom::{Element, Node};
18
19use rxml::{parser::EventMetrics, AttrMap, Event, Namespace, NcName, NcNameStr};
20
21use crate::{
22    error::{Error, FromEventsError},
23    rxml_util::{EventToItem, Item},
24    AsXml, Context, FromEventsBuilder, FromXml,
25};
26
/// State machine for converting a minidom Element into rxml events.
///
/// The machine owns the element it serialises. It starts in
/// [`Header`][`Self::Header`], moves on to [`Nodes`][`Self::Nodes`] once the
/// start event has been produced and ends up in [`Fin`][`Self::Fin`] after
/// the end event has been produced.
///
/// See [`IntoEvents`] for when this is still used.
enum IntoEventsInner {
    /// Element header: the element is still intact and we need to generate
    /// the [`rxml::Event::StartElement`] event from the namespace, name, and
    /// attributes.
    ///
    /// Once that event has been generated, the child nodes are taken out of
    /// the element and the machine switches to the `Nodes` state.
    Header(Element),

    /// Content: The contents of the element are streamed as events.
    Nodes {
        /// Remaining child nodes (text and/or children) to emit.
        ///
        /// Text nodes are turned into a single [`rxml::Event::Text`] event;
        /// element nodes are handed to `nested`.
        remaining: IntoIter<Node>,

        /// When emitting a child element, this is a nested [`IntoEvents`]
        /// instance for that child element.
        ///
        /// Boxed because the type is recursive.
        nested: Option<Box<IntoEvents>>,
    },

    /// End of iteration: this state generates an end-of-iterator state.
    ///
    /// Note that the [`rxml::Event::EndElement`] event for the element itself
    /// is generated by the iterator already in the `Nodes` state, when
    /// the nested iterator (if any) has nothing left to emit and `remaining`
    /// returns `None` from its `next()` implementation.
    Fin,
}
54
55/// Create the parts for a [`rxml::Event::StartElement`] from a
56/// [`minidom::Element`].
57///
58/// Note that this copies the attribute data as well as namespace and name.
59/// This is due to limitations in the [`minidom::Element`] API.
60// NOTE to developers: The limitations are not fully trivial to overcome:
61// the attributes use a BTreeMap internally, which does not offer a `drain`
62// iterator.
63#[deprecated(
64    since = "0.1.3",
65    note = "obsolete since the transition to AsXml. no replacement."
66)]
67// NOTE: instead of deleting this, make it non-pub to be able to continue to
68// use it in IntoEventsInner.
69pub fn make_start_ev_parts(el: &Element) -> Result<(rxml::QName, AttrMap), Error> {
70    let name = NcName::try_from(el.name())?;
71    let namespace = Namespace::from(el.ns());
72
73    let mut attrs = AttrMap::new();
74    for ((namespace, name), value) in el.attrs() {
75        attrs.insert(namespace.clone(), name.clone(), value.to_owned());
76    }
77
78    Ok(((namespace, name), attrs))
79}
80
81impl IntoEventsInner {
82    fn next(&mut self) -> Result<Option<Event>, Error> {
83        match self {
84            IntoEventsInner::Header(ref mut el) => {
85                #[allow(deprecated)]
86                let (qname, attrs) = make_start_ev_parts(el)?;
87                let event = Event::StartElement(EventMetrics::zero(), qname, attrs);
88
89                *self = IntoEventsInner::Nodes {
90                    remaining: el.take_nodes().into_iter(),
91                    nested: None,
92                };
93                Ok(Some(event))
94            }
95            IntoEventsInner::Nodes {
96                ref mut nested,
97                ref mut remaining,
98            } => {
99                loop {
100                    if let Some(nested) = nested.as_mut() {
101                        if let Some(ev) = nested.next() {
102                            return Some(ev).transpose();
103                        }
104                    }
105                    match remaining.next() {
106                        Some(Node::Text(text)) => {
107                            return Ok(Some(Event::Text(EventMetrics::zero(), text)));
108                        }
109                        Some(Node::Element(el)) => {
110                            *nested = Some(Box::new(IntoEvents::new(el)));
111                            // fallthrough to next loop iteration
112                        }
113                        None => {
114                            // end of element, switch state and emit EndElement
115                            *self = IntoEventsInner::Fin;
116                            return Ok(Some(Event::EndElement(EventMetrics::zero())));
117                        }
118                    }
119                }
120            }
121            IntoEventsInner::Fin => Ok(None),
122        }
123    }
124}
125
126/// Convert a [`minidom::Element`] into [`rxml::Event`]s.
127///
128/// This is a helper struct for [`FromEventsViaElement`]. We cannot use
129/// [`ElementAsXml`] there for lifetime reasons: The [`FromXml`] trait does
130/// not allow a lifetime parameter on the `Builder` type or passing arbitrary
131/// data into the builder.
132struct IntoEvents(IntoEventsInner);
133
134impl IntoEvents {
135    fn new(el: Element) -> Self {
136        IntoEvents(IntoEventsInner::Header(el))
137    }
138}
139
140impl Iterator for IntoEvents {
141    type Item = Result<Event, Error>;
142
143    fn next(&mut self) -> Option<Self::Item> {
144        self.0.next().transpose()
145    }
146}
147
/// Progress of an [`ElementAsXml`] iterator through a borrowed
/// [`minidom::Element`].
///
/// The states are visited in declaration order: `Header`, then
/// `Attributes`, then `Nodes`. There is no dedicated final state; the
/// iterator stores `None` instead of a state once it is done.
enum AsXmlState<'a> {
    /// Element header: we need to generate the
    /// [`Item::ElementHeadStart`] item from the namespace and name.
    Header { element: &'a Element },

    /// Element header: we now generate the attributes.
    ///
    /// One [`Item::Attribute`] is generated per attribute, followed by a
    /// single [`Item::ElementHeadEnd`] once the iterator is exhausted.
    Attributes {
        /// The element (needed for the contents later and to access the
        /// prefix mapping).
        element: &'a Element,

        /// Attribute iterator.
        attributes: rxml::xml_map::Iter<'a, String>,
    },

    /// Content: The contents of the element are streamed as events.
    Nodes {
        /// Remaining child nodes (text and/or children) to emit.
        nodes: minidom::element::Nodes<'a>,

        /// When emitting a child element, this is a nested [`ElementAsXml`]
        /// instance for that child element.
        nested: Option<Box<ElementAsXml<'a>>>,
    },
}
173
174/// Convert a [`minidom::Element`] to [`Item`][`crate::rxml_util::Item`]s.
175///
176/// This can be constructed from the
177/// [`AsXml::as_xml_iter`][`crate::AsXml::as_xml_iter`]
178/// implementation on [`minidom::Element`].
179pub struct ElementAsXml<'a>(Option<AsXmlState<'a>>);
180
181impl<'a> Iterator for ElementAsXml<'a> {
182    type Item = Result<Item<'a>, Error>;
183
184    fn next(&mut self) -> Option<Self::Item> {
185        match self.0 {
186            None => None,
187            Some(AsXmlState::Header { element }) => {
188                let item = Item::ElementHeadStart(
189                    Namespace::from(element.ns()),
190                    Cow::Borrowed(match <&NcNameStr>::try_from(element.name()) {
191                        Ok(v) => v,
192                        Err(e) => {
193                            self.0 = None;
194                            return Some(Err(e.into()));
195                        }
196                    }),
197                );
198                self.0 = Some(AsXmlState::Attributes {
199                    element,
200                    attributes: element.attrs().iter(),
201                });
202                Some(Ok(item))
203            }
204            Some(AsXmlState::Attributes {
205                ref mut attributes,
206                element,
207            }) => {
208                if let Some(((namespace, name), value)) = attributes.next() {
209                    Some(Ok(Item::Attribute(
210                        namespace.clone(),
211                        Cow::Borrowed(name),
212                        Cow::Borrowed(value),
213                    )))
214                } else {
215                    self.0 = Some(AsXmlState::Nodes {
216                        nodes: element.nodes(),
217                        nested: None,
218                    });
219                    Some(Ok(Item::ElementHeadEnd))
220                }
221            }
222            Some(AsXmlState::Nodes {
223                ref mut nodes,
224                ref mut nested,
225            }) => {
226                if let Some(nested) = nested.as_mut() {
227                    if let Some(next) = nested.next() {
228                        return Some(next);
229                    }
230                }
231                *nested = None;
232                match nodes.next() {
233                    None => {
234                        self.0 = None;
235                        Some(Ok(Item::ElementFoot))
236                    }
237                    Some(minidom::Node::Text(ref text)) => {
238                        Some(Ok(Item::Text(Cow::Borrowed(text))))
239                    }
240                    Some(minidom::Node::Element(ref element)) => {
241                        let mut iter = match element.as_xml_iter() {
242                            Ok(v) => v,
243                            Err(e) => {
244                                self.0 = None;
245                                return Some(Err(e));
246                            }
247                        };
248                        let item = iter.next().unwrap();
249                        *nested = Some(Box::new(iter));
250                        Some(item)
251                    }
252                }
253            }
254        }
255    }
256}
257
258impl AsXml for minidom::Element {
259    type ItemIter<'a> = ElementAsXml<'a>;
260
261    fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, Error> {
262        Ok(ElementAsXml(Some(AsXmlState::Header { element: self })))
263    }
264}
265
266/// Construct a [`minidom::Element`] from [`rxml::Event`]s
267///
268/// This can be constructed from the
269/// [`FromXml::from_events`][`crate::FromXml::from_events`]
270/// implementation on [`minidom::Element`].
271pub struct ElementFromEvents {
272    inner: Option<Element>,
273    nested: Option<Box<ElementFromEvents>>,
274}
275
276impl ElementFromEvents {
277    /// Construct a new builder from an element header.
278    ///
279    /// Unlike the [`FromXml::from_events`] implementation on
280    /// [`minidom::Element`], this is contractually infallible. Using this may
281    /// thus save you an `unwrap()` call.
282    pub fn new(qname: rxml::QName, attrs: rxml::AttrMap) -> Self {
283        let mut builder = Element::builder(qname.1, qname.0);
284        for ((namespace, name), value) in attrs.into_iter() {
285            builder = builder.attr_ns(namespace, name, value);
286        }
287
288        let element = builder.build();
289        Self {
290            inner: Some(element),
291            nested: None,
292        }
293    }
294}
295
296impl FromEventsBuilder for ElementFromEvents {
297    type Output = minidom::Element;
298
299    fn feed(&mut self, ev: Event, ctx: &Context<'_>) -> Result<Option<Self::Output>, Error> {
300        let inner = self
301            .inner
302            .as_mut()
303            .expect("feed() called after it finished");
304        if let Some(nested) = self.nested.as_mut() {
305            match nested.feed(ev, ctx)? {
306                Some(v) => {
307                    inner.append_child(v);
308                    self.nested = None;
309                    return Ok(None);
310                }
311                None => return Ok(None),
312            }
313        }
314        match ev {
315            Event::XmlDeclaration(_, _) => Ok(None),
316            Event::StartElement(_, qname, attrs) => {
317                let nested = match Element::from_events(qname, attrs, ctx) {
318                    Ok(v) => v,
319                    Err(FromEventsError::Invalid(e)) => return Err(e),
320                    Err(FromEventsError::Mismatch { .. }) => {
321                        unreachable!("<Element as FromXml>::from_events should accept everything!")
322                    }
323                };
324                self.nested = Some(Box::new(nested));
325                Ok(None)
326            }
327            Event::Text(_, text) => {
328                inner.append_text_node(text);
329                Ok(None)
330            }
331            Event::EndElement(_) => Ok(Some(self.inner.take().unwrap())),
332        }
333    }
334}
335
336impl FromXml for Element {
337    type Builder = ElementFromEvents;
338
339    fn from_events(
340        qname: rxml::QName,
341        attrs: rxml::AttrMap,
342        _ctx: &Context<'_>,
343    ) -> Result<Self::Builder, FromEventsError> {
344        Ok(Self::Builder::new(qname, attrs))
345    }
346}
347
348/// Helper struct to streamingly parse a struct which implements conversion
349/// from [`minidom::Element`].
350pub struct FromEventsViaElement<T> {
351    inner: ElementFromEvents,
352    // needed here because we need to keep the type `T` around until
353    // `FromEventsBuilder` is done and it must always be the same type, so we
354    // have to nail it down in the struct's type, and to do that we need to
355    // bind it to a field. that's what PhantomData is for.
356    _phantom: PhantomData<T>,
357}
358
359impl<E, T: TryFrom<minidom::Element, Error = E>> FromEventsViaElement<T>
360where
361    Error: From<E>,
362{
363    /// Create a new streaming parser for `T`.
364    pub fn new(qname: rxml::QName, attrs: rxml::AttrMap) -> Result<Self, FromEventsError> {
365        Ok(Self {
366            _phantom: PhantomData,
367            inner: Element::from_events(
368                qname,
369                attrs,
370                // FromEventsViaElement does not support passing through
371                // `xml:lang` inheritance, so we don't pass any context.
372                &Context::empty(),
373            )?,
374        })
375    }
376}
377
378impl<E, T: TryFrom<minidom::Element, Error = E>> FromEventsBuilder for FromEventsViaElement<T>
379where
380    Error: From<E>,
381{
382    type Output = T;
383
384    fn feed(&mut self, ev: Event, ctx: &Context<'_>) -> Result<Option<Self::Output>, Error> {
385        match self.inner.feed(ev, ctx) {
386            Ok(Some(v)) => Ok(Some(v.try_into()?)),
387            Ok(None) => Ok(None),
388            Err(e) => Err(e),
389        }
390    }
391}
392
393/// Helper struct to stream a struct which implements conversion
394/// to [`minidom::Element`].
395// NOTE to developers: The 'x lifetime looks unnecessary, but in fact we need
396// it because of the `AsXml` signature. It requires the iterator type to have
397// a lifetime parameter which matches the Item's lifetimes. Even though it
398// would strictly be valid to return something with 'static lifetime when any
399// other lifetime 'a is required, rustc doesn't let us do that.
400pub struct AsItemsViaElement<'x> {
401    iter: EventToItem<IntoEvents>,
402    lifetime_binding: PhantomData<Item<'x>>,
403}
404
405impl AsItemsViaElement<'_> {
406    /// Create a new streaming parser for `T`.
407    pub fn new<E, T>(value: T) -> Result<Self, crate::error::Error>
408    where
409        Error: From<E>,
410        minidom::Element: TryFrom<T, Error = E>,
411    {
412        let element: minidom::Element = value.try_into()?;
413        Ok(Self {
414            iter: EventToItem::new(IntoEvents::new(element)),
415            lifetime_binding: PhantomData,
416        })
417    }
418}
419
420impl<'x> Iterator for AsItemsViaElement<'x> {
421    type Item = Result<Item<'x>, Error>;
422
423    fn next(&mut self) -> Option<Self::Item> {
424        self.iter.next().map(|x| x.map(Item::into_owned))
425    }
426}
427
#[cfg(test)]
mod tests {
    use super::*;

    /// Round-trip check: passing an element through `crate::transform` must
    /// yield an element which compares equal to the input.
    ///
    /// The sample document has attributes, text content, several children
    /// with the same name and a child in a different namespace.
    #[test]
    fn transform_element_is_equivalent() {
        let el: Element = "<foo xmlns='urn:a' a='b' c='d'><child a='x'/><child a='y'>some text</child><child xmlns='urn:b'><nested-child/></child></foo>".parse().unwrap();
        let transformed: Element = crate::transform(&el).unwrap();
        assert_eq!(el, transformed);
    }
}