xso/
fromxml.rs

1//! # Generic builder type implementations
2//!
3//! This module contains [`FromEventsBuilder`] implementations for types from
4//! foreign libraries (such as the standard library).
5//!
6//! In order to not clutter the `xso` crate's main namespace, they are
7//! stashed away in a separate module.
8
9// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
10//
11// This Source Code Form is subject to the terms of the Mozilla Public
12// License, v. 2.0. If a copy of the MPL was not distributed with this
13// file, You can obtain one at http://mozilla.org/MPL/2.0/.
14
15use alloc::boxed::Box;
16
17use crate::error::{Error, FromEventsError};
18use crate::{FromEventsBuilder, FromXml};
19
/// # Parsing context for [`FromEventsBuilder`]
///
/// Most [`FromEventsBuilder`] implementations need nothing beyond the
/// [`rxml::Event`] handed to them, plus whatever state they kept from
/// earlier events.
///
/// The one (current) exception is the `xml:lang` attribute, which is
/// inherited down the whole document tree. When the element being parsed is
/// not the top-level element, an implicit `xml:lang` value may be in effect;
/// this type carries that value.
#[derive(Debug)]
#[doc(hidden)]
pub struct Context<'x> {
    language: Option<&'x str>,
}

impl<'x> Context<'x> {
    /// Create the context used at the very start of a document.
    ///
    /// No `xml:lang` value is in effect.
    pub fn empty() -> Self {
        Context { language: None }
    }

    /// Consume the context and return one whose effective `xml:lang` value
    /// is `language`.
    pub fn with_language(self, language: Option<&'x str>) -> Self {
        // `language` is the only field, so the new value fully determines
        // the returned context.
        Context { language }
    }

    /// The `xml:lang` value which is in effect at the end of the event
    /// currently being processed, if any.
    pub fn language(&self) -> Option<&str> {
        // `Option<&str>` is `Copy`; the lifetime shortens to that of `&self`.
        self.language
    }
}
56
57/// Helper struct to construct an `Option<T>` from XML events.
58pub struct OptionBuilder<T: FromEventsBuilder>(T);
59
60impl<T: FromEventsBuilder> FromEventsBuilder for OptionBuilder<T> {
61    type Output = Option<T::Output>;
62
63    fn feed(&mut self, ev: rxml::Event, ctx: &Context<'_>) -> Result<Option<Self::Output>, Error> {
64        self.0.feed(ev, ctx).map(|ok| ok.map(Some))
65    }
66}
67
68/// Parsers `T` into `Some(.)`.
69///
70/// Note that this never generates `None`: The main use case is to allow
71/// external (i.e. without calling `from_events`) defaulting to `None` and
72/// for optional serialisation (the [`AsXml`][`crate::AsXml`] implementation
73/// on `Option<T>` emits nothing for `None`).
74impl<T: FromXml> FromXml for Option<T> {
75    type Builder = OptionBuilder<T::Builder>;
76
77    fn from_events(
78        name: rxml::QName,
79        attrs: rxml::AttrMap,
80        ctx: &Context<'_>,
81    ) -> Result<Self::Builder, FromEventsError> {
82        Ok(OptionBuilder(T::from_events(name, attrs, ctx)?))
83    }
84}
85
86/// Helper struct to construct an `Box<T>` from XML events.
87pub struct BoxBuilder<T: FromEventsBuilder>(Box<T>);
88
89impl<T: FromEventsBuilder> FromEventsBuilder for BoxBuilder<T> {
90    type Output = Box<T::Output>;
91
92    fn feed(&mut self, ev: rxml::Event, ctx: &Context<'_>) -> Result<Option<Self::Output>, Error> {
93        self.0.feed(ev, ctx).map(|ok| ok.map(Box::new))
94    }
95}
96
97/// Parsers `T` into a `Box`.
98impl<T: FromXml> FromXml for Box<T> {
99    type Builder = BoxBuilder<T::Builder>;
100
101    fn from_events(
102        name: rxml::QName,
103        attrs: rxml::AttrMap,
104        ctx: &Context<'_>,
105    ) -> Result<Self::Builder, FromEventsError> {
106        Ok(BoxBuilder(Box::new(T::from_events(name, attrs, ctx)?)))
107    }
108}
109
/// Internal state of a [`FallibleBuilder`].
#[derive(Debug)]
enum FallibleBuilderInner<T: FromEventsBuilder, E> {
    /// The wrapped `builder` is still being fed events. `depth` is
    /// incremented for each start-element event and decremented for each
    /// end-element event seen so far.
    Processing { depth: usize, builder: T },
    /// Parsing has failed. Events are counted via `depth` but otherwise
    /// ignored until the element ends, at which point `err` is taken out and
    /// returned.
    Failed { depth: usize, err: Option<E> },
    /// A final result (or a nesting depth overflow error) has been returned;
    /// feeding further events panics.
    Done,
}
116
117/// Build a `Result<T, E>` from XML.
118///
119/// This builder, invoked generally via the [`FromXml`] implementation on
120/// `Result<T, E> where T: FromXml, E: From<Error>`, allows to fallably parse
121/// an XSO from XML.
122///
123/// If an error occurs while parsing the XSO, the remaining events which
124/// belong to that XSO are discarded. Once all events have been seen, the
125/// error is returned as `Err(.)` value.
126///
127/// If parsing succeeds, the parsed XSO is returned as `Ok(.)` value.
128#[derive(Debug)]
129pub struct FallibleBuilder<T: FromEventsBuilder, E>(FallibleBuilderInner<T, E>);
130
131impl<T: FromEventsBuilder, E: From<Error>> FromEventsBuilder for FallibleBuilder<T, E> {
132    type Output = Result<T::Output, E>;
133
134    fn feed(&mut self, ev: rxml::Event, ctx: &Context<'_>) -> Result<Option<Self::Output>, Error> {
135        match self.0 {
136            FallibleBuilderInner::Processing {
137                ref mut depth,
138                ref mut builder,
139            } => {
140                let new_depth = match ev {
141                    rxml::Event::StartElement(..) => match depth.checked_add(1) {
142                        // I *think* it is OK to return an err here
143                        // instead of panicking. The reason is that anyone
144                        // who intends to resume processing at the level
145                        // of where we started to parse this thing in case
146                        // of an error either has to:
147                        // - Use this fallible implementation and rely on
148                        //   it capturing the error (which we don't in
149                        //   this case).
150                        // - Or count the depth themselves, which will
151                        //   either fail in the same way, or they use a
152                        //   wider type (in which case it's ok).
153                        None => {
154                            self.0 = FallibleBuilderInner::Done;
155                            return Err(Error::Other("maximum XML nesting depth exceeded"));
156                        }
157                        Some(v) => Some(v),
158                    },
159                    // In case of an element end, underflow means that we
160                    // have reached the end of the XSO we wanted to process.
161                    // We handle that case at the end of the outer match's
162                    // body: Either we have returned a value then (good), or,
163                    // if we reach the end there with a new_depth == None,
164                    // something went horribly wrong (and we panic).
165                    rxml::Event::EndElement(..) => depth.checked_sub(1),
166
167                    // Text and XML declarations have no influence on parsing
168                    // depth.
169                    rxml::Event::XmlDeclaration(..) | rxml::Event::Text(..) => Some(*depth),
170                };
171
172                match builder.feed(ev, ctx) {
173                    Ok(Some(v)) => {
174                        self.0 = FallibleBuilderInner::Done;
175                        return Ok(Some(Ok(v)));
176                    }
177                    Ok(None) => {
178                        // continue processing in the next round.
179                    }
180                    Err(e) => {
181                        // We are now officially failed ..
182                        match new_depth {
183                            // .. but we are not done yet, so enter the
184                            // failure backtracking state.
185                            Some(depth) => {
186                                self.0 = FallibleBuilderInner::Failed {
187                                    depth,
188                                    err: Some(e.into()),
189                                };
190                                return Ok(None);
191                            }
192                            // .. and we are done with parsing, so we return
193                            // the error as value.
194                            None => {
195                                self.0 = FallibleBuilderInner::Done;
196                                return Ok(Some(Err(e.into())));
197                            }
198                        }
199                    }
200                };
201
202                *depth = match new_depth {
203                    Some(v) => v,
204                    None => unreachable!("fallible parsing continued beyond end of element"),
205                };
206
207                // Need more events.
208                Ok(None)
209            }
210            FallibleBuilderInner::Failed {
211                ref mut depth,
212                ref mut err,
213            } => {
214                *depth = match ev {
215                    rxml::Event::StartElement(..) => match depth.checked_add(1) {
216                        // See above for error return rationale.
217                        None => {
218                            self.0 = FallibleBuilderInner::Done;
219                            return Err(Error::Other("maximum XML nesting depth exceeded"));
220                        }
221                        Some(v) => v,
222                    },
223                    rxml::Event::EndElement(..) => match depth.checked_sub(1) {
224                        Some(v) => v,
225                        None => {
226                            // We are officially done, return a value, switch
227                            // states, and be done with it.
228                            let err = err.take().expect("fallible parsing somehow lost its error");
229                            self.0 = FallibleBuilderInner::Done;
230                            return Ok(Some(Err(err)));
231                        }
232                    },
233
234                    // Text and XML declarations have no influence on parsing
235                    // depth.
236                    rxml::Event::XmlDeclaration(..) | rxml::Event::Text(..) => *depth,
237                };
238
239                // Need more events
240                Ok(None)
241            }
242            FallibleBuilderInner::Done => {
243                panic!("FromEventsBuilder called after it returned a value")
244            }
245        }
246    }
247}
248
249/// Parsers `T` fallibly. See [`FallibleBuilder`] for details.
250impl<T: FromXml, E: From<Error>> FromXml for Result<T, E> {
251    type Builder = FallibleBuilder<T::Builder, E>;
252
253    fn from_events(
254        name: rxml::QName,
255        attrs: rxml::AttrMap,
256        ctx: &Context<'_>,
257    ) -> Result<Self::Builder, FromEventsError> {
258        match T::from_events(name, attrs, ctx) {
259            Ok(builder) => Ok(FallibleBuilder(FallibleBuilderInner::Processing {
260                depth: 0,
261                builder,
262            })),
263            Err(FromEventsError::Mismatch { name, attrs }) => {
264                Err(FromEventsError::Mismatch { name, attrs })
265            }
266            Err(FromEventsError::Invalid(e)) => Ok(FallibleBuilder(FallibleBuilderInner::Failed {
267                depth: 0,
268                err: Some(e.into()),
269            })),
270        }
271    }
272}
273
/// Builder which consumes a complete child tree and throws it away without
/// looking at its contents.
#[derive(Debug, Default)]
pub struct Discard {
    depth: usize,
}

impl Discard {
    /// Create a discarding builder positioned right after the start of the
    /// element to discard (nesting depth zero).
    pub fn new() -> Self {
        Discard { depth: 0 }
    }
}
287
288impl FromEventsBuilder for Discard {
289    type Output = ();
290
291    fn feed(&mut self, ev: rxml::Event, _ctx: &Context<'_>) -> Result<Option<Self::Output>, Error> {
292        match ev {
293            rxml::Event::StartElement(..) => {
294                self.depth = match self.depth.checked_add(1) {
295                    Some(v) => v,
296                    None => return Err(Error::Other("maximum XML nesting depth exceeded")),
297                };
298                Ok(None)
299            }
300            rxml::Event::EndElement(..) => match self.depth.checked_sub(1) {
301                None => Ok(Some(())),
302                Some(v) => {
303                    self.depth = v;
304                    Ok(None)
305                }
306            },
307            _ => Ok(None),
308        }
309    }
310}
311
/// Builder for elements which must be empty: it completes on the end of the
/// element and fails on any content.
///
/// This builder is only to be used from within the proc macros and is not
/// stable, public API.
#[doc(hidden)]
#[cfg(feature = "macros")]
pub struct EmptyBuilder {
    childerr: &'static str,
    texterr: &'static str,
}

#[cfg(feature = "macros")]
impl FromEventsBuilder for EmptyBuilder {
    type Output = ();

    /// Accept only the end-element event; everything else is turned into an
    /// error carrying the message matching the kind of content.
    fn feed(&mut self, ev: rxml::Event, _ctx: &Context<'_>) -> Result<Option<Self::Output>, Error> {
        let complaint = match ev {
            rxml::Event::EndElement(..) => return Ok(Some(())),
            rxml::Event::Text(..) => self.texterr,
            rxml::Event::StartElement(..) => self.childerr,
            _ => "unexpected content in supposed-to-be-empty element",
        };
        Err(Error::Other(complaint))
    }
}
338
/// Precursor of [`EmptyBuilder`], holding the error messages to use for each
/// kind of unexpected content.
///
/// This struct is only to be used from within the proc macros and is not
/// stable, public API.
#[doc(hidden)]
#[cfg(feature = "macros")]
pub struct Empty {
    pub attributeerr: &'static str,
    pub childerr: &'static str,
    pub texterr: &'static str,
}

#[cfg(feature = "macros")]
impl Empty {
    /// Check that the element carries no attributes and, if so, return the
    /// builder which validates the (absence of) content.
    ///
    /// Fails with `attributeerr` as message if `attr` is not empty.
    pub fn start(self, attr: rxml::AttrMap) -> Result<EmptyBuilder, Error> {
        let Empty {
            attributeerr,
            childerr,
            texterr,
        } = self;
        if attr.is_empty() {
            Ok(EmptyBuilder { childerr, texterr })
        } else {
            Err(Error::Other(attributeerr))
        }
    }
}
363
364#[cfg(test)]
365mod tests {
366    use super::*;
367
368    use alloc::borrow::ToOwned;
369    use rxml::{parser::EventMetrics, Event, Namespace, NcName};
370
371    macro_rules! null_builder {
372        ($name:ident for $output:ident) => {
373            #[derive(Debug)]
374            enum $name {}
375
376            impl FromEventsBuilder for $name {
377                type Output = $output;
378
379                fn feed(
380                    &mut self,
381                    _: Event,
382                    _: &Context<'_>,
383                ) -> Result<Option<Self::Output>, Error> {
384                    unreachable!();
385                }
386            }
387        };
388    }
389
390    null_builder!(AlwaysMismatchBuilder for AlwaysMismatch);
391    null_builder!(InitialErrorBuilder for InitialError);
392
393    #[derive(Debug)]
394    struct AlwaysMismatch;
395
396    impl FromXml for AlwaysMismatch {
397        type Builder = AlwaysMismatchBuilder;
398
399        fn from_events(
400            name: rxml::QName,
401            attrs: rxml::AttrMap,
402            _ctx: &Context<'_>,
403        ) -> Result<Self::Builder, FromEventsError> {
404            Err(FromEventsError::Mismatch { name, attrs })
405        }
406    }
407
408    #[derive(Debug)]
409    struct InitialError;
410
411    impl FromXml for InitialError {
412        type Builder = InitialErrorBuilder;
413
414        fn from_events(
415            _: rxml::QName,
416            _: rxml::AttrMap,
417            _: &Context<'_>,
418        ) -> Result<Self::Builder, FromEventsError> {
419            Err(FromEventsError::Invalid(Error::Other("some error")))
420        }
421    }
422
423    #[derive(Debug)]
424    struct FailOnContentBuilder;
425
426    impl FromEventsBuilder for FailOnContentBuilder {
427        type Output = FailOnContent;
428
429        fn feed(&mut self, _: Event, _: &Context<'_>) -> Result<Option<Self::Output>, Error> {
430            Err(Error::Other("content error"))
431        }
432    }
433
434    #[derive(Debug)]
435    struct FailOnContent;
436
437    impl FromXml for FailOnContent {
438        type Builder = FailOnContentBuilder;
439
440        fn from_events(
441            _: rxml::QName,
442            _: rxml::AttrMap,
443            _: &Context<'_>,
444        ) -> Result<Self::Builder, FromEventsError> {
445            Ok(FailOnContentBuilder)
446        }
447    }
448
449    fn qname() -> rxml::QName {
450        (Namespace::NONE, NcName::try_from("test").unwrap())
451    }
452
453    fn attrs() -> rxml::AttrMap {
454        rxml::AttrMap::new()
455    }
456
457    #[test]
458    fn fallible_builder_mismatch_passthrough() {
459        match Result::<AlwaysMismatch, Error>::from_events(qname(), attrs(), &Context::empty()) {
460            Err(FromEventsError::Mismatch { .. }) => (),
461            other => panic!("unexpected result: {:?}", other),
462        }
463    }
464
465    #[test]
466    fn fallible_builder_initial_error_capture() {
467        let ctx = Context::empty();
468        let mut builder = match Result::<InitialError, Error>::from_events(qname(), attrs(), &ctx) {
469            Ok(v) => v,
470            other => panic!("unexpected result: {:?}", other),
471        };
472        match builder.feed(
473            Event::Text(EventMetrics::zero(), "hello world!".to_owned()),
474            &ctx,
475        ) {
476            Ok(None) => (),
477            other => panic!("unexpected result: {:?}", other),
478        };
479        match builder.feed(Event::EndElement(EventMetrics::zero()), &ctx) {
480            Ok(Some(Err(Error::Other("some error")))) => (),
481            other => panic!("unexpected result: {:?}", other),
482        };
483    }
484
485    #[test]
486    fn fallible_builder_initial_error_capture_allows_nested_stuff() {
487        let ctx = Context::empty();
488        let mut builder = match Result::<InitialError, Error>::from_events(qname(), attrs(), &ctx) {
489            Ok(v) => v,
490            other => panic!("unexpected result: {:?}", other),
491        };
492        match builder.feed(
493            Event::StartElement(EventMetrics::zero(), qname(), attrs()),
494            &ctx,
495        ) {
496            Ok(None) => (),
497            other => panic!("unexpected result: {:?}", other),
498        };
499        match builder.feed(
500            Event::Text(EventMetrics::zero(), "hello world!".to_owned()),
501            &ctx,
502        ) {
503            Ok(None) => (),
504            other => panic!("unexpected result: {:?}", other),
505        };
506        match builder.feed(Event::EndElement(EventMetrics::zero()), &ctx) {
507            Ok(None) => (),
508            other => panic!("unexpected result: {:?}", other),
509        };
510        match builder.feed(
511            Event::Text(EventMetrics::zero(), "hello world!".to_owned()),
512            &ctx,
513        ) {
514            Ok(None) => (),
515            other => panic!("unexpected result: {:?}", other),
516        };
517        match builder.feed(
518            Event::StartElement(EventMetrics::zero(), qname(), attrs()),
519            &ctx,
520        ) {
521            Ok(None) => (),
522            other => panic!("unexpected result: {:?}", other),
523        };
524        match builder.feed(
525            Event::StartElement(EventMetrics::zero(), qname(), attrs()),
526            &ctx,
527        ) {
528            Ok(None) => (),
529            other => panic!("unexpected result: {:?}", other),
530        };
531        match builder.feed(
532            Event::Text(EventMetrics::zero(), "hello world!".to_owned()),
533            &ctx,
534        ) {
535            Ok(None) => (),
536            other => panic!("unexpected result: {:?}", other),
537        };
538        match builder.feed(Event::EndElement(EventMetrics::zero()), &ctx) {
539            Ok(None) => (),
540            other => panic!("unexpected result: {:?}", other),
541        };
542        match builder.feed(Event::EndElement(EventMetrics::zero()), &ctx) {
543            Ok(None) => (),
544            other => panic!("unexpected result: {:?}", other),
545        };
546        match builder.feed(Event::EndElement(EventMetrics::zero()), &ctx) {
547            Ok(Some(Err(Error::Other("some error")))) => (),
548            other => panic!("unexpected result: {:?}", other),
549        };
550    }
551
552    #[test]
553    fn fallible_builder_content_error_capture() {
554        let ctx = Context::empty();
555        let mut builder = match Result::<FailOnContent, Error>::from_events(qname(), attrs(), &ctx)
556        {
557            Ok(v) => v,
558            other => panic!("unexpected result: {:?}", other),
559        };
560        match builder.feed(Event::EndElement(EventMetrics::zero()), &ctx) {
561            Ok(Some(Err(Error::Other("content error")))) => (),
562            other => panic!("unexpected result: {:?}", other),
563        };
564    }
565
566    #[test]
567    fn fallible_builder_content_error_capture_with_more_content() {
568        let ctx = Context::empty();
569        let mut builder = match Result::<FailOnContent, Error>::from_events(qname(), attrs(), &ctx)
570        {
571            Ok(v) => v,
572            other => panic!("unexpected result: {:?}", other),
573        };
574        match builder.feed(
575            Event::Text(EventMetrics::zero(), "hello world!".to_owned()),
576            &ctx,
577        ) {
578            Ok(None) => (),
579            other => panic!("unexpected result: {:?}", other),
580        };
581        match builder.feed(Event::EndElement(EventMetrics::zero()), &ctx) {
582            Ok(Some(Err(Error::Other("content error")))) => (),
583            other => panic!("unexpected result: {:?}", other),
584        };
585    }
586
587    #[test]
588    fn fallible_builder_content_error_capture_with_nested_content() {
589        let ctx = Context::empty();
590        let mut builder = match Result::<FailOnContent, Error>::from_events(qname(), attrs(), &ctx)
591        {
592            Ok(v) => v,
593            other => panic!("unexpected result: {:?}", other),
594        };
595        match builder.feed(
596            Event::StartElement(EventMetrics::zero(), qname(), attrs()),
597            &ctx,
598        ) {
599            Ok(None) => (),
600            other => panic!("unexpected result: {:?}", other),
601        };
602        match builder.feed(
603            Event::Text(EventMetrics::zero(), "hello world!".to_owned()),
604            &ctx,
605        ) {
606            Ok(None) => (),
607            other => panic!("unexpected result: {:?}", other),
608        };
609        match builder.feed(Event::EndElement(EventMetrics::zero()), &ctx) {
610            Ok(None) => (),
611            other => panic!("unexpected result: {:?}", other),
612        };
613        match builder.feed(
614            Event::Text(EventMetrics::zero(), "hello world!".to_owned()),
615            &ctx,
616        ) {
617            Ok(None) => (),
618            other => panic!("unexpected result: {:?}", other),
619        };
620        match builder.feed(
621            Event::StartElement(EventMetrics::zero(), qname(), attrs()),
622            &ctx,
623        ) {
624            Ok(None) => (),
625            other => panic!("unexpected result: {:?}", other),
626        };
627        match builder.feed(
628            Event::StartElement(EventMetrics::zero(), qname(), attrs()),
629            &ctx,
630        ) {
631            Ok(None) => (),
632            other => panic!("unexpected result: {:?}", other),
633        };
634        match builder.feed(
635            Event::Text(EventMetrics::zero(), "hello world!".to_owned()),
636            &ctx,
637        ) {
638            Ok(None) => (),
639            other => panic!("unexpected result: {:?}", other),
640        };
641        match builder.feed(Event::EndElement(EventMetrics::zero()), &ctx) {
642            Ok(None) => (),
643            other => panic!("unexpected result: {:?}", other),
644        };
645        match builder.feed(Event::EndElement(EventMetrics::zero()), &ctx) {
646            Ok(None) => (),
647            other => panic!("unexpected result: {:?}", other),
648        };
649        match builder.feed(Event::EndElement(EventMetrics::zero()), &ctx) {
650            Ok(Some(Err(Error::Other("content error")))) => (),
651            other => panic!("unexpected result: {:?}", other),
652        };
653    }
654}