xso/
lib.rs

1#![cfg_attr(docsrs, feature(doc_auto_cfg))]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3#![forbid(unsafe_code)]
4#![warn(missing_docs)]
5/*!
6# XML Streamed Objects -- serde-like parsing for XML
7
8This crate provides the traits for parsing XML data into Rust structs, and
9vice versa.
10
11While it is in 0.0.x versions, many features still need to be developed, but
12rest assured that there is a solid plan to get it fully usable for even
13advanced XML scenarios.
14
XSO is an acronym for XML Stream(ed) Objects, referring to the main field of
use of this library: parsing XML streams as specified in RFC 6120.
17*/
18
19// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
20//
21// This Source Code Form is subject to the terms of the Mozilla Public
22// License, v. 2.0. If a copy of the MPL was not distributed with this
23// file, You can obtain one at http://mozilla.org/MPL/2.0/.
24
25#![no_std]
26
27extern crate alloc;
28#[cfg(feature = "std")]
29extern crate std;
30#[cfg(feature = "std")]
31use std::io;
32
33pub mod asxml;
34pub mod error;
35pub mod fromxml;
36#[cfg(feature = "minidom")]
37pub mod minidom_compat;
38mod rxml_util;
39pub mod text;
40
/// Re-exports and aliases referenced by the code which the derive macros
/// generate.
///
/// This module is hidden from the documentation and only compiled when the
/// `macros` feature is enabled.
#[doc(hidden)]
#[cfg(feature = "macros")]
pub mod exports {
    // `minidom` is only re-exported when the corresponding feature is
    // enabled; `rxml` is always re-exported.
    #[cfg(feature = "minidom")]
    pub use minidom;
    pub use rxml;

    // These re-exports are necessary to support both std and no_std in code
    // generated by the macros.
    //
    // If we attempted to use ::alloc directly from macros, std builds would
    // not work because alloc is not generally present in builds using std.
    // If we used ::std, no_std builds would obviously not work. By exporting
    // std as alloc in std builds, we can safely use the alloc types from
    // there.
    //
    // Obviously, we have to be careful in xso-proc to not refer to types
    // which are not in alloc.
    #[cfg(not(feature = "std"))]
    pub extern crate alloc;
    #[cfg(feature = "std")]
    pub extern crate std as alloc;

    /// The built-in `bool` type.
    ///
    /// This is re-exported for use by macros in cases where we cannot rely on
    /// people not having done `type bool = str` or some similar shenanigans.
    pub type CoreBool = bool;

    /// The built-in `u8` type.
    ///
    /// This is re-exported for use by macros in cases where we cannot rely on
    /// people not having done `type u8 = str` or some similar shenanigans.
    pub type CoreU8 = u8;
}
76
77use alloc::{
78    borrow::{Cow, ToOwned},
79    boxed::Box,
80    string::String,
81    vec::Vec,
82};
83
84#[doc(inline)]
85pub use text::TextCodec;
86
87#[doc(inline)]
88pub use rxml_util::Item;
89
90pub use asxml::PrintRawXml;
91
92#[doc = include_str!("from_xml_doc.md")]
93#[doc(inline)]
94#[cfg(feature = "macros")]
95pub use xso_proc::FromXml;
96
97/// # Make a struct or enum serialisable to XML
98///
99/// This derives the [`AsXml`] trait on a struct or enum. It is the
100/// counterpart to [`macro@FromXml`].
101///
102/// The attributes necessary and available for the derivation to work are
103/// documented on [`macro@FromXml`].
104#[doc(inline)]
105#[cfg(feature = "macros")]
106pub use xso_proc::AsXml;
107
/// Trait allowing to iterate a struct's contents as serialisable
/// [`Item`]s.
///
/// Serialisation is pull-based: [`as_xml_iter`][`Self::as_xml_iter`] hands
/// out an iterator and the caller drains it. Each element of the iterator is
/// a `Result`, so producing an individual item may fail as well.
///
/// **Important:** Changing the [`ItemIter`][`Self::ItemIter`] associated
/// type is considered a non-breaking change for any given implementation of
/// this trait. Always refer to a type's iterator type using fully-qualified
/// notation, for example: `<T as xso::AsXml>::ItemIter`.
pub trait AsXml {
    /// The iterator type.
    ///
    /// The lifetime `'x` is the lifetime of the borrow of `self` passed to
    /// [`as_xml_iter`][`Self::as_xml_iter`]; the emitted [`Item`]s carry the
    /// same lifetime.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// iterator type using fully-qualified notation, for example:
    /// `<T as xso::AsXml>::ItemIter`.
    type ItemIter<'x>: Iterator<Item = Result<Item<'x>, self::error::Error>>
    where
        Self: 'x;

    /// Return an iterator which emits the contents of the struct or enum as
    /// serialisable [`Item`] items.
    ///
    /// # Errors
    ///
    /// An error is returned if the iterator cannot be created. Errors which
    /// occur later are reported through the items of the iterator itself.
    fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, self::error::Error>;
}
130
/// Trait for a temporary object allowing to construct a struct from
/// [`rxml::Event`] items.
///
/// Objects of this type are generally constructed through
/// [`FromXml::from_events`] and are used to build Rust structs or enums from
/// XML data. The XML data must be fed as `rxml::Event` to the
/// [`feed`][`Self::feed`] method.
pub trait FromEventsBuilder {
    /// The type which will be constructed by this builder.
    type Output;

    /// Feed another [`rxml::Event`] into the element construction
    /// process.
    ///
    /// The return value tells the caller how to proceed:
    ///
    /// - `Ok(Some(_))`: the construction process completed and the
    ///   finished value is returned.
    /// - `Ok(None)`: valid data has been fed, but more events are needed to
    ///   fully construct the resulting struct.
    /// - `Err(_)`: the construction failed.
    ///
    /// Errors are generally fatal and the builder should be assumed to be
    /// broken at that point. Feeding more events after an error may result
    /// in panics, errors or inconsistent result data, though it must never
    /// result in unsound or unsafe behaviour.
    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error>;
}
156
/// Trait allowing to construct a struct from a stream of
/// [`rxml::Event`] items.
///
/// To use this, first call [`FromXml::from_events`] with the qualified
/// name and the attributes of the corresponding
/// [`rxml::Event::StartElement`] event. If the call succeeds, the
/// returned builder object must be fed with the events representing the
/// contents of the element, and then with the `EndElement` event.
///
/// The `StartElement` passed to `from_events` must not be passed to `feed`.
///
/// **Important:** Changing the [`Builder`][`Self::Builder`] associated type
/// is considered a non-breaking change for any given implementation of this
/// trait. Always refer to a type's builder type using fully-qualified
/// notation, for example: `<T as xso::FromXml>::Builder`.
pub trait FromXml {
    /// A builder type used to construct the element.
    ///
    /// The builder's [`FromEventsBuilder::Output`] is the implementing type
    /// itself.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// builder type using fully-qualified notation, for example:
    /// `<T as xso::FromXml>::Builder`.
    type Builder: FromEventsBuilder<Output = Self>;

    /// Attempt to initiate the streamed construction of this struct from XML.
    ///
    /// If the passed qualified `name` and `attrs` match the element's type,
    /// the [`Self::Builder`] is returned and should be fed with XML events
    /// by the caller.
    ///
    /// Otherwise, an appropriate error is returned. The helper functions in
    /// this crate distinguish two cases of the returned
    /// [`FromEventsError`][`self::error::FromEventsError`]: `Mismatch` is
    /// reported as a type mismatch, while `Invalid` carries an
    /// [`Error`][`self::error::Error`] which is passed on to the caller.
    fn from_events(
        name: rxml::QName,
        attrs: rxml::AttrMap,
    ) -> Result<Self::Builder, self::error::FromEventsError>;
}
193
/// Trait allowing to convert XML text to a value.
///
/// This trait is similar to [`core::str::FromStr`], however, due to
/// restrictions imposed by the orphan rule, a separate trait is needed.
/// Implementations for many standard library types are available. In
/// addition, the following feature flags can enable more implementations:
///
/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
/// - `uuid`: `uuid::Uuid`
///
/// Because of this unfortunate situation, we are **extremely liberal** with
/// accepting optional dependencies for this purpose. You are very welcome to
/// make merge requests against this crate adding support for parsing types
/// from third-party crates.
pub trait FromXmlText: Sized {
    /// Convert the given XML text to a value.
    ///
    /// The text is passed as an owned [`String`], which allows
    /// implementations to reuse the allocation.
    ///
    /// # Errors
    ///
    /// An error is returned if `data` cannot be converted into `Self`.
    fn from_xml_text(data: String) -> Result<Self, self::error::Error>;
}
212
// XML text is already passed around as `String`, so no conversion is needed
// and this implementation never fails.
impl FromXmlText for String {
    /// Return the string unchanged.
    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
        Ok(data)
    }
}
219
220impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
221    /// Return a [`Cow::Owned`] containing the parsed value.
222    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
223        Ok(Cow::Owned(T::from_xml_text(data)?))
224    }
225}
226
227impl<T: FromXmlText> FromXmlText for Option<T> {
228    /// Return a [`Some`] containing the parsed value.
229    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
230        Ok(Some(T::from_xml_text(data)?))
231    }
232}
233
234impl<T: FromXmlText> FromXmlText for Box<T> {
235    /// Return a [`Box`] containing the parsed value.
236    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
237        Ok(Box::new(T::from_xml_text(data)?))
238    }
239}
240
241/// Trait to convert a value to an XML text string.
242///
243/// Implementing this trait for a type allows it to be used both for XML
244/// character data within elements and for XML attributes. For XML attributes,
245/// the behaviour is defined by [`AsXmlText::as_optional_xml_text`], while
246/// XML element text content uses [`AsXmlText::as_xml_text`]. Implementing
247/// [`AsXmlText`] automatically provides an implementation of
248/// [`AsOptionalXmlText`].
249///
250/// If your type should only be used in XML attributes and has no correct
251/// serialisation in XML text, you should *only* implement
252/// [`AsOptionalXmlText`] and omit the [`AsXmlText`] implementation.
253///
254/// This trait is implemented for many standard library types implementing
255/// [`core::fmt::Display`]. In addition, the following feature flags can enable
256/// more implementations:
257///
258/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
259/// - `uuid`: `uuid::Uuid`
260///
261/// Because of the unfortunate situation as described in [`FromXmlText`], we
262/// are **extremely liberal** with accepting optional dependencies for this
263/// purpose. You are very welcome to make merge requests against this crate
264/// adding support for parsing third-party crates.
265pub trait AsXmlText {
266    /// Convert the value to an XML string in a context where an absent value
267    /// cannot be represented.
268    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error>;
269
270    /// Convert the value to an XML string in a context where an absent value
271    /// can be represented.
272    ///
273    /// The provided implementation will always return the result of
274    /// [`Self::as_xml_text`] wrapped into `Some(.)`. By re-implementing
275    /// this method, implementors can customize the behaviour for certain
276    /// values.
277    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
278        Ok(Some(self.as_xml_text()?))
279    }
280}
281
282impl AsXmlText for String {
283    /// Return the borrowed string contents.
284    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
285        Ok(Cow::Borrowed(self))
286    }
287}
288
289impl AsXmlText for str {
290    /// Return the borrowed string contents.
291    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
292        Ok(Cow::Borrowed(self))
293    }
294}
295
296impl<T: AsXmlText> AsXmlText for Box<T> {
297    /// Return the borrowed [`Box`] contents.
298    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
299        T::as_xml_text(self)
300    }
301}
302
303impl<B: AsXmlText + ToOwned> AsXmlText for Cow<'_, B> {
304    /// Return the borrowed [`Cow`] contents.
305    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
306        B::as_xml_text(self)
307    }
308}
309
310impl<T: AsXmlText> AsXmlText for &T {
311    /// Delegate to the `AsXmlText` implementation on `T`.
312    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
313        T::as_xml_text(*self)
314    }
315}
316
/// Specialized variant of [`AsXmlText`].
///
/// Normally, it should not be necessary to implement this trait as it is
/// automatically implemented for all types implementing [`AsXmlText`].
/// However, if your type can only be serialised as an XML attribute (for
/// example because an absent value has a particular meaning), it is correct
/// to implement [`AsOptionalXmlText`] **instead of** [`AsXmlText`].
///
/// If your type can be serialised as both (text and attribute) but needs
/// special handling in attributes, implement [`AsXmlText`] but provide a
/// custom implementation of [`AsXmlText::as_optional_xml_text`].
pub trait AsOptionalXmlText {
    /// Convert the value to an XML string in a context where an absent value
    /// can be represented.
    ///
    /// `Ok(None)` represents the absent value, `Ok(Some(_))` carries the
    /// text to emit. The returned [`Cow`] may borrow from `self`.
    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error>;
}
333
334impl<T: AsXmlText> AsOptionalXmlText for T {
335    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
336        <Self as AsXmlText>::as_optional_xml_text(self)
337    }
338}
339
340impl<T: AsXmlText> AsOptionalXmlText for Option<T> {
341    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
342        self.as_ref()
343            .map(T::as_optional_xml_text)
344            .transpose()
345            .map(Option::flatten)
346    }
347}
348
/// Control how unknown attributes are handled.
///
/// The variants of this enum are referenced by the
/// `#[xml(on_unknown_attribute = ..)]` meta, which can be used on structs
/// and enum variants. The specified variant controls how attributes, which
/// are not handled by any member of the compound, are handled during parsing.
///
/// The [`Default`] variant depends on the `non-pedantic` crate feature; see
/// the documentation of the individual variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
pub enum UnknownAttributePolicy {
    /// All unknown attributes are discarded.
    ///
    /// This is the default policy if the crate is built with the
    /// `non-pedantic` feature.
    // Exactly one of the two `cfg_attr`s in this enum is active in any given
    // build, so there is always exactly one `#[default]` variant.
    #[cfg_attr(feature = "non-pedantic", default)]
    Discard,

    /// The first unknown attribute which is encountered generates a fatal
    /// parsing error.
    ///
    /// This is the default policy if the crate is built **without** the
    /// `non-pedantic` feature.
    #[cfg_attr(not(feature = "non-pedantic"), default)]
    Fail,
}
372
373impl UnknownAttributePolicy {
374    #[doc(hidden)]
375    /// Implementation of the policy.
376    ///
377    /// This is an internal API and not subject to semver versioning.
378    pub fn apply_policy(&self, msg: &'static str) -> Result<(), self::error::Error> {
379        match self {
380            Self::Fail => Err(self::error::Error::Other(msg)),
381            Self::Discard => Ok(()),
382        }
383    }
384}
385
/// Control how unknown children are handled.
///
/// The variants of this enum are referenced by the
/// `#[xml(on_unknown_child = ..)]` meta, which can be used on structs and
/// enum variants. The specified variant controls how children, which are not
/// handled by any member of the compound, are handled during parsing.
///
/// The [`Default`] variant depends on the `non-pedantic` crate feature; see
/// the documentation of the individual variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
pub enum UnknownChildPolicy {
    /// All unknown children are discarded.
    ///
    /// This is the default policy if the crate is built with the
    /// `non-pedantic` feature.
    // Exactly one of the two `cfg_attr`s in this enum is active in any given
    // build, so there is always exactly one `#[default]` variant.
    #[cfg_attr(feature = "non-pedantic", default)]
    Discard,

    /// The first unknown child which is encountered generates a fatal
    /// parsing error.
    ///
    /// This is the default policy if the crate is built **without** the
    /// `non-pedantic` feature.
    #[cfg_attr(not(feature = "non-pedantic"), default)]
    Fail,
}
409
410impl UnknownChildPolicy {
411    #[doc(hidden)]
412    /// Implementation of the policy.
413    ///
414    /// This is an internal API and not subject to semver versioning.
415    pub fn apply_policy(&self, msg: &'static str) -> Result<(), self::error::Error> {
416        match self {
417            Self::Fail => Err(self::error::Error::Other(msg)),
418            Self::Discard => Ok(()),
419        }
420    }
421}
422
423/// Attempt to transform a type implementing [`AsXml`] into another
424/// type which implements [`FromXml`].
425pub fn transform<T: FromXml, F: AsXml>(from: &F) -> Result<T, self::error::Error> {
426    let mut iter = self::rxml_util::ItemToEvent::new(from.as_xml_iter()?);
427    let (qname, attrs) = match iter.next() {
428        Some(Ok(rxml::Event::StartElement(_, qname, attrs))) => (qname, attrs),
429        Some(Err(e)) => return Err(e),
430        _ => panic!("into_event_iter did not start with StartElement event!"),
431    };
432    let mut sink = match T::from_events(qname, attrs) {
433        Ok(v) => v,
434        Err(self::error::FromEventsError::Mismatch { .. }) => {
435            return Err(self::error::Error::TypeMismatch)
436        }
437        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
438    };
439    for event in iter {
440        let event = event?;
441        if let Some(v) = sink.feed(event)? {
442            return Ok(v);
443        }
444    }
445    Err(self::error::Error::XmlError(rxml::Error::InvalidEof(None)))
446}
447
/// Attempt to convert a [`minidom::Element`] into a type implementing
/// [`FromXml`], fallibly.
///
/// Unlike [`transform`] (which can also be used with an element), this
/// function will return the element unharmed if its element header does not
/// match the expectations of `T`: in that case, the element is handed back
/// inside the `Mismatch` variant of the returned error.
///
/// # Panics
///
/// Panics if the item stream generated from the element is malformed, that
/// is, if text or an element foot is emitted before the end of the element
/// header, or if the stream ends before `T` has been constructed.
#[cfg(feature = "minidom")]
pub fn try_from_element<T: FromXml>(
    from: minidom::Element,
) -> Result<T, self::error::FromElementError> {
    let (qname, attrs) = minidom_compat::make_start_ev_parts(&from)?;
    let mut builder = match T::from_events(qname, attrs) {
        Err(self::error::FromEventsError::Mismatch { .. }) => {
            // Give the untouched element back to the caller.
            return Err(self::error::FromElementError::Mismatch(from));
        }
        Err(self::error::FromEventsError::Invalid(e)) => {
            return Err(self::error::FromElementError::Invalid(e));
        }
        Ok(builder) => builder,
    };

    let mut items = from.as_xml_iter()?;
    // The element header has already been handed to `from_events` above, so
    // everything up to and including the end of the header is dropped here.
    for item in &mut items {
        match item? {
            Item::ElementHeadEnd => break,
            Item::XmlDeclaration(..) | Item::ElementHeadStart(..) | Item::Attribute(..) => (),
            Item::Text(..) => panic!("text before end of element header"),
            Item::ElementFoot => panic!("element foot before end of element header"),
        }
    }

    // The remaining items are the content of the element and its foot.
    for event in self::rxml_util::ItemToEvent::new(items) {
        if let Some(v) = builder.feed(event?)? {
            return Ok(v);
        }
    }
    // unreachable! instead of error here, because minidom::Element always
    // produces the complete event sequence of a single element, and FromXml
    // implementations must be constructible from that.
    unreachable!("minidom::Element did not produce enough events to complete element")
}
498
/// Convert the `io::Error` of a result into an [`self::error::Error`].
///
/// The `io::Error` is expected to wrap an `rxml::Error`, which is extracted
/// and converted. This helper is meant for readers which operate on
/// in-memory byte slices, where no other kind of I/O error is expected.
///
/// # Panics
///
/// Panics if the `io::Error` does not wrap an `rxml::Error`.
#[cfg(feature = "std")]
fn map_nonio_error<T>(r: Result<T, io::Error>) -> Result<T, self::error::Error> {
    r.map_err(|io_err| -> self::error::Error {
        match io_err.downcast::<rxml::Error>() {
            Ok(xml_err) => xml_err.into(),
            Err(_) => unreachable!("I/O error cannot be caused by &[]"),
        }
    })
}
509
/// Read events from `r` up to and including the first `StartElement` and
/// return the qualified name and the attributes of that element.
///
/// XML declarations (version 1.0) preceding the element are skipped.
///
/// # Errors
///
/// - `Error::Other` if any other event precedes the `StartElement`.
/// - An `InvalidEof` XML error if the reader ends before a `StartElement`.
/// - Errors of the reader itself, converted via `map_nonio_error` (which
///   panics on errors not originating from `rxml`).
#[cfg(feature = "std")]
fn read_start_event<I: io::BufRead>(
    r: &mut rxml::Reader<I>,
) -> Result<(rxml::QName, rxml::AttrMap), self::error::Error> {
    for ev in r {
        let ev = map_nonio_error(ev)?;
        match ev {
            rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
            rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => continue,
            _ => {
                return Err(self::error::Error::Other(
                    "Unexpected event at start of document",
                ));
            }
        }
    }
    // The reader is exhausted without having produced an element.
    let eof = rxml::Error::InvalidEof(Some(rxml::error::ErrorContext::DocumentBegin));
    Err(self::error::Error::XmlError(eof))
}
529
/// Attempt to parse a type implementing [`FromXml`] from a byte buffer
/// containing XML data.
///
/// Parsing stops as soon as `T` has been constructed; the function returns
/// at that point without reading further events from the buffer.
///
/// # Errors
///
/// - `Error::TypeMismatch` if the root element does not match `T`.
/// - An `InvalidEof` XML error if the data ends before `T` is complete.
/// - Any error raised by the XML parser or by the builder of `T`.
#[cfg(feature = "std")]
pub fn from_bytes<T: FromXml>(mut buf: &[u8]) -> Result<T, self::error::Error> {
    let mut reader = rxml::Reader::new(&mut buf);
    let (name, attrs) = read_start_event(&mut reader)?;
    let mut builder = T::from_events(name, attrs).map_err(|err| match err {
        self::error::FromEventsError::Mismatch { .. } => self::error::Error::TypeMismatch,
        self::error::FromEventsError::Invalid(inner) => inner,
    })?;
    for ev in reader {
        let ev = map_nonio_error(ev)?;
        if let Some(value) = builder.feed(ev)? {
            return Ok(value);
        }
    }
    // The reader is exhausted, but the builder still expects more events.
    Err(self::error::Error::XmlError(rxml::Error::InvalidEof(None)))
}
550
/// Read events from `r` up to and including the first `StartElement` and
/// return the qualified name and the attributes of that element.
///
/// This is the variant of `read_start_event` for readers which may fail with
/// genuine I/O errors: errors of the reader are passed through unchanged.
/// XML declarations (version 1.0) preceding the element are skipped.
///
/// # Errors
///
/// Besides errors of the reader itself, an `io::Error` of kind
/// `InvalidData` is returned if any other event precedes the `StartElement`
/// or if the reader ends before a `StartElement` has been read.
#[cfg(feature = "std")]
fn read_start_event_io<I: io::BufRead>(
    r: &mut rxml::Reader<I>,
) -> io::Result<(rxml::QName, rxml::AttrMap)> {
    // Wraps an xso error into an `io::Error` of kind `InvalidData`.
    let invalid_data = |e: self::error::Error| io::Error::new(io::ErrorKind::InvalidData, e);

    for ev in r {
        match ev? {
            rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
            rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => continue,
            _ => {
                return Err(invalid_data(self::error::Error::Other(
                    "Unexpected event at start of document",
                )));
            }
        }
    }
    // The reader is exhausted without having produced an element.
    let eof = rxml::Error::InvalidEof(Some(rxml::error::ErrorContext::DocumentBegin));
    Err(invalid_data(self::error::Error::XmlError(eof)))
}
574
/// Attempt to parse a type implementing [`FromXml`] from a reader.
///
/// Parsing stops as soon as `T` has been constructed; the function returns
/// at that point without reading further events from `r`.
///
/// # Errors
///
/// - Errors of the underlying XML reader are passed through unchanged.
/// - Errors raised while constructing `T` (including a mismatch of the root
///   element) are wrapped into an `io::Error` of kind `InvalidData`.
/// - If the reader ends before `T` is complete, an `io::Error` of kind
///   `UnexpectedEof` is returned.
#[cfg(feature = "std")]
pub fn from_reader<T: FromXml, R: io::BufRead>(r: R) -> io::Result<T> {
    // Wraps an xso error into an `io::Error` of kind `InvalidData`.
    let invalid_data = |e: self::error::Error| io::Error::new(io::ErrorKind::InvalidData, e);

    let mut reader = rxml::Reader::new(r);
    let (name, attrs) = read_start_event_io(&mut reader)?;
    let mut builder = T::from_events(name, attrs).map_err(|err| match err {
        self::error::FromEventsError::Mismatch { .. } => {
            invalid_data(self::error::Error::TypeMismatch)
        }
        self::error::FromEventsError::Invalid(inner) => invalid_data(inner),
    })?;
    for ev in reader {
        let ev = ev?;
        if let Some(value) = builder.feed(ev).map_err(invalid_data)? {
            return Ok(value);
        }
    }
    // The reader is exhausted, but the builder still expects more events.
    Err(io::Error::new(
        io::ErrorKind::UnexpectedEof,
        self::error::Error::XmlError(rxml::Error::InvalidEof(None)),
    ))
}
603
604/// Attempt to serialise a type implementing [`AsXml`] to a vector of bytes.
605pub fn to_vec<T: AsXml>(xso: &T) -> Result<Vec<u8>, self::error::Error> {
606    let iter = xso.as_xml_iter()?;
607    let mut writer = rxml::writer::Encoder::new();
608    let mut buf = Vec::new();
609    for item in iter {
610        let item = item?;
611        writer.encode(item.as_rxml_item(), &mut buf)?;
612    }
613    Ok(buf)
614}
615
/// Return true if the given bytes consist exclusively of XML whitespace.
///
/// XML whitespace is defined as U+0020 (space), U+0009 (tab), U+000a
/// (newline) and U+000d (carriage return). Empty input contains no
/// non-whitespace bytes, hence `true` is returned for it.
pub fn is_xml_whitespace<T: AsRef<[u8]>>(s: T) -> bool {
    s.as_ref()
        .iter()
        .all(|&byte| matches!(byte, b' ' | b'\t' | b'\n' | b'\r'))
}