xso/
text.rs

1// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7//! # Convert data to and from XML text
8//!
9//! This module provides traits and types related to conversion of XML text
10//! data to and from Rust types, as well as the [`AsXmlText`],
11//! [`AsOptionalXmlText`][`crate::AsOptionalXmlText`] and [`FromXmlText`]
12//! implementations for foreign and standard-library types.
13//!
14//! ## Support for types from third-party crates
15//!
16//! Beyond the standard library types, the following additional types are
17//! supported:
18//!
19//! | Feature gate | Types |
20//! | --- | --- |
//! | `jid` | `jid::Jid`, `jid::BareJid`, `jid::FullJid`, `jid::NodePart`, `jid::DomainPart`, `jid::ResourcePart` |
22//! | `serde_json` | `serde_json::Value` |
23//! | `uuid` | `uuid::Uuid` |
24//!
25//! ### Adding support for more types
26//!
27//! Due to the orphan rule, it is not possible for applications to implement
28//! [`AsXmlText`], [`AsOptionalXmlText`][`crate::AsOptionalXmlText`] or
29//! [`FromXmlText`] on types which originate from third-party crates. Because
30//! of that, we are **extremely liberal** at accepting merge requests for
31//! implementations of these traits for types from third-party crates.
32//!
33//! The only requirement is that the implementation is gated behind a feature
34//! flag which is disabled-by-default.
35//!
36//! ### Workaround for unsupported types
37//!
38//! If making a merge request against `xso` and waiting for a release is not
39//! an option, you can use newtype wrappers in almost all cases, for example:
40//!
41#![cfg_attr(
42    not(all(feature = "std", feature = "macros")),
43    doc = "Because the std or macros feature was not enabled at doc build time, the example cannot be tested.\n\n```ignore\n"
44)]
45#![cfg_attr(all(feature = "std", feature = "macros"), doc = "\n```\n")]
46//! # use xso::{AsXml, FromXml, AsXmlText, FromXmlText, error::Error};
47//! # use std::borrow::Cow;
48//! use std::process::ExitCode;
49//!
50//! struct MyExitCode(ExitCode);
51//!
52//! impl AsXmlText for MyExitCode {
53//!     fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
54//!         match self.0 {
55//!             ExitCode::FAILURE => Ok(Cow::Borrowed("failure")),
56//!             ExitCode::SUCCESS => Ok(Cow::Borrowed("success")),
57//!             _ => Err(Error::Other("unknown exit code")),
58//!         }
59//!     }
60//! }
61//!
62//! impl FromXmlText for MyExitCode {
63//!     fn from_xml_text(s: String) -> Result<Self, Error> {
64//!         match s.as_str() {
65//!             "failure" => Ok(Self(ExitCode::FAILURE)),
66//!             "success" => Ok(Self(ExitCode::SUCCESS)),
67//!             _ => Err(Error::Other("unknown exit code")),
68//!         }
69//!     }
70//! }
71//!
72//! #[derive(AsXml, FromXml)]
73//! #[xml(namespace = "urn:example", name = "process-result")]
74//! struct ProcessResult {
75//!     #[xml(attribute)]
76//!     code: MyExitCode,
77//!     #[xml(text)]
78//!     stdout: String,
79//! }
80//! ```
81//!
82//! Of course, such an approach reduces the usability of your struct (and
83//! comes with issues once references are needed), so making a merge request
84//! against `xso` is generally preferable.
85
86use core::marker::PhantomData;
87
88use alloc::{
89    borrow::{Cow, ToOwned},
90    boxed::Box,
91    format,
92    string::{String, ToString},
93    vec::Vec,
94};
95
96use crate::{error::Error, AsOptionalXmlText, AsXmlText, FromXmlText};
97
98#[cfg(feature = "base64")]
99use base64::engine::general_purpose::STANDARD as StandardBase64Engine;
100
/// # Generate `AsXmlText` and `FromXmlText` implementations
///
/// This macro generates an `AsXmlText` implementation which uses
/// [`Display`][`core::fmt::Display`] and a `FromXmlText` implementation
/// which uses [`FromStr`][`core::str::FromStr`] for the types it is called
/// on.
///
/// ## Syntax
///
/// The macro accepts a comma-separated list of types. Optionally, each type
/// can be preceded by a `#[cfg(..)]` attribute to make the implementations
/// conditional on a feature.
///
/// ## Example
///
#[cfg_attr(
    not(feature = "macros"),
    doc = "Because the macros feature was not enabled at doc build time, the example cannot be tested.\n\n```ignore\n"
)]
#[cfg_attr(feature = "macros", doc = "\n```\n")]
/// # use xso::convert_via_fromstr_and_display;
/// # use core::fmt::{self, Display};
/// # use core::str::FromStr;
/// struct Foo;
///
/// impl FromStr for Foo {
/// #    type Err = core::convert::Infallible;
/// #
/// #    fn from_str(s: &str) -> Result<Self, Self::Err> { todo!() }
///     /* ... */
/// }
///
/// impl Display for Foo {
/// #    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { todo!() }
///     /* ... */
/// }
///
/// convert_via_fromstr_and_display!(
///     Foo,
/// );
/// ```
// NOTE: `macro_rules!` hygiene does not cover type and trait names: they are
// resolved at the call site. `String` and `ToString` are therefore spelled
// through `$crate::exports::alloc`, so that the expansion also compiles in
// crates which do not have them in scope (for instance `no_std` crates).
#[macro_export]
macro_rules! convert_via_fromstr_and_display {
    ($($(#[cfg $cfg:tt])?$t:ty),+ $(,)?) => {
        $(
            $(
                #[cfg $cfg]
            )?
            impl $crate::FromXmlText for $t {
                #[doc = concat!("Parse [`", stringify!($t), "`] from XML text via [`FromStr`][`core::str::FromStr`].")]
                fn from_xml_text(
                    s: $crate::exports::alloc::string::String,
                ) -> Result<Self, $crate::error::Error> {
                    s.parse().map_err($crate::error::Error::text_parse_error)
                }
            }

            $(
                #[cfg $cfg]
            )?
            impl $crate::AsXmlText for $t {
                #[doc = concat!("Convert [`", stringify!($t), "`] to XML text via [`Display`][`core::fmt::Display`].\n\nThis implementation never fails.")]
                fn as_xml_text(&self) -> Result<$crate::exports::alloc::borrow::Cow<'_, str>, $crate::error::Error> {
                    Ok($crate::exports::alloc::borrow::Cow::Owned(
                        $crate::exports::alloc::string::ToString::to_string(self),
                    ))
                }
            }
        )+
    }
}
167
168/// This provides an implementation compliant with xsd::bool.
169impl FromXmlText for bool {
170    /// Parse a boolean from XML text.
171    ///
172    /// The values `"1"` and `"true"` are considered true. The values `"0"`
173    /// and `"false"` are considered `false`. Any other value is invalid and
174    /// will return an error.
175    fn from_xml_text(s: String) -> Result<Self, Error> {
176        match s.as_str() {
177            "1" => "true",
178            "0" => "false",
179            other => other,
180        }
181        .parse()
182        .map_err(Error::text_parse_error)
183    }
184}
185
186/// This provides an implementation compliant with xsd::bool.
187impl AsXmlText for bool {
188    /// Convert a boolean to XML text.
189    ///
190    /// `true` is converted to `"true"` and `false` is converted to `"false"`.
191    /// This implementation never fails.
192    fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
193        match self {
194            true => Ok(Cow::Borrowed("true")),
195            false => Ok(Cow::Borrowed("false")),
196        }
197    }
198}
199
200convert_via_fromstr_and_display! {
201    u8,
202    u16,
203    u32,
204    u64,
205    u128,
206    usize,
207    i8,
208    i16,
209    i32,
210    i64,
211    i128,
212    isize,
213    f32,
214    f64,
215    char,
216    core::net::IpAddr,
217    core::net::Ipv4Addr,
218    core::net::Ipv6Addr,
219    core::net::SocketAddr,
220    core::net::SocketAddrV4,
221    core::net::SocketAddrV6,
222    core::num::NonZeroU8,
223    core::num::NonZeroU16,
224    core::num::NonZeroU32,
225    core::num::NonZeroU64,
226    core::num::NonZeroU128,
227    core::num::NonZeroUsize,
228    core::num::NonZeroI8,
229    core::num::NonZeroI16,
230    core::num::NonZeroI32,
231    core::num::NonZeroI64,
232    core::num::NonZeroI128,
233    core::num::NonZeroIsize,
234
235    #[cfg(feature = "uuid")]
236    uuid::Uuid,
237
238    #[cfg(feature = "jid")]
239    jid::Jid,
240    #[cfg(feature = "jid")]
241    jid::FullJid,
242    #[cfg(feature = "jid")]
243    jid::BareJid,
244    #[cfg(feature = "jid")]
245    jid::NodePart,
246    #[cfg(feature = "jid")]
247    jid::DomainPart,
248    #[cfg(feature = "jid")]
249    jid::ResourcePart,
250
251    #[cfg(feature = "serde_json")]
252    serde_json::Value,
253}
254
impl FromXmlText for String {
    /// Return the string unchanged.
    ///
    /// The text is moved into the result as-is; this implementation never
    /// fails.
    fn from_xml_text(data: String) -> Result<Self, Error> {
        Ok(data)
    }
}
261
262impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
263    /// Return a [`Cow::Owned`] containing the parsed value.
264    fn from_xml_text(data: String) -> Result<Self, Error> {
265        Ok(Cow::Owned(T::from_xml_text(data)?))
266    }
267}
268
269impl<T: FromXmlText> FromXmlText for Option<T> {
270    /// Return a [`Some`] containing the parsed value.
271    fn from_xml_text(data: String) -> Result<Self, Error> {
272        Ok(Some(T::from_xml_text(data)?))
273    }
274}
275
276impl<T: FromXmlText> FromXmlText for Box<T> {
277    /// Return a [`Box`] containing the parsed value.
278    fn from_xml_text(data: String) -> Result<Self, Error> {
279        Ok(Box::new(T::from_xml_text(data)?))
280    }
281}
282
283impl AsXmlText for String {
284    /// Return the borrowed string contents.
285    fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
286        Ok(Cow::Borrowed(self))
287    }
288}
289
290impl AsXmlText for str {
291    /// Return the borrowed string contents.
292    fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
293        Ok(Cow::Borrowed(self))
294    }
295}
296
297impl AsXmlText for &str {
298    /// Return the borrowed string contents.
299    fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
300        Ok(Cow::Borrowed(self))
301    }
302}
303
304impl<T: AsXmlText> AsXmlText for Box<T> {
305    /// Return the borrowed [`Box`] contents.
306    fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
307        T::as_xml_text(self)
308    }
309}
310
311impl<B: AsXmlText + ToOwned> AsXmlText for Cow<'_, B> {
312    /// Return the borrowed [`Cow`] contents.
313    fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
314        B::as_xml_text(self)
315    }
316}
317
318impl<T: AsXmlText> AsXmlText for &T {
319    /// Delegate to the `AsXmlText` implementation on `T`.
320    fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
321        T::as_xml_text(*self)
322    }
323}
324
impl<T: AsXmlText> AsOptionalXmlText for T {
    /// Delegate to [`AsXmlText::as_optional_xml_text`] of the same type.
    ///
    /// This blanket implementation makes every [`AsXmlText`] type usable
    /// where [`AsOptionalXmlText`] is required.
    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, Error> {
        <Self as AsXmlText>::as_optional_xml_text(self)
    }
}
330
331impl<T: AsXmlText> AsOptionalXmlText for Option<T> {
332    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, Error> {
333        self.as_ref()
334            .map(T::as_optional_xml_text)
335            .transpose()
336            .map(Option::flatten)
337    }
338}
339
/// Represent a way to encode/decode text data into a Rust type.
///
/// This trait can be used in scenarios where implementing [`FromXmlText`]
/// and/or [`AsXmlText`] on a type is not feasible or sensible, such as the
/// following:
///
/// 1. The type originates in a foreign crate, preventing the implementation
///    of foreign traits.
///
/// 2. There is more than one way to convert a value to/from XML.
///
/// The codec to use for a text can be specified in the attributes understood
/// by `FromXml` and `AsXml` derive macros. See the documentation of the
/// [`FromXml`][`macro@crate::FromXml`] derive macro for details.
///
/// The type parameter `T` is the Rust type which is decoded from and encoded
/// to text. A single codec type may implement this trait for several `T`.
#[diagnostic::on_unimplemented(
    message = "`{Self}` cannot be used as XML text codec for values of type `{T}`."
)]
pub trait TextCodec<T> {
    /// Decode a string value into the type.
    ///
    /// Takes ownership of the text `s` and returns the decoded value, or an
    /// [`Error`] if the codec does not accept the text.
    fn decode(&self, s: String) -> Result<T, Error>;

    /// Encode the type as string value.
    ///
    /// If this returns `None`, the string value is not emitted at all.
    ///
    /// The returned text may borrow from `value`.
    fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error>;

    /// Apply a filter to this codec.
    ///
    /// Filters preprocess strings before they are handed to the codec for
    /// parsing, allowing to, for example, make the codec ignore irrelevant
    /// content by stripping it.
    ///
    /// Consumes the codec and returns a [`Filtered`] value which owns both
    /// the codec and `filter`.
    // NOTE: The bound on T is needed because any given type A may implement
    // TextCodec for any number of types. If we pass T down to the `Filtered`
    // struct, rustc can do type inference on which `TextCodec`
    // implementation the `filtered` method is supposed to have been called
    // on.
    fn filtered<F: TextFilter>(self, filter: F) -> Filtered<F, Self, T>
    where
        // placing the bound here (instead of on the `TextCodec<T>` trait
        // itself) preserves object-safety of TextCodec<T>.
        Self: Sized,
    {
        Filtered {
            filter,
            codec: self,
            bound: PhantomData,
        }
    }
}
389
390/// Wrapper struct to apply a filter to a codec.
391///
392/// You can construct a value of this type via [`TextCodec::filtered`].
393// NOTE: see the note on TextCodec::filtered for why we bind `T` here, too.
394pub struct Filtered<F, C, T> {
395    filter: F,
396    codec: C,
397    bound: PhantomData<T>,
398}
399
400impl<T, F: TextFilter, C: TextCodec<T>> TextCodec<T> for Filtered<F, C, T> {
401    fn decode(&self, s: String) -> Result<T, Error> {
402        let s = self.filter.preprocess(s);
403        self.codec.decode(s)
404    }
405
406    fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error> {
407        self.codec.encode(value)
408    }
409}
410
411/// Text codec which does no transform.
412pub struct Plain;
413
414impl TextCodec<String> for Plain {
415    fn decode(&self, s: String) -> Result<String, Error> {
416        Ok(s)
417    }
418
419    fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
420        Ok(Some(Cow::Borrowed(value.as_str())))
421    }
422}
423
424/// Text codec which returns `None` if the input to decode is the empty string, instead of
425/// attempting to decode it.
426///
427/// Particularly useful when parsing `Option<T>` on `#[xml(text)]`, which does not support
428/// `Option<_>` otherwise.
429pub struct EmptyAsNone;
430
431impl<T> TextCodec<Option<T>> for EmptyAsNone
432where
433    T: FromXmlText + AsXmlText,
434{
435    fn decode(&self, s: String) -> Result<Option<T>, Error> {
436        if s.is_empty() {
437            Ok(None)
438        } else {
439            Some(T::from_xml_text(s)).transpose()
440        }
441    }
442
443    fn encode<'x>(&self, value: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
444        Ok(value
445            .as_ref()
446            .map(AsXmlText::as_xml_text)
447            .transpose()?
448            .and_then(|v| (!v.is_empty()).then_some(v)))
449    }
450}
451
452/// Text codec which returns None instead of the empty string.
453pub struct EmptyAsError;
454
455impl TextCodec<String> for EmptyAsError {
456    fn decode(&self, s: String) -> Result<String, Error> {
457        if s.is_empty() {
458            Err(Error::Other("Empty text node."))
459        } else {
460            Ok(s)
461        }
462    }
463
464    fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
465        if value.is_empty() {
466            Err(Error::Other("Empty text node."))
467        } else {
468            Ok(Some(Cow::Borrowed(value.as_str())))
469        }
470    }
471}
472
/// Trait for preprocessing text data from XML.
///
/// This may be used by codecs to allow to customize some of their behaviour.
///
/// A filter is attached to a codec via [`TextCodec::filtered`]; the
/// resulting [`Filtered`] codec calls
/// [`preprocess`][`TextFilter::preprocess`] on the text before decoding it.
pub trait TextFilter {
    /// Process the incoming string and return the result of the processing.
    ///
    /// The string is passed by value, so implementations are free to modify
    /// it in place or to return it unchanged.
    fn preprocess(&self, s: String) -> String;
}
480
/// Text preprocessor which returns the input unchanged.
pub struct NoFilter;

impl TextFilter for NoFilter {
    /// Hand the string back as-is.
    fn preprocess(&self, s: String) -> String {
        s
    }
}
489
490/// Text preprocessor to remove all whitespace.
491pub struct StripWhitespace;
492
493impl TextFilter for StripWhitespace {
494    fn preprocess(&self, s: String) -> String {
495        let s: String = s
496            .chars()
497            .filter(|ch| *ch != ' ' && *ch != '\n' && *ch != '\t')
498            .collect();
499        s
500    }
501}
502
/// Text codec converting between binary data and standard `base64` text.
///
/// `Base64` uses the [`base64::engine::general_purpose::STANDARD`] engine.
/// [`TextCodec`] is also automatically implemented for any value which
/// implements [`base64::engine::Engine`], allowing you to choose different
/// alphabets easily.
#[cfg(feature = "base64")]
pub struct Base64;

#[cfg(feature = "base64")]
impl TextCodec<Vec<u8>> for Base64 {
    /// Decode base64 text into bytes; invalid input is reported as a text
    /// parse error.
    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
        match base64::engine::Engine::decode(&StandardBase64Engine, s.as_bytes()) {
            Ok(bytes) => Ok(bytes),
            Err(err) => Err(Error::text_parse_error(err)),
        }
    }

    /// Encode the bytes as base64 text. The text is always emitted.
    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
        let text = base64::engine::Engine::encode(&StandardBase64Engine, value);
        Ok(Some(Cow::Owned(text)))
    }
}
526
#[cfg(feature = "base64")]
impl<'x> TextCodec<Cow<'x, [u8]>> for Base64 {
    /// Decode base64 text into an owned byte buffer wrapped in
    /// [`Cow::Owned`]; invalid input is reported as a text parse error.
    fn decode(&self, s: String) -> Result<Cow<'x, [u8]>, Error> {
        let bytes = base64::engine::Engine::decode(&StandardBase64Engine, s.as_bytes())
            .map_err(Error::text_parse_error)?;
        Ok(Cow::Owned(bytes))
    }

    /// Encode the bytes as base64 text. The text is always emitted.
    fn encode<'a>(&self, value: &'a Cow<'x, [u8]>) -> Result<Option<Cow<'a, str>>, Error> {
        let text = base64::engine::Engine::encode(&StandardBase64Engine, value);
        Ok(Some(Cow::Owned(text)))
    }
}
542
#[cfg(feature = "base64")]
impl<T> TextCodec<Option<T>> for Base64
where
    Base64: TextCodec<T>,
{
    /// Decode empty text as `None`; any other text is decoded as `T` and
    /// wrapped in `Some`.
    fn decode(&self, s: String) -> Result<Option<T>, Error> {
        if s.is_empty() {
            Ok(None)
        } else {
            <Self as TextCodec<T>>::decode(self, s).map(Some)
        }
    }

    /// Encode `None` as no text at all; `Some` is encoded like a plain `T`.
    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
        match decoded {
            Some(inner) => <Self as TextCodec<T>>::encode(self, inner),
            None => Ok(None),
        }
    }
}
563
#[cfg(feature = "base64")]
impl<T: base64::engine::Engine> TextCodec<Vec<u8>> for T {
    /// Decode the text with this engine; invalid input is reported as a
    /// text parse error.
    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
        let decoded = base64::engine::Engine::decode(self, s.as_bytes());
        decoded.map_err(Error::text_parse_error)
    }

    /// Encode the bytes with this engine. The text is always emitted.
    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
        let text = base64::engine::Engine::encode(self, value);
        Ok(Some(Cow::Owned(text)))
    }
}
576
#[cfg(feature = "base64")]
impl<T: base64::engine::Engine, U> TextCodec<Option<U>> for T
where
    T: TextCodec<U>,
{
    /// Decode empty text as `None`; any other text is decoded as `U` and
    /// wrapped in `Some`.
    fn decode(&self, s: String) -> Result<Option<U>, Error> {
        if s.is_empty() {
            Ok(None)
        } else {
            <T as TextCodec<U>>::decode(self, s).map(Some)
        }
    }

    /// Encode `None` as no text at all; `Some` is encoded like a plain `U`.
    fn encode<'x>(&self, decoded: &'x Option<U>) -> Result<Option<Cow<'x, str>>, Error> {
        match decoded {
            Some(inner) => <T as TextCodec<U>>::encode(self, inner),
            None => Ok(None),
        }
    }
}
597
598/// Text codec transforming text to binary using hexadecimal nibbles.
599///
600/// The length must be known at compile-time.
601pub struct FixedHex<const N: usize>;
602
603impl<const N: usize> TextCodec<[u8; N]> for FixedHex<N> {
604    fn decode(&self, s: String) -> Result<[u8; N], Error> {
605        if s.len() != 2 * N {
606            return Err(Error::Other("Invalid length"));
607        }
608
609        let mut bytes = [0u8; N];
610        for i in 0..N {
611            bytes[i] =
612                u8::from_str_radix(&s[2 * i..2 * i + 2], 16).map_err(Error::text_parse_error)?;
613        }
614
615        Ok(bytes)
616    }
617
618    fn encode<'x>(&self, value: &'x [u8; N]) -> Result<Option<Cow<'x, str>>, Error> {
619        let mut bytes = String::with_capacity(N * 2);
620        for byte in value {
621            bytes.extend(format!("{:02x}", byte).chars());
622        }
623        Ok(Some(Cow::Owned(bytes)))
624    }
625}
626
627impl<T, const N: usize> TextCodec<Option<T>> for FixedHex<N>
628where
629    FixedHex<N>: TextCodec<T>,
630{
631    fn decode(&self, s: String) -> Result<Option<T>, Error> {
632        if s.is_empty() {
633            return Ok(None);
634        }
635        Ok(Some(self.decode(s)?))
636    }
637
638    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
639        decoded
640            .as_ref()
641            .map(|x| self.encode(x))
642            .transpose()
643            .map(Option::flatten)
644    }
645}
646
647/// Text codec for colon-separated bytes of uppercase hexadecimal.
648pub struct ColonSeparatedHex;
649
650impl TextCodec<Vec<u8>> for ColonSeparatedHex {
651    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
652        assert_eq!((s.len() + 1) % 3, 0);
653        let mut bytes = Vec::with_capacity((s.len() + 1) / 3);
654        for i in 0..(1 + s.len()) / 3 {
655            let byte =
656                u8::from_str_radix(&s[3 * i..3 * i + 2], 16).map_err(Error::text_parse_error)?;
657            if 3 * i + 2 < s.len() {
658                assert_eq!(&s[3 * i + 2..3 * i + 3], ":");
659            }
660            bytes.push(byte);
661        }
662        Ok(bytes)
663    }
664
665    fn encode<'x>(&self, decoded: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
666        // TODO: Super inefficient!
667        let mut bytes = Vec::with_capacity(decoded.len());
668        for byte in decoded {
669            bytes.push(format!("{:02X}", byte));
670        }
671        Ok(Some(Cow::Owned(bytes.join(":"))))
672    }
673}