xso/
text.rs

1// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7//! # Convert data to and from XML text
8//!
9//! This module provides traits and types related to conversion of XML text
10//! data to and from Rust types, as well as the [`AsXmlText`],
11//! [`AsOptionalXmlText`][`crate::AsOptionalXmlText`] and [`FromXmlText`]
12//! implementations for foreign and standard-library types.
13//!
14//! ## Support for types from third-party crates
15//!
16//! Beyond the standard library types, the following additional types are
17//! supported:
18//!
19//! | Feature gate | Types |
20//! | --- | --- |
//! | `jid` | `jid::Jid`, `jid::BareJid`, `jid::FullJid`, `jid::NodePart`, `jid::DomainPart`, `jid::ResourcePart` |
22//! | `serde_json` | `serde_json::Value` |
23//! | `uuid` | `uuid::Uuid` |
24//!
25//! ### Adding support for more types
26//!
27//! Due to the orphan rule, it is not possible for applications to implement
28//! [`AsXmlText`], [`AsOptionalXmlText`][`crate::AsOptionalXmlText`] or
29//! [`FromXmlText`] on types which originate from third-party crates. Because
30//! of that, we are **extremely liberal** at accepting merge requests for
31//! implementations of these traits for types from third-party crates.
32//!
33//! The only requirement is that the implementation is gated behind a feature
34//! flag which is disabled-by-default.
35//!
36//! ### Workaround for unsupported types
37//!
38//! If making a merge request against `xso` and waiting for a release is not
39//! an option, you can use newtype wrappers in almost all cases, for example:
40//!
41#![cfg_attr(
42    not(all(feature = "std", feature = "macros")),
43    doc = "Because the std or macros feature was not enabled at doc build time, the example cannot be tested.\n\n```ignore\n"
44)]
45#![cfg_attr(all(feature = "std", feature = "macros"), doc = "\n```\n")]
46//! # use xso::{AsXml, FromXml, AsXmlText, FromXmlText, error::Error};
47//! # use std::borrow::Cow;
48//! use std::process::ExitCode;
49//!
50//! struct MyExitCode(ExitCode);
51//!
52//! impl AsXmlText for MyExitCode {
53//!     fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
54//!         match self.0 {
55//!             ExitCode::FAILURE => Ok(Cow::Borrowed("failure")),
56//!             ExitCode::SUCCESS => Ok(Cow::Borrowed("success")),
57//!             _ => Err(Error::Other("unknown exit code")),
58//!         }
59//!     }
60//! }
61//!
62//! impl FromXmlText for MyExitCode {
63//!     fn from_xml_text(s: String) -> Result<Self, Error> {
64//!         match s.as_str() {
65//!             "failure" => Ok(Self(ExitCode::FAILURE)),
66//!             "success" => Ok(Self(ExitCode::SUCCESS)),
67//!             _ => Err(Error::Other("unknown exit code")),
68//!         }
69//!     }
70//! }
71//!
72//! #[derive(AsXml, FromXml)]
73//! #[xml(namespace = "urn:example", name = "process-result")]
74//! struct ProcessResult {
75//!     #[xml(attribute)]
76//!     code: MyExitCode,
77//!     #[xml(text)]
78//!     stdout: String,
79//! }
80//! ```
81//!
82//! Of course, such an approach reduces the usability of your struct (and
83//! comes with issues once references are needed), so making a merge request
84//! against `xso` is generally preferable.
85
86use core::marker::PhantomData;
87
88use alloc::{
89    borrow::Cow,
90    format,
91    string::{String, ToString},
92    vec::Vec,
93};
94
95use crate::{error::Error, AsXmlText, FromXmlText};
96
97#[cfg(feature = "base64")]
98use base64::engine::general_purpose::STANDARD as StandardBase64Engine;
99
/// # Generate `AsXmlText` and `FromXmlText` implementations
///
/// This macro generates an `AsXmlText` implementation which uses
/// [`Display`][`core::fmt::Display`] and a `FromXmlText` implementation
/// which uses [`FromStr`][`core::str::FromStr`] for the types it is called
/// on.
///
/// The expansion names every library item (`String`, `Result`, `Cow`,
/// `ToString`) through an absolute path, so it does not depend on what the
/// invoking module happens to have in scope. In particular, it can be used
/// from modules which do not import `String` or `ToString`.
///
/// ## Syntax
///
/// The macro accepts a comma-separated list of types. Optionally, each type
/// can be preceded by a `#[cfg(..)]` attribute to make the implementations
/// conditional on a feature.
///
/// ## Example
///
#[cfg_attr(
    not(feature = "macros"),
    doc = "Because the macros feature was not enabled at doc build time, the example cannot be tested.\n\n```ignore\n"
)]
#[cfg_attr(feature = "macros", doc = "\n```\n")]
/// # use xso::convert_via_fromstr_and_display;
/// # use core::fmt::{self, Display};
/// # use core::str::FromStr;
/// struct Foo;
///
/// impl FromStr for Foo {
/// #    type Err = core::convert::Infallible;
/// #
/// #    fn from_str(s: &str) -> Result<Self, Self::Err> { todo!() }
///     /* ... */
/// }
///
/// impl Display for Foo {
/// #    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { todo!() }
///     /* ... */
/// }
///
/// convert_via_fromstr_and_display!(
///     Foo,
/// );
/// ```
#[macro_export]
macro_rules! convert_via_fromstr_and_display {
    ($($(#[cfg $cfg:tt])?$t:ty),+ $(,)?) => {
        $(
            $(
                #[cfg $cfg]
            )?
            impl $crate::FromXmlText for $t {
                #[doc = concat!("Parse [`", stringify!($t), "`] from XML text via [`FromStr`][`core::str::FromStr`].")]
                fn from_xml_text(
                    s: $crate::exports::alloc::string::String,
                ) -> ::core::result::Result<Self, $crate::error::Error> {
                    // `str::parse` is an inherent method, so no trait needs
                    // to be in scope at the call site for this to resolve.
                    s.parse().map_err($crate::error::Error::text_parse_error)
                }
            }

            $(
                #[cfg $cfg]
            )?
            impl $crate::AsXmlText for $t {
                #[doc = concat!("Convert [`", stringify!($t), "`] to XML text via [`Display`][`core::fmt::Display`].\n\nThis implementation never fails.")]
                fn as_xml_text(
                    &self,
                ) -> ::core::result::Result<
                    $crate::exports::alloc::borrow::Cow<'_, str>,
                    $crate::error::Error,
                > {
                    // Fully qualified call: `ToString` is not guaranteed to
                    // be in scope where the macro is expanded.
                    ::core::result::Result::Ok($crate::exports::alloc::borrow::Cow::Owned(
                        $crate::exports::alloc::string::ToString::to_string(self),
                    ))
                }
            }
        )+
    }
}
166
167/// This provides an implementation compliant with xsd::bool.
168impl FromXmlText for bool {
169    /// Parse a boolean from XML text.
170    ///
171    /// The values `"1"` and `"true"` are considered true. The values `"0"`
172    /// and `"false"` are considered `false`. Any other value is invalid and
173    /// will return an error.
174    fn from_xml_text(s: String) -> Result<Self, Error> {
175        match s.as_str() {
176            "1" => "true",
177            "0" => "false",
178            other => other,
179        }
180        .parse()
181        .map_err(Error::text_parse_error)
182    }
183}
184
185/// This provides an implementation compliant with xsd::bool.
186impl AsXmlText for bool {
187    /// Convert a boolean to XML text.
188    ///
189    /// `true` is converted to `"true"` and `false` is converted to `"false"`.
190    /// This implementation never fails.
191    fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
192        match self {
193            true => Ok(Cow::Borrowed("true")),
194            false => Ok(Cow::Borrowed("false")),
195        }
196    }
197}
198
199convert_via_fromstr_and_display! {
200    u8,
201    u16,
202    u32,
203    u64,
204    u128,
205    usize,
206    i8,
207    i16,
208    i32,
209    i64,
210    i128,
211    isize,
212    f32,
213    f64,
214    char,
215    core::net::IpAddr,
216    core::net::Ipv4Addr,
217    core::net::Ipv6Addr,
218    core::net::SocketAddr,
219    core::net::SocketAddrV4,
220    core::net::SocketAddrV6,
221    core::num::NonZeroU8,
222    core::num::NonZeroU16,
223    core::num::NonZeroU32,
224    core::num::NonZeroU64,
225    core::num::NonZeroU128,
226    core::num::NonZeroUsize,
227    core::num::NonZeroI8,
228    core::num::NonZeroI16,
229    core::num::NonZeroI32,
230    core::num::NonZeroI64,
231    core::num::NonZeroI128,
232    core::num::NonZeroIsize,
233
234    #[cfg(feature = "uuid")]
235    uuid::Uuid,
236
237    #[cfg(feature = "jid")]
238    jid::Jid,
239    #[cfg(feature = "jid")]
240    jid::FullJid,
241    #[cfg(feature = "jid")]
242    jid::BareJid,
243    #[cfg(feature = "jid")]
244    jid::NodePart,
245    #[cfg(feature = "jid")]
246    jid::DomainPart,
247    #[cfg(feature = "jid")]
248    jid::ResourcePart,
249
250    #[cfg(feature = "serde_json")]
251    serde_json::Value,
252}
253
254/// Represent a way to encode/decode text data into a Rust type.
255///
256/// This trait can be used in scenarios where implementing [`FromXmlText`]
257/// and/or [`AsXmlText`] on a type is not feasible or sensible, such as the
258/// following:
259///
260/// 1. The type originates in a foreign crate, preventing the implementation
261///    of foreign traits.
262///
263/// 2. There is more than one way to convert a value to/from XML.
264///
265/// The codec to use for a text can be specified in the attributes understood
266/// by `FromXml` and `AsXml` derive macros. See the documentation of the
267/// [`FromXml`][`macro@crate::FromXml`] derive macro for details.
268#[diagnostic::on_unimplemented(
269    message = "`{Self}` cannot be used as XML text codec for values of type `{T}`."
270)]
271pub trait TextCodec<T> {
272    /// Decode a string value into the type.
273    fn decode(&self, s: String) -> Result<T, Error>;
274
275    /// Encode the type as string value.
276    ///
277    /// If this returns `None`, the string value is not emitted at all.
278    fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error>;
279
280    /// Apply a filter to this codec.
281    ///
282    /// Filters preprocess strings before they are handed to the codec for
283    /// parsing, allowing to, for example, make the codec ignore irrelevant
284    /// content by stripping it.
285    // NOTE: The bound on T is needed because any given type A may implement
286    // TextCodec for any number of types. If we pass T down to the `Filtered`
287    // struct, rustc can do type inference on which `TextCodec`
288    // implementation the `filtered` method is supposed to have been called
289    // on.
290    fn filtered<F: TextFilter>(self, filter: F) -> Filtered<F, Self, T>
291    where
292        // placing the bound here (instead of on the `TextCodec<T>` trait
293        // itself) preserves object-safety of TextCodec<T>.
294        Self: Sized,
295    {
296        Filtered {
297            filter,
298            codec: self,
299            bound: PhantomData,
300        }
301    }
302}
303
304/// Wrapper struct to apply a filter to a codec.
305///
306/// You can construct a value of this type via [`TextCodec::filtered`].
307// NOTE: see the note on TextCodec::filtered for why we bind `T` here, too.
308pub struct Filtered<F, C, T> {
309    filter: F,
310    codec: C,
311    bound: PhantomData<T>,
312}
313
314impl<T, F: TextFilter, C: TextCodec<T>> TextCodec<T> for Filtered<F, C, T> {
315    fn decode(&self, s: String) -> Result<T, Error> {
316        let s = self.filter.preprocess(s);
317        self.codec.decode(s)
318    }
319
320    fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error> {
321        self.codec.encode(value)
322    }
323}
324
325/// Text codec which does no transform.
326pub struct Plain;
327
328impl TextCodec<String> for Plain {
329    fn decode(&self, s: String) -> Result<String, Error> {
330        Ok(s)
331    }
332
333    fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
334        Ok(Some(Cow::Borrowed(value.as_str())))
335    }
336}
337
338/// Text codec which returns `None` if the input to decode is the empty string, instead of
339/// attempting to decode it.
340///
341/// Particularly useful when parsing `Option<T>` on `#[xml(text)]`, which does not support
342/// `Option<_>` otherwise.
343pub struct EmptyAsNone;
344
345impl<T> TextCodec<Option<T>> for EmptyAsNone
346where
347    T: FromXmlText + AsXmlText,
348{
349    fn decode(&self, s: String) -> Result<Option<T>, Error> {
350        if s.is_empty() {
351            Ok(None)
352        } else {
353            Some(T::from_xml_text(s)).transpose()
354        }
355    }
356
357    fn encode<'x>(&self, value: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
358        Ok(value
359            .as_ref()
360            .map(AsXmlText::as_xml_text)
361            .transpose()?
362            .and_then(|v| (!v.is_empty()).then_some(v)))
363    }
364}
365
366/// Text codec which returns None instead of the empty string.
367pub struct EmptyAsError;
368
369impl TextCodec<String> for EmptyAsError {
370    fn decode(&self, s: String) -> Result<String, Error> {
371        if s.is_empty() {
372            Err(Error::Other("Empty text node."))
373        } else {
374            Ok(s)
375        }
376    }
377
378    fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
379        if value.is_empty() {
380            Err(Error::Other("Empty text node."))
381        } else {
382            Ok(Some(Cow::Borrowed(value.as_str())))
383        }
384    }
385}
386
/// Trait for preprocessing text data from XML.
///
/// This may be used by codecs to allow to customize some of their behaviour.
/// A filter is attached to a codec via [`TextCodec::filtered`]; the resulting
/// [`Filtered`] codec calls [`preprocess`][`TextFilter::preprocess`] on the
/// text before decoding it. Encoding does not involve the filter.
pub trait TextFilter {
    /// Process the incoming string and return the result of the processing.
    ///
    /// The string is passed by value, so an implementation may modify it in
    /// place or simply hand it back unchanged.
    fn preprocess(&self, s: String) -> String;
}
394
395/// Text preprocessor which returns the input unchanged.
396pub struct NoFilter;
397
398impl TextFilter for NoFilter {
399    fn preprocess(&self, s: String) -> String {
400        s
401    }
402}
403
404/// Text preprocessor to remove all whitespace.
405pub struct StripWhitespace;
406
407impl TextFilter for StripWhitespace {
408    fn preprocess(&self, s: String) -> String {
409        let s: String = s
410            .chars()
411            .filter(|ch| *ch != ' ' && *ch != '\n' && *ch != '\t')
412            .collect();
413        s
414    }
415}
416
/// Text codec converting between text and bytes using standard `base64`.
///
/// `Base64` uses the [`base64::engine::general_purpose::STANDARD`] engine.
/// [`TextCodec`] is also automatically implemented for any value which
/// implements [`base64::engine::Engine`], allowing you to choose different
/// alphabets easily.
#[cfg(feature = "base64")]
pub struct Base64;

#[cfg(feature = "base64")]
impl TextCodec<Vec<u8>> for Base64 {
    /// Decode base64 text into bytes; decoding failures are reported as
    /// text parse errors.
    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
        // Fully qualified call: the engine also has a `TextCodec::decode`
        // through the blanket implementation for engines.
        let decoded = base64::engine::Engine::decode(&StandardBase64Engine, s.as_bytes());
        decoded.map_err(Error::text_parse_error)
    }

    /// Encode bytes as base64 text. Never fails and always emits a value.
    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
        let text = base64::engine::Engine::encode(&StandardBase64Engine, value);
        Ok(Some(Cow::Owned(text)))
    }
}
440
#[cfg(feature = "base64")]
impl<'x> TextCodec<Cow<'x, [u8]>> for Base64 {
    /// Decode base64 text into an owned byte buffer wrapped in
    /// [`Cow::Owned`]; decoding failures are reported as text parse errors.
    fn decode(&self, s: String) -> Result<Cow<'x, [u8]>, Error> {
        match base64::engine::Engine::decode(&StandardBase64Engine, s.as_bytes()) {
            Ok(bytes) => Ok(Cow::Owned(bytes)),
            Err(err) => Err(Error::text_parse_error(err)),
        }
    }

    /// Encode the bytes as base64 text. Never fails and always emits a
    /// value.
    fn encode<'a>(&self, value: &'a Cow<'x, [u8]>) -> Result<Option<Cow<'a, str>>, Error> {
        let text = base64::engine::Engine::encode(&StandardBase64Engine, value);
        Ok(Some(Cow::Owned(text)))
    }
}
456
#[cfg(feature = "base64")]
impl<T> TextCodec<Option<T>> for Base64
where
    Base64: TextCodec<T>,
{
    /// Decode the empty string as `None`; anything else is decoded with the
    /// `Base64` codec for `T` and wrapped in `Some`.
    fn decode(&self, s: String) -> Result<Option<T>, Error> {
        if s.is_empty() {
            Ok(None)
        } else {
            <Self as TextCodec<T>>::decode(self, s).map(Some)
        }
    }

    /// Encode the inner value with the `Base64` codec for `T`; `None` emits
    /// nothing.
    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
        match decoded {
            Some(inner) => <Self as TextCodec<T>>::encode(self, inner),
            None => Ok(None),
        }
    }
}
477
#[cfg(feature = "base64")]
impl<T> TextCodec<Vec<u8>> for T
where
    T: base64::engine::Engine,
{
    /// Decode text into bytes using this engine; decoding failures are
    /// reported as text parse errors.
    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
        // Fully qualified: `self.decode(..)` would be ambiguous between
        // `Engine::decode` and `TextCodec::decode`.
        let decoded = base64::engine::Engine::decode(self, s.as_bytes());
        decoded.map_err(Error::text_parse_error)
    }

    /// Encode bytes as text using this engine. Never fails and always emits
    /// a value.
    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
        let text = base64::engine::Engine::encode(self, value);
        Ok(Some(Cow::Owned(text)))
    }
}
490
#[cfg(feature = "base64")]
impl<T, U> TextCodec<Option<U>> for T
where
    T: base64::engine::Engine + TextCodec<U>,
{
    /// Decode the empty string as `None`; anything else is decoded with
    /// this engine's codec for `U` and wrapped in `Some`.
    fn decode(&self, s: String) -> Result<Option<U>, Error> {
        if s.is_empty() {
            Ok(None)
        } else {
            <T as TextCodec<U>>::decode(self, s).map(Some)
        }
    }

    /// Encode the inner value with this engine's codec for `U`; `None`
    /// emits nothing.
    fn encode<'x>(&self, decoded: &'x Option<U>) -> Result<Option<Cow<'x, str>>, Error> {
        match decoded {
            Some(inner) => <T as TextCodec<U>>::encode(self, inner),
            None => Ok(None),
        }
    }
}
511
512/// Text codec transforming text to binary using hexadecimal nibbles.
513///
514/// The length must be known at compile-time.
515pub struct FixedHex<const N: usize>;
516
517impl<const N: usize> TextCodec<[u8; N]> for FixedHex<N> {
518    fn decode(&self, s: String) -> Result<[u8; N], Error> {
519        if s.len() != 2 * N {
520            return Err(Error::Other("Invalid length"));
521        }
522
523        let mut bytes = [0u8; N];
524        for i in 0..N {
525            bytes[i] =
526                u8::from_str_radix(&s[2 * i..2 * i + 2], 16).map_err(Error::text_parse_error)?;
527        }
528
529        Ok(bytes)
530    }
531
532    fn encode<'x>(&self, value: &'x [u8; N]) -> Result<Option<Cow<'x, str>>, Error> {
533        let mut bytes = String::with_capacity(N * 2);
534        for byte in value {
535            bytes.extend(format!("{:02x}", byte).chars());
536        }
537        Ok(Some(Cow::Owned(bytes)))
538    }
539}
540
541impl<T, const N: usize> TextCodec<Option<T>> for FixedHex<N>
542where
543    FixedHex<N>: TextCodec<T>,
544{
545    fn decode(&self, s: String) -> Result<Option<T>, Error> {
546        if s.is_empty() {
547            return Ok(None);
548        }
549        Ok(Some(self.decode(s)?))
550    }
551
552    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
553        decoded
554            .as_ref()
555            .map(|x| self.encode(x))
556            .transpose()
557            .map(Option::flatten)
558    }
559}
560
561/// Text codec for colon-separated bytes of uppercase hexadecimal.
562pub struct ColonSeparatedHex;
563
564impl TextCodec<Vec<u8>> for ColonSeparatedHex {
565    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
566        assert_eq!((s.len() + 1) % 3, 0);
567        let mut bytes = Vec::with_capacity((s.len() + 1) / 3);
568        for i in 0..(1 + s.len()) / 3 {
569            let byte =
570                u8::from_str_radix(&s[3 * i..3 * i + 2], 16).map_err(Error::text_parse_error)?;
571            if 3 * i + 2 < s.len() {
572                assert_eq!(&s[3 * i + 2..3 * i + 3], ":");
573            }
574            bytes.push(byte);
575        }
576        Ok(bytes)
577    }
578
579    fn encode<'x>(&self, decoded: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
580        // TODO: Super inefficient!
581        let mut bytes = Vec::with_capacity(decoded.len());
582        for byte in decoded {
583            bytes.push(format!("{:02X}", byte));
584        }
585        Ok(Some(Cow::Owned(bytes.join(":"))))
586    }
587}