xso/
text.rs

1// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7//! # Convert data to and from XML text
8//!
9//! This module provides traits and types related to conversion of XML text
10//! data to and from Rust types, as well as the [`AsXmlText`],
11//! [`AsOptionalXmlText`][`crate::AsOptionalXmlText`] and [`FromXmlText`]
12//! implementations for foreign and standard-library types.
13//!
14//! ## Support for types from third-party crates
15//!
16//! Beyond the standard library types, the following additional types are
17//! supported:
18//!
19//! | Feature gate | Types |
20//! | --- | --- |
//! | `jid` | `jid::Jid`, `jid::BareJid`, `jid::FullJid`, `jid::NodePart`, `jid::DomainPart`, `jid::ResourcePart` |
22//! | `serde_json` | `serde_json::Value` |
23//! | `uuid` | `uuid::Uuid` |
24//!
25//! ### Adding support for more types
26//!
27//! Due to the orphan rule, it is not possible for applications to implement
28//! [`AsXmlText`], [`AsOptionalXmlText`][`crate::AsOptionalXmlText`] or
29//! [`FromXmlText`] on types which originate from third-party crates. Because
30//! of that, we are **extremely liberal** at accepting merge requests for
31//! implementations of these traits for types from third-party crates.
32//!
33//! The only requirement is that the implementation is gated behind a feature
34//! flag which is disabled-by-default.
35//!
36//! ### Workaround for unsupported types
37//!
38//! If making a merge request against `xso` and waiting for a release is not
39//! an option, you can use newtype wrappers in almost all cases, for example:
40//!
41#![cfg_attr(
42    not(all(feature = "std", feature = "macros")),
43    doc = "Because the std or macros feature was not enabled at doc build time, the example cannot be tested.\n\n```ignore\n"
44)]
45#![cfg_attr(all(feature = "std", feature = "macros"), doc = "\n```\n")]
46//! # use xso::{AsXml, FromXml, AsXmlText, FromXmlText, error::Error};
47//! # use std::borrow::Cow;
48//! use std::process::ExitCode;
49//!
50//! struct MyExitCode(ExitCode);
51//!
52//! impl AsXmlText for MyExitCode {
53//!     fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
54//!         match self.0 {
55//!             ExitCode::FAILURE => Ok(Cow::Borrowed("failure")),
56//!             ExitCode::SUCCESS => Ok(Cow::Borrowed("success")),
57//!             _ => Err(Error::Other("unknown exit code")),
58//!         }
59//!     }
60//! }
61//!
62//! impl FromXmlText for MyExitCode {
63//!     fn from_xml_text(s: String) -> Result<Self, Error> {
64//!         match s.as_str() {
65//!             "failure" => Ok(Self(ExitCode::FAILURE)),
66//!             "success" => Ok(Self(ExitCode::SUCCESS)),
67//!             _ => Err(Error::Other("unknown exit code")),
68//!         }
69//!     }
70//! }
71//!
72//! #[derive(AsXml, FromXml)]
73//! #[xml(namespace = "urn:example", name = "process-result")]
74//! struct ProcessResult {
75//!     #[xml(attribute)]
76//!     code: MyExitCode,
77//!     #[xml(text)]
78//!     stdout: String,
79//! }
80//! ```
81//!
82//! Of course, such an approach reduces the usability of your struct (and
83//! comes with issues once references are needed), so making a merge request
84//! against `xso` is generally preferable.
85
86use core::marker::PhantomData;
87
88use alloc::{
89    borrow::Cow,
90    format,
91    string::{String, ToString},
92    vec::Vec,
93};
94
95use crate::{error::Error, AsXmlText, FromXmlText};
96
97#[cfg(feature = "base64")]
98use base64::engine::general_purpose::STANDARD as StandardBase64Engine;
99
// Implements `FromXmlText` and `AsXmlText` for every type in the list.
//
// Input: a list of types, each followed by a comma and optionally preceded
// by a single `#[cfg ...]` attribute. When the attribute is present it is
// copied onto both generated impls, so they only exist if the condition
// holds.
//
// The generated `from_xml_text` calls `parse()` on the text and maps the
// parse error through `Error::text_parse_error`; the generated
// `as_xml_text` wraps the result of `to_string()` in `Cow::Owned`.
macro_rules! convert_via_fromstr_and_display {
    ($($(#[cfg $cfg:tt])?$t:ty,)+) => {
        $(
            // Re-emit the optional cfg gate in front of the parsing impl.
            $(
                #[cfg $cfg]
            )?
            impl FromXmlText for $t {
                #[doc = concat!("Parse [`", stringify!($t), "`] from XML text via [`FromStr`][`core::str::FromStr`].")]
                fn from_xml_text(s: String) -> Result<Self, Error> {
                    s.parse().map_err(Error::text_parse_error)
                }
            }

            // The same gate has to be repeated for the serialising impl.
            $(
                #[cfg $cfg]
            )?
            impl AsXmlText for $t {
                #[doc = concat!("Convert [`", stringify!($t), "`] to XML text via [`Display`][`core::fmt::Display`].\n\nThis implementation never fails.")]
                fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
                    Ok(Cow::Owned(self.to_string()))
                }
            }
        )+
    }
}
125
126/// This provides an implementation compliant with xsd::bool.
127impl FromXmlText for bool {
128    /// Parse a boolean from XML text.
129    ///
130    /// The values `"1"` and `"true"` are considered true. The values `"0"`
131    /// and `"false"` are considered `false`. Any other value is invalid and
132    /// will return an error.
133    fn from_xml_text(s: String) -> Result<Self, Error> {
134        match s.as_str() {
135            "1" => "true",
136            "0" => "false",
137            other => other,
138        }
139        .parse()
140        .map_err(Error::text_parse_error)
141    }
142}
143
144/// This provides an implementation compliant with xsd::bool.
145impl AsXmlText for bool {
146    /// Convert a boolean to XML text.
147    ///
148    /// `true` is converted to `"true"` and `false` is converted to `"false"`.
149    /// This implementation never fails.
150    fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
151        match self {
152            true => Ok(Cow::Borrowed("true")),
153            false => Ok(Cow::Borrowed("false")),
154        }
155    }
156}
157
// Text conversions for all types which are converted via `FromStr` and
// `Display`.
convert_via_fromstr_and_display! {
    // Primitive integers, floats and `char`.
    u8,
    u16,
    u32,
    u64,
    u128,
    usize,
    i8,
    i16,
    i32,
    i64,
    i128,
    isize,
    f32,
    f64,
    char,
    // IP and socket address types.
    core::net::IpAddr,
    core::net::Ipv4Addr,
    core::net::Ipv6Addr,
    core::net::SocketAddr,
    core::net::SocketAddrV4,
    core::net::SocketAddrV6,
    // Non-zero integer types.
    core::num::NonZeroU8,
    core::num::NonZeroU16,
    core::num::NonZeroU32,
    core::num::NonZeroU64,
    core::num::NonZeroU128,
    core::num::NonZeroUsize,
    core::num::NonZeroI8,
    core::num::NonZeroI16,
    core::num::NonZeroI32,
    core::num::NonZeroI64,
    core::num::NonZeroI128,
    core::num::NonZeroIsize,

    // Types from optional third-party crates. Each entry is only compiled
    // when the feature named in its `cfg` attribute is enabled.
    #[cfg(feature = "uuid")]
    uuid::Uuid,

    #[cfg(feature = "jid")]
    jid::Jid,
    #[cfg(feature = "jid")]
    jid::FullJid,
    #[cfg(feature = "jid")]
    jid::BareJid,
    #[cfg(feature = "jid")]
    jid::NodePart,
    #[cfg(feature = "jid")]
    jid::DomainPart,
    #[cfg(feature = "jid")]
    jid::ResourcePart,

    #[cfg(feature = "serde_json")]
    serde_json::Value,
}
212
213/// Represent a way to encode/decode text data into a Rust type.
214///
215/// This trait can be used in scenarios where implementing [`FromXmlText`]
216/// and/or [`AsXmlText`] on a type is not feasible or sensible, such as the
217/// following:
218///
219/// 1. The type originates in a foreign crate, preventing the implementation
220///    of foreign traits.
221///
222/// 2. There is more than one way to convert a value to/from XML.
223///
224/// The codec to use for a text can be specified in the attributes understood
225/// by `FromXml` and `AsXml` derive macros. See the documentation of the
226/// [`FromXml`][`macro@crate::FromXml`] derive macro for details.
227pub trait TextCodec<T> {
228    /// Decode a string value into the type.
229    fn decode(&self, s: String) -> Result<T, Error>;
230
231    /// Encode the type as string value.
232    ///
233    /// If this returns `None`, the string value is not emitted at all.
234    fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error>;
235
236    /// Apply a filter to this codec.
237    ///
238    /// Filters preprocess strings before they are handed to the codec for
239    /// parsing, allowing to, for example, make the codec ignore irrelevant
240    /// content by stripping it.
241    // NOTE: The bound on T is needed because any given type A may implement
242    // TextCodec for any number of types. If we pass T down to the `Filtered`
243    // struct, rustc can do type inference on which `TextCodec`
244    // implementation the `filtered` method is supposed to have been called
245    // on.
246    fn filtered<F: TextFilter>(self, filter: F) -> Filtered<F, Self, T>
247    where
248        // placing the bound here (instead of on the `TextCodec<T>` trait
249        // itself) preserves object-safety of TextCodec<T>.
250        Self: Sized,
251    {
252        Filtered {
253            filter,
254            codec: self,
255            bound: PhantomData,
256        }
257    }
258}
259
/// Wrapper struct to apply a filter to a codec.
///
/// You can construct a value of this type via [`TextCodec::filtered`].
// NOTE: see the note on TextCodec::filtered for why we bind `T` here, too.
// In short: no value of type `T` is stored; the parameter only records
// which `TextCodec<T>` implementation of the wrapped codec is meant, as a
// single type may implement `TextCodec` for several target types.
pub struct Filtered<F, C, T> {
    // Preprocessor which is applied to incoming text before decoding.
    filter: F,
    // The wrapped codec which performs the actual conversion.
    codec: C,
    // Zero-sized marker which ties the wrapper to the target type `T`.
    bound: PhantomData<T>,
}
269
270impl<T, F: TextFilter, C: TextCodec<T>> TextCodec<T> for Filtered<F, C, T> {
271    fn decode(&self, s: String) -> Result<T, Error> {
272        let s = self.filter.preprocess(s);
273        self.codec.decode(s)
274    }
275
276    fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error> {
277        self.codec.encode(value)
278    }
279}
280
281/// Text codec which does no transform.
282pub struct Plain;
283
284impl TextCodec<String> for Plain {
285    fn decode(&self, s: String) -> Result<String, Error> {
286        Ok(s)
287    }
288
289    fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
290        Ok(Some(Cow::Borrowed(value.as_str())))
291    }
292}
293
294/// Text codec which returns `None` if the input to decode is the empty string, instead of
295/// attempting to decode it.
296///
297/// Particularly useful when parsing `Option<T>` on `#[xml(text)]`, which does not support
298/// `Option<_>` otherwise.
299pub struct EmptyAsNone;
300
301impl<T> TextCodec<Option<T>> for EmptyAsNone
302where
303    T: FromXmlText + AsXmlText,
304{
305    fn decode(&self, s: String) -> Result<Option<T>, Error> {
306        if s.is_empty() {
307            Ok(None)
308        } else {
309            Some(T::from_xml_text(s)).transpose()
310        }
311    }
312
313    fn encode<'x>(&self, value: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
314        Ok(value
315            .as_ref()
316            .map(AsXmlText::as_xml_text)
317            .transpose()?
318            .and_then(|v| (!v.is_empty()).then_some(v)))
319    }
320}
321
322/// Text codec which returns None instead of the empty string.
323pub struct EmptyAsError;
324
325impl TextCodec<String> for EmptyAsError {
326    fn decode(&self, s: String) -> Result<String, Error> {
327        if s.is_empty() {
328            Err(Error::Other("Empty text node."))
329        } else {
330            Ok(s)
331        }
332    }
333
334    fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
335        if value.is_empty() {
336            Err(Error::Other("Empty text node."))
337        } else {
338            Ok(Some(Cow::Borrowed(value.as_str())))
339        }
340    }
341}
342
/// Trait for preprocessing text data from XML.
///
/// This may be used by codecs to allow to customize some of their behaviour.
///
/// A filter is attached to a codec via [`TextCodec::filtered`]; the
/// resulting [`Filtered`] wrapper calls
/// [`preprocess`][`TextFilter::preprocess`] on the text before handing it
/// to the wrapped codec for decoding.
pub trait TextFilter {
    /// Process the incoming string and return the result of the processing.
    ///
    /// The string is passed by value, so an implementation may modify it in
    /// place or return it untouched.
    fn preprocess(&self, s: String) -> String;
}
350
/// Text preprocessor which returns the input unchanged.
pub struct NoFilter;

impl TextFilter for NoFilter {
    /// Hand `s` back as-is; the string is moved through, not copied.
    fn preprocess(&self, s: String) -> String {
        s
    }
}
359
360/// Text preprocessor to remove all whitespace.
361pub struct StripWhitespace;
362
363impl TextFilter for StripWhitespace {
364    fn preprocess(&self, s: String) -> String {
365        let s: String = s
366            .chars()
367            .filter(|ch| *ch != ' ' && *ch != '\n' && *ch != '\t')
368            .collect();
369        s
370    }
371}
372
/// Text codec transforming text to binary using standard `base64`.
///
/// `Base64` uses the [`base64::engine::general_purpose::STANDARD`] engine.
/// [`TextCodec`] is also automatically implemented for any value which
/// implements [`base64::engine::Engine`], allowing you to choose different
/// alphabets easily.
#[cfg(feature = "base64")]
pub struct Base64;

#[cfg(feature = "base64")]
impl TextCodec<Vec<u8>> for Base64 {
    /// Decode base64 text into bytes using the standard engine.
    ///
    /// A decoding failure is reported as a text parse error.
    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
        let decoded = base64::engine::Engine::decode(&StandardBase64Engine, s.as_bytes());
        decoded.map_err(Error::text_parse_error)
    }

    /// Encode bytes as base64 text using the standard engine. This never
    /// fails and always emits a value.
    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
        let text = base64::engine::Engine::encode(&StandardBase64Engine, value);
        Ok(Some(Cow::Owned(text)))
    }
}
396
#[cfg(feature = "base64")]
impl<'x> TextCodec<Cow<'x, [u8]>> for Base64 {
    /// Decode base64 text into an owned byte buffer using the standard
    /// engine.
    ///
    /// A decoding failure is reported as a text parse error.
    fn decode(&self, s: String) -> Result<Cow<'x, [u8]>, Error> {
        let decoded = base64::engine::Engine::decode(&StandardBase64Engine, s.as_bytes())
            .map_err(Error::text_parse_error)?;
        Ok(Cow::Owned(decoded))
    }

    /// Encode the bytes as base64 text using the standard engine. This
    /// never fails and always emits a value.
    fn encode<'a>(&self, value: &'a Cow<'x, [u8]>) -> Result<Option<Cow<'a, str>>, Error> {
        let text = base64::engine::Engine::encode(&StandardBase64Engine, value);
        Ok(Some(Cow::Owned(text)))
    }
}
412
#[cfg(feature = "base64")]
impl<T> TextCodec<Option<T>> for Base64
where
    Base64: TextCodec<T>,
{
    /// Map the empty string to `None`; decode anything else as `T` and
    /// wrap the result in `Some`.
    fn decode(&self, s: String) -> Result<Option<T>, Error> {
        if s.is_empty() {
            Ok(None)
        } else {
            <Self as TextCodec<T>>::decode(self, s).map(Some)
        }
    }

    /// Encode the inner value as `T`; nothing is emitted for `None`.
    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
        match decoded {
            Some(inner) => <Self as TextCodec<T>>::encode(self, inner),
            None => Ok(None),
        }
    }
}
433
#[cfg(feature = "base64")]
impl<T: base64::engine::Engine> TextCodec<Vec<u8>> for T {
    /// Decode base64 text into bytes using this engine.
    ///
    /// A decoding failure is reported as a text parse error.
    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
        let decoded = base64::engine::Engine::decode(self, s.as_bytes());
        decoded.map_err(Error::text_parse_error)
    }

    /// Encode bytes as base64 text using this engine. This never fails
    /// and always emits a value.
    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
        let text = base64::engine::Engine::encode(self, value);
        Ok(Some(Cow::Owned(text)))
    }
}
446
#[cfg(feature = "base64")]
impl<T: base64::engine::Engine, U> TextCodec<Option<U>> for T
where
    T: TextCodec<U>,
{
    /// Map the empty string to `None`; decode anything else as `U` and
    /// wrap the result in `Some`.
    fn decode(&self, s: String) -> Result<Option<U>, Error> {
        if s.is_empty() {
            Ok(None)
        } else {
            <T as TextCodec<U>>::decode(self, s).map(Some)
        }
    }

    /// Encode the inner value as `U`; nothing is emitted for `None`.
    fn encode<'x>(&self, decoded: &'x Option<U>) -> Result<Option<Cow<'x, str>>, Error> {
        match decoded {
            Some(inner) => <T as TextCodec<U>>::encode(self, inner),
            None => Ok(None),
        }
    }
}
467
468/// Text codec transforming text to binary using hexadecimal nibbles.
469///
470/// The length must be known at compile-time.
471pub struct FixedHex<const N: usize>;
472
473impl<const N: usize> TextCodec<[u8; N]> for FixedHex<N> {
474    fn decode(&self, s: String) -> Result<[u8; N], Error> {
475        if s.len() != 2 * N {
476            return Err(Error::Other("Invalid length"));
477        }
478
479        let mut bytes = [0u8; N];
480        for i in 0..N {
481            bytes[i] =
482                u8::from_str_radix(&s[2 * i..2 * i + 2], 16).map_err(Error::text_parse_error)?;
483        }
484
485        Ok(bytes)
486    }
487
488    fn encode<'x>(&self, value: &'x [u8; N]) -> Result<Option<Cow<'x, str>>, Error> {
489        let mut bytes = String::with_capacity(N * 2);
490        for byte in value {
491            bytes.extend(format!("{:02x}", byte).chars());
492        }
493        Ok(Some(Cow::Owned(bytes)))
494    }
495}
496
497impl<T, const N: usize> TextCodec<Option<T>> for FixedHex<N>
498where
499    FixedHex<N>: TextCodec<T>,
500{
501    fn decode(&self, s: String) -> Result<Option<T>, Error> {
502        if s.is_empty() {
503            return Ok(None);
504        }
505        Ok(Some(self.decode(s)?))
506    }
507
508    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
509        decoded
510            .as_ref()
511            .map(|x| self.encode(x))
512            .transpose()
513            .map(Option::flatten)
514    }
515}
516
517/// Text codec for colon-separated bytes of uppercase hexadecimal.
518pub struct ColonSeparatedHex;
519
520impl TextCodec<Vec<u8>> for ColonSeparatedHex {
521    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
522        assert_eq!((s.len() + 1) % 3, 0);
523        let mut bytes = Vec::with_capacity((s.len() + 1) / 3);
524        for i in 0..(1 + s.len()) / 3 {
525            let byte =
526                u8::from_str_radix(&s[3 * i..3 * i + 2], 16).map_err(Error::text_parse_error)?;
527            if 3 * i + 2 < s.len() {
528                assert_eq!(&s[3 * i + 2..3 * i + 3], ":");
529            }
530            bytes.push(byte);
531        }
532        Ok(bytes)
533    }
534
535    fn encode<'x>(&self, decoded: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
536        // TODO: Super inefficient!
537        let mut bytes = Vec::with_capacity(decoded.len());
538        for byte in decoded {
539            bytes.push(format!("{:02X}", byte));
540        }
541        Ok(Some(Cow::Owned(bytes.join(":"))))
542    }
543}