xso/
text.rs

1// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7//! Module containing implementations for conversions to/from XML text.
8
9use core::marker::PhantomData;
10
11use alloc::{
12    borrow::Cow,
13    format,
14    string::{String, ToString},
15    vec::Vec,
16};
17
18use crate::{error::Error, AsXmlText, FromXmlText};
19
20#[cfg(feature = "base64")]
21use base64::engine::general_purpose::STANDARD as StandardBase64Engine;
22
// Implements `FromXmlText` and `AsXmlText` for every listed type by
// delegating to `str::parse` and `ToString::to_string`, respectively.
//
// Each entry is a type followed by a comma. An entry may be preceded by a
// single `#[cfg(...)]` attribute, which is then applied to both generated
// impls.
macro_rules! convert_via_fromstr_and_display {
    ($($(#[cfg $cfg:tt])?$t:ty,)+) => {
        $(
            $(
                #[cfg $cfg]
            )?
            impl FromXmlText for $t {
                #[doc = concat!("Parse [`", stringify!($t), "`] from XML text via [`FromStr`][`core::str::FromStr`].")]
                fn from_xml_text(s: String) -> Result<Self, Error> {
                    // Parse failures are wrapped via `Error::text_parse_error`.
                    s.parse().map_err(Error::text_parse_error)
                }
            }

            $(
                #[cfg $cfg]
            )?
            impl AsXmlText for $t {
                #[doc = concat!("Convert [`", stringify!($t), "`] to XML text via [`Display`][`core::fmt::Display`].\n\nThis implementation never fails.")]
                fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
                    // Always an owned string: the text is freshly formatted.
                    Ok(Cow::Owned(self.to_string()))
                }
            }
        )+
    }
}
48
49/// This provides an implementation compliant with xsd::bool.
50impl FromXmlText for bool {
51    /// Parse a boolean from XML text.
52    ///
53    /// The values `"1"` and `"true"` are considered true. The values `"0"`
54    /// and `"false"` are considered `false`. Any other value is invalid and
55    /// will return an error.
56    fn from_xml_text(s: String) -> Result<Self, Error> {
57        match s.as_str() {
58            "1" => "true",
59            "0" => "false",
60            other => other,
61        }
62        .parse()
63        .map_err(Error::text_parse_error)
64    }
65}
66
67/// This provides an implementation compliant with xsd::bool.
68impl AsXmlText for bool {
69    /// Convert a boolean to XML text.
70    ///
71    /// `true` is converted to `"true"` and `false` is converted to `"false"`.
72    /// This implementation never fails.
73    fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
74        match self {
75            true => Ok(Cow::Borrowed("true")),
76            false => Ok(Cow::Borrowed("false")),
77        }
78    }
79}
80
// Types whose XML text form is produced by `str::parse` / `to_string`.
// Entries guarded by `#[cfg(feature = ...)]` are only generated when the
// respective crate feature is enabled.
convert_via_fromstr_and_display! {
    u8,
    u16,
    u32,
    u64,
    u128,
    usize,
    i8,
    i16,
    i32,
    i64,
    i128,
    isize,
    f32,
    f64,
    char,
    core::net::IpAddr,
    core::net::Ipv4Addr,
    core::net::Ipv6Addr,
    core::net::SocketAddr,
    core::net::SocketAddrV4,
    core::net::SocketAddrV6,
    core::num::NonZeroU8,
    core::num::NonZeroU16,
    core::num::NonZeroU32,
    core::num::NonZeroU64,
    core::num::NonZeroU128,
    core::num::NonZeroUsize,
    core::num::NonZeroI8,
    core::num::NonZeroI16,
    core::num::NonZeroI32,
    core::num::NonZeroI64,
    core::num::NonZeroI128,
    core::num::NonZeroIsize,

    #[cfg(feature = "uuid")]
    uuid::Uuid,

    #[cfg(feature = "jid")]
    jid::Jid,
    #[cfg(feature = "jid")]
    jid::FullJid,
    #[cfg(feature = "jid")]
    jid::BareJid,
    #[cfg(feature = "jid")]
    jid::NodePart,
    #[cfg(feature = "jid")]
    jid::DomainPart,
    #[cfg(feature = "jid")]
    jid::ResourcePart,

    #[cfg(feature = "serde_json")]
    serde_json::Value,
}
135
136/// Represent a way to encode/decode text data into a Rust type.
137///
138/// This trait can be used in scenarios where implementing [`FromXmlText`]
139/// and/or [`AsXmlText`] on a type is not feasible or sensible, such as the
140/// following:
141///
142/// 1. The type originates in a foreign crate, preventing the implementation
143///    of foreign traits.
144///
145/// 2. There is more than one way to convert a value to/from XML.
146///
147/// The codec to use for a text can be specified in the attributes understood
148/// by `FromXml` and `AsXml` derive macros. See the documentation of the
149/// [`FromXml`][`macro@crate::FromXml`] derive macro for details.
150pub trait TextCodec<T> {
151    /// Decode a string value into the type.
152    fn decode(&self, s: String) -> Result<T, Error>;
153
154    /// Encode the type as string value.
155    ///
156    /// If this returns `None`, the string value is not emitted at all.
157    fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error>;
158
159    /// Apply a filter to this codec.
160    ///
161    /// Filters preprocess strings before they are handed to the codec for
162    /// parsing, allowing to, for example, make the codec ignore irrelevant
163    /// content by stripping it.
164    // NOTE: The bound on T is needed because any given type A may implement
165    // TextCodec for any number of types. If we pass T down to the `Filtered`
166    // struct, rustc can do type inference on which `TextCodec`
167    // implementation the `filtered` method is supposed to have been called
168    // on.
169    fn filtered<F: TextFilter>(self, filter: F) -> Filtered<F, Self, T>
170    where
171        // placing the bound here (instead of on the `TextCodec<T>` trait
172        // itself) preserves object-safety of TextCodec<T>.
173        Self: Sized,
174    {
175        Filtered {
176            filter,
177            codec: self,
178            bound: PhantomData,
179        }
180    }
181}
182
/// Wrapper struct to apply a filter to a codec.
///
/// You can construct a value of this type via [`TextCodec::filtered`].
// NOTE: see the note on TextCodec::filtered for why we bind `T` here, too.
pub struct Filtered<F, C, T> {
    // Filter which preprocesses incoming text before it is decoded.
    filter: F,
    // The wrapped codec doing the actual decoding and encoding.
    codec: C,
    // Binds the target type `T` without storing a value of it.
    bound: PhantomData<T>,
}
192
193impl<T, F: TextFilter, C: TextCodec<T>> TextCodec<T> for Filtered<F, C, T> {
194    fn decode(&self, s: String) -> Result<T, Error> {
195        let s = self.filter.preprocess(s);
196        self.codec.decode(s)
197    }
198
199    fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error> {
200        self.codec.encode(value)
201    }
202}
203
204/// Text codec which does no transform.
205pub struct Plain;
206
207impl TextCodec<String> for Plain {
208    fn decode(&self, s: String) -> Result<String, Error> {
209        Ok(s)
210    }
211
212    fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
213        Ok(Some(Cow::Borrowed(value.as_str())))
214    }
215}
216
217/// Text codec which returns `None` if the input to decode is the empty string, instead of
218/// attempting to decode it.
219///
220/// Particularly useful when parsing `Option<T>` on `#[xml(text)]`, which does not support
221/// `Option<_>` otherwise.
222pub struct EmptyAsNone;
223
224impl<T> TextCodec<Option<T>> for EmptyAsNone
225where
226    T: FromXmlText + AsXmlText,
227{
228    fn decode(&self, s: String) -> Result<Option<T>, Error> {
229        if s.is_empty() {
230            Ok(None)
231        } else {
232            Some(T::from_xml_text(s)).transpose()
233        }
234    }
235
236    fn encode<'x>(&self, value: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
237        Ok(value
238            .as_ref()
239            .map(AsXmlText::as_xml_text)
240            .transpose()?
241            .and_then(|v| (!v.is_empty()).then_some(v)))
242    }
243}
244
245/// Text codec which returns None instead of the empty string.
246pub struct EmptyAsError;
247
248impl TextCodec<String> for EmptyAsError {
249    fn decode(&self, s: String) -> Result<String, Error> {
250        if s.is_empty() {
251            Err(Error::Other("Empty text node."))
252        } else {
253            Ok(s)
254        }
255    }
256
257    fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
258        if value.is_empty() {
259            Err(Error::Other("Empty text node."))
260        } else {
261            Ok(Some(Cow::Borrowed(value.as_str())))
262        }
263    }
264}
265
/// Trait for preprocessing text data from XML.
///
/// This may be used by codecs to allow customizing some of their behaviour.
/// A filter is attached to a codec via [`TextCodec::filtered`]; the
/// resulting [`Filtered`] codec runs the filter on the text before decoding
/// it.
pub trait TextFilter {
    /// Process the incoming string and return the result of the processing.
    ///
    /// Takes ownership of `s` and returns the text to use in its place.
    fn preprocess(&self, s: String) -> String;
}
273
274/// Text preprocessor which returns the input unchanged.
275pub struct NoFilter;
276
277impl TextFilter for NoFilter {
278    fn preprocess(&self, s: String) -> String {
279        s
280    }
281}
282
283/// Text preprocessor to remove all whitespace.
284pub struct StripWhitespace;
285
286impl TextFilter for StripWhitespace {
287    fn preprocess(&self, s: String) -> String {
288        let s: String = s
289            .chars()
290            .filter(|ch| *ch != ' ' && *ch != '\n' && *ch != '\t')
291            .collect();
292        s
293    }
294}
295
/// Text codec transforming text to binary using standard `base64`.
///
/// `Base64` uses the [`base64::engine::general_purpose::STANDARD`] engine.
/// [`TextCodec`] is also automatically implemented for any value which
/// implements [`base64::engine::Engine`], allowing you to choose different
/// alphabets easily.
#[cfg(feature = "base64")]
pub struct Base64;

#[cfg(feature = "base64")]
impl TextCodec<Vec<u8>> for Base64 {
    /// Decode base64 text into bytes using the standard engine.
    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
        match base64::engine::Engine::decode(&StandardBase64Engine, s.as_bytes()) {
            Ok(bytes) => Ok(bytes),
            Err(err) => Err(Error::text_parse_error(err)),
        }
    }

    /// Encode bytes as base64 text using the standard engine.
    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
        let text = base64::engine::Engine::encode(&StandardBase64Engine, value.as_slice());
        Ok(Some(Cow::Owned(text)))
    }
}
319
#[cfg(feature = "base64")]
impl<'x> TextCodec<Cow<'x, [u8]>> for Base64 {
    /// Decode base64 text into an owned byte buffer using the standard
    /// engine.
    fn decode(&self, s: String) -> Result<Cow<'x, [u8]>, Error> {
        let bytes = base64::engine::Engine::decode(&StandardBase64Engine, s.as_bytes())
            .map_err(Error::text_parse_error)?;
        Ok(Cow::Owned(bytes))
    }

    /// Encode the bytes as base64 text using the standard engine.
    fn encode<'a>(&self, value: &'a Cow<'x, [u8]>) -> Result<Option<Cow<'a, str>>, Error> {
        let raw: &[u8] = value;
        let text = base64::engine::Engine::encode(&StandardBase64Engine, raw);
        Ok(Some(Cow::Owned(text)))
    }
}
335
#[cfg(feature = "base64")]
impl<T> TextCodec<Option<T>> for Base64
where
    Base64: TextCodec<T>,
{
    /// Decode empty text to `None`; anything else is decoded as `T`.
    fn decode(&self, s: String) -> Result<Option<T>, Error> {
        if s.is_empty() {
            Ok(None)
        } else {
            TextCodec::<T>::decode(self, s).map(Some)
        }
    }

    /// Encode the inner value as `T`; `None` results in no text at all.
    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
        match decoded {
            Some(inner) => TextCodec::<T>::encode(self, inner),
            None => Ok(None),
        }
    }
}
356
#[cfg(feature = "base64")]
impl<T: base64::engine::Engine> TextCodec<Vec<u8>> for T {
    /// Decode base64 text into bytes using this engine.
    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
        let decoded = base64::engine::Engine::decode(self, s.as_bytes());
        decoded.map_err(Error::text_parse_error)
    }

    /// Encode bytes as base64 text using this engine.
    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
        let text = base64::engine::Engine::encode(self, value.as_slice());
        Ok(Some(Cow::Owned(text)))
    }
}
369
#[cfg(feature = "base64")]
impl<T: base64::engine::Engine, U> TextCodec<Option<U>> for T
where
    T: TextCodec<U>,
{
    /// Decode empty text to `None`; anything else is decoded as `U`.
    fn decode(&self, s: String) -> Result<Option<U>, Error> {
        if s.is_empty() {
            return Ok(None);
        }
        // Fully qualified: `T` also implements `Engine`, which has a
        // `decode` method of its own.
        TextCodec::<U>::decode(self, s).map(Some)
    }

    /// Encode the inner value as `U`; `None` results in no text at all.
    fn encode<'x>(&self, decoded: &'x Option<U>) -> Result<Option<Cow<'x, str>>, Error> {
        match decoded {
            Some(inner) => TextCodec::<U>::encode(self, inner),
            None => Ok(None),
        }
    }
}
390
391/// Text codec transforming text to binary using hexadecimal nibbles.
392///
393/// The length must be known at compile-time.
394pub struct FixedHex<const N: usize>;
395
396impl<const N: usize> TextCodec<[u8; N]> for FixedHex<N> {
397    fn decode(&self, s: String) -> Result<[u8; N], Error> {
398        if s.len() != 2 * N {
399            return Err(Error::Other("Invalid length"));
400        }
401
402        let mut bytes = [0u8; N];
403        for i in 0..N {
404            bytes[i] =
405                u8::from_str_radix(&s[2 * i..2 * i + 2], 16).map_err(Error::text_parse_error)?;
406        }
407
408        Ok(bytes)
409    }
410
411    fn encode<'x>(&self, value: &'x [u8; N]) -> Result<Option<Cow<'x, str>>, Error> {
412        let mut bytes = String::with_capacity(N * 2);
413        for byte in value {
414            bytes.extend(format!("{:02x}", byte).chars());
415        }
416        Ok(Some(Cow::Owned(bytes)))
417    }
418}
419
420impl<T, const N: usize> TextCodec<Option<T>> for FixedHex<N>
421where
422    FixedHex<N>: TextCodec<T>,
423{
424    fn decode(&self, s: String) -> Result<Option<T>, Error> {
425        if s.is_empty() {
426            return Ok(None);
427        }
428        Ok(Some(self.decode(s)?))
429    }
430
431    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
432        decoded
433            .as_ref()
434            .map(|x| self.encode(x))
435            .transpose()
436            .map(Option::flatten)
437    }
438}
439
440/// Text codec for colon-separated bytes of uppercase hexadecimal.
441pub struct ColonSeparatedHex;
442
443impl TextCodec<Vec<u8>> for ColonSeparatedHex {
444    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
445        assert_eq!((s.len() + 1) % 3, 0);
446        let mut bytes = Vec::with_capacity((s.len() + 1) / 3);
447        for i in 0..(1 + s.len()) / 3 {
448            let byte =
449                u8::from_str_radix(&s[3 * i..3 * i + 2], 16).map_err(Error::text_parse_error)?;
450            if 3 * i + 2 < s.len() {
451                assert_eq!(&s[3 * i + 2..3 * i + 3], ":");
452            }
453            bytes.push(byte);
454        }
455        Ok(bytes)
456    }
457
458    fn encode<'x>(&self, decoded: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
459        // TODO: Super inefficient!
460        let mut bytes = Vec::with_capacity(decoded.len());
461        for byte in decoded {
462            bytes.push(format!("{:02X}", byte));
463        }
464        Ok(Some(Cow::Owned(bytes.join(":"))))
465    }
466}