xso/
text.rs

1// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7//! Module containing implementations for conversions to/from XML text.
8
9use core::marker::PhantomData;
10
11use alloc::{
12    borrow::Cow,
13    format,
14    string::{String, ToString},
15    vec::Vec,
16};
17
18use crate::{error::Error, AsXmlText, FromXmlText};
19
20#[cfg(feature = "base64")]
21use base64::engine::general_purpose::STANDARD as StandardBase64Engine;
22
// Implements both `FromXmlText` and `AsXmlText` for every listed type.
//
// - `from_xml_text` calls `str::parse` on the text and wraps a parse failure
//   via `Error::text_parse_error`.
// - `as_xml_text` calls `to_string()` on the value and returns the result as
//   an owned `Cow`; it always returns `Ok`.
macro_rules! convert_via_fromstr_and_display {
    // Input: one or more types, each followed by a comma. A type may be
    // preceded by a single `#[cfg ...]` attribute; that attribute is repeated
    // on both generated impls so they are compiled under the same condition.
    ($($(#[cfg $cfg:tt])?$t:ty,)+) => {
        $(
            $(
                #[cfg $cfg]
            )?
            impl FromXmlText for $t {
                #[doc = concat!("Parse [`", stringify!($t), "`] from XML text via [`FromStr`][`core::str::FromStr`].")]
                fn from_xml_text(s: String) -> Result<Self, Error> {
                    s.parse().map_err(Error::text_parse_error)
                }
            }

            $(
                #[cfg $cfg]
            )?
            impl AsXmlText for $t {
                #[doc = concat!("Convert [`", stringify!($t), "`] to XML text via [`Display`][`core::fmt::Display`].\n\nThis implementation never fails.")]
                fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
                    Ok(Cow::Owned(self.to_string()))
                }
            }
        )+
    }
}
48
49/// This provides an implementation compliant with xsd::bool.
50impl FromXmlText for bool {
51    /// Parse a boolean from XML text.
52    ///
53    /// The values `"1"` and `"true"` are considered true. The values `"0"`
54    /// and `"false"` are considered `false`. Any other value is invalid and
55    /// will return an error.
56    fn from_xml_text(s: String) -> Result<Self, Error> {
57        match s.as_str() {
58            "1" => "true",
59            "0" => "false",
60            other => other,
61        }
62        .parse()
63        .map_err(Error::text_parse_error)
64    }
65}
66
67/// This provides an implementation compliant with xsd::bool.
68impl AsXmlText for bool {
69    /// Convert a boolean to XML text.
70    ///
71    /// `true` is converted to `"true"` and `false` is converted to `"false"`.
72    /// This implementation never fails.
73    fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
74        match self {
75            true => Ok(Cow::Borrowed("true")),
76            false => Ok(Cow::Borrowed("false")),
77        }
78    }
79}
80
// Types whose XML text representation is exactly their `FromStr` /
// `to_string()` representation. Entries guarded by `#[cfg(feature = ...)]`
// are only implemented when the respective crate feature is enabled.
convert_via_fromstr_and_display! {
    u8,
    u16,
    u32,
    u64,
    u128,
    usize,
    i8,
    i16,
    i32,
    i64,
    i128,
    isize,
    f32,
    f64,
    char,
    core::net::IpAddr,
    core::net::Ipv4Addr,
    core::net::Ipv6Addr,
    core::net::SocketAddr,
    core::net::SocketAddrV4,
    core::net::SocketAddrV6,
    core::num::NonZeroU8,
    core::num::NonZeroU16,
    core::num::NonZeroU32,
    core::num::NonZeroU64,
    core::num::NonZeroU128,
    core::num::NonZeroUsize,
    core::num::NonZeroI8,
    core::num::NonZeroI16,
    core::num::NonZeroI32,
    core::num::NonZeroI64,
    core::num::NonZeroI128,
    core::num::NonZeroIsize,

    #[cfg(feature = "uuid")]
    uuid::Uuid,

    #[cfg(feature = "jid")]
    jid::Jid,
    #[cfg(feature = "jid")]
    jid::FullJid,
    #[cfg(feature = "jid")]
    jid::BareJid,
    #[cfg(feature = "jid")]
    jid::NodePart,
    #[cfg(feature = "jid")]
    jid::DomainPart,
    #[cfg(feature = "jid")]
    jid::ResourcePart,
}
132
/// Represent a way to encode/decode text data into a Rust type.
///
/// This trait can be used in scenarios where implementing [`FromXmlText`]
/// and/or [`AsXmlText`] on a type is not feasible or sensible, such as the
/// following:
///
/// 1. The type originates in a foreign crate, preventing the implementation
///    of foreign traits.
///
/// 2. There is more than one way to convert a value to/from XML.
///
/// The codec to use for a text can be specified in the attributes understood
/// by `FromXml` and `AsXml` derive macros. See the documentation of the
/// [`FromXml`][`macro@crate::FromXml`] derive macro for details.
pub trait TextCodec<T> {
    /// Decode a string value into the type.
    ///
    /// Takes ownership of the text `s` and returns the decoded value, or an
    /// [`Error`] if the text is not valid for this codec.
    fn decode(&self, s: String) -> Result<T, Error>;

    /// Encode the type as string value.
    ///
    /// If this returns `None`, the string value is not emitted at all.
    ///
    /// The returned text may borrow from `value` (lifetime `'x`). An
    /// [`Error`] is returned if the value cannot be represented by this
    /// codec.
    fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error>;

    /// Apply a filter to this codec.
    ///
    /// Filters preprocess strings before they are handed to the codec for
    /// parsing, allowing to, for example, make the codec ignore irrelevant
    /// content by stripping it.
    ///
    /// The filter only affects decoding; the returned [`Filtered`] codec
    /// forwards encoding to this codec unchanged.
    // NOTE: The bound on T is needed because any given type A may implement
    // TextCodec for any number of types. If we pass T down to the `Filtered`
    // struct, rustc can do type inference on which `TextCodec`
    // implementation the `filtered` method is supposed to have been called
    // on.
    fn filtered<F: TextFilter>(self, filter: F) -> Filtered<F, Self, T>
    where
        // placing the bound here (instead of on the `TextCodec<T>` trait
        // itself) preserves object-safety of TextCodec<T>.
        Self: Sized,
    {
        Filtered {
            filter,
            codec: self,
            bound: PhantomData,
        }
    }
}
179
/// Wrapper struct to apply a filter to a codec.
///
/// You can construct a value of this type via [`TextCodec::filtered`].
///
/// When used as a [`TextCodec`], decoding passes the text through the filter
/// before handing it to the wrapped codec, while encoding is forwarded to
/// the wrapped codec as-is.
// NOTE: see the note on TextCodec::filtered for why we bind `T` here, too.
pub struct Filtered<F, C, T> {
    /// Preprocessor applied to incoming text before it is decoded.
    filter: F,
    /// The wrapped codec which performs the actual conversion.
    codec: C,
    /// Marker which ties this wrapper to the codec's target type `T` without
    /// storing a value of that type.
    bound: PhantomData<T>,
}
189
190impl<T, F: TextFilter, C: TextCodec<T>> TextCodec<T> for Filtered<F, C, T> {
191    fn decode(&self, s: String) -> Result<T, Error> {
192        let s = self.filter.preprocess(s);
193        self.codec.decode(s)
194    }
195
196    fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error> {
197        self.codec.encode(value)
198    }
199}
200
201/// Text codec which does no transform.
202pub struct Plain;
203
204impl TextCodec<String> for Plain {
205    fn decode(&self, s: String) -> Result<String, Error> {
206        Ok(s)
207    }
208
209    fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
210        Ok(Some(Cow::Borrowed(value.as_str())))
211    }
212}
213
214/// Text codec which returns `None` if the input to decode is the empty string, instead of
215/// attempting to decode it.
216///
217/// Particularly useful when parsing `Option<T>` on `#[xml(text)]`, which does not support
218/// `Option<_>` otherwise.
219pub struct EmptyAsNone;
220
221impl<T> TextCodec<Option<T>> for EmptyAsNone
222where
223    T: FromXmlText + AsXmlText,
224{
225    fn decode(&self, s: String) -> Result<Option<T>, Error> {
226        if s.is_empty() {
227            Ok(None)
228        } else {
229            Some(T::from_xml_text(s)).transpose()
230        }
231    }
232
233    fn encode<'x>(&self, value: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
234        Ok(value
235            .as_ref()
236            .map(AsXmlText::as_xml_text)
237            .transpose()?
238            .and_then(|v| (!v.is_empty()).then_some(v)))
239    }
240}
241
242/// Text codec which returns None instead of the empty string.
243pub struct EmptyAsError;
244
245impl TextCodec<String> for EmptyAsError {
246    fn decode(&self, s: String) -> Result<String, Error> {
247        if s.is_empty() {
248            Err(Error::Other("Empty text node."))
249        } else {
250            Ok(s)
251        }
252    }
253
254    fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
255        if value.is_empty() {
256            Err(Error::Other("Empty text node."))
257        } else {
258            Ok(Some(Cow::Borrowed(value.as_str())))
259        }
260    }
261}
262
/// Trait for preprocessing text data from XML.
///
/// This may be used by codecs to allow to customize some of their behaviour.
pub trait TextFilter {
    /// Process the incoming string and return the result of the processing.
    fn preprocess(&self, s: String) -> String;
}

/// Text preprocessor which returns the input unchanged.
pub struct NoFilter;

impl TextFilter for NoFilter {
    /// Return `s` as-is.
    fn preprocess(&self, s: String) -> String {
        s
    }
}

/// Text preprocessor to remove all XML whitespace.
///
/// The characters removed are exactly those of the XML `S` production:
/// space (U+0020), tab (U+0009), line feed (U+000A) and carriage return
/// (U+000D). Other Unicode whitespace is left in place.
pub struct StripWhitespace;

impl TextFilter for StripWhitespace {
    /// Remove every XML whitespace character from `s`.
    ///
    /// The string is filtered in place, so no second buffer is allocated.
    fn preprocess(&self, mut s: String) -> String {
        // A carriage return can reach us through a character reference
        // (`&#13;`), so it has to be stripped like the other three.
        s.retain(|ch| !matches!(ch, ' ' | '\t' | '\n' | '\r'));
        s
    }
}
292
/// Text codec transforming text to binary using standard `base64`.
///
/// `Base64` uses the [`base64::engine::general_purpose::STANDARD`] engine.
/// [`TextCodec`] is also automatically implemented for any value which
/// implements [`base64::engine::Engine`], allowing you to choose different
/// alphabets easily.
#[cfg(feature = "base64")]
pub struct Base64;

#[cfg(feature = "base64")]
impl TextCodec<Vec<u8>> for Base64 {
    /// Decode base64 text into bytes using the standard engine.
    ///
    /// A decoding failure is reported as a text parse error.
    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
        match base64::engine::Engine::decode(&StandardBase64Engine, s.as_bytes()) {
            Ok(bytes) => Ok(bytes),
            Err(err) => Err(Error::text_parse_error(err)),
        }
    }

    /// Encode bytes as base64 text using the standard engine. This never
    /// fails and always emits a value.
    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
        let encoded = base64::engine::Engine::encode(&StandardBase64Engine, value.as_slice());
        Ok(Some(Cow::Owned(encoded)))
    }
}
316
#[cfg(feature = "base64")]
impl<'x> TextCodec<Cow<'x, [u8]>> for Base64 {
    /// Decode base64 text into an owned byte buffer using the standard
    /// engine.
    ///
    /// A decoding failure is reported as a text parse error.
    fn decode(&self, s: String) -> Result<Cow<'x, [u8]>, Error> {
        match base64::engine::Engine::decode(&StandardBase64Engine, s.as_bytes()) {
            Ok(bytes) => Ok(Cow::Owned(bytes)),
            Err(err) => Err(Error::text_parse_error(err)),
        }
    }

    /// Encode the bytes as base64 text using the standard engine. This never
    /// fails and always emits a value.
    fn encode<'a>(&self, value: &'a Cow<'x, [u8]>) -> Result<Option<Cow<'a, str>>, Error> {
        let raw: &[u8] = value;
        let encoded = base64::engine::Engine::encode(&StandardBase64Engine, raw);
        Ok(Some(Cow::Owned(encoded)))
    }
}
332
#[cfg(feature = "base64")]
impl<T> TextCodec<Option<T>> for Base64
where
    Base64: TextCodec<T>,
{
    /// Decode empty text as `None`; any other text is decoded as `T` and
    /// wrapped in `Some`.
    fn decode(&self, s: String) -> Result<Option<T>, Error> {
        if s.is_empty() {
            Ok(None)
        } else {
            <Self as TextCodec<T>>::decode(self, s).map(Some)
        }
    }

    /// Encode `None` as "no text"; a `Some` value is encoded as `T`.
    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
        match decoded {
            Some(inner) => <Self as TextCodec<T>>::encode(self, inner),
            None => Ok(None),
        }
    }
}
353
#[cfg(feature = "base64")]
impl<T: base64::engine::Engine> TextCodec<Vec<u8>> for T {
    /// Decode base64 text into bytes using this engine.
    ///
    /// A decoding failure is reported as a text parse error.
    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
        match base64::engine::Engine::decode(self, s.as_bytes()) {
            Ok(bytes) => Ok(bytes),
            Err(err) => Err(Error::text_parse_error(err)),
        }
    }

    /// Encode bytes as base64 text using this engine. This never fails and
    /// always emits a value.
    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
        let encoded = base64::engine::Engine::encode(self, value.as_slice());
        Ok(Some(Cow::Owned(encoded)))
    }
}
366
#[cfg(feature = "base64")]
impl<T: base64::engine::Engine, U> TextCodec<Option<U>> for T
where
    T: TextCodec<U>,
{
    /// Decode empty text as `None`; any other text is decoded as `U` and
    /// wrapped in `Some`.
    fn decode(&self, s: String) -> Result<Option<U>, Error> {
        if s.is_empty() {
            return Ok(None);
        }
        // Name the inner codec explicitly so that this cannot be mistaken
        // for a recursive call into the `Option<U>` implementation.
        <T as TextCodec<U>>::decode(self, s).map(Some)
    }

    /// Encode `None` as "no text"; a `Some` value is encoded as `U`.
    fn encode<'x>(&self, decoded: &'x Option<U>) -> Result<Option<Cow<'x, str>>, Error> {
        match decoded {
            Some(inner) => <T as TextCodec<U>>::encode(self, inner),
            None => Ok(None),
        }
    }
}
387
388/// Text codec transforming text to binary using hexadecimal nibbles.
389///
390/// The length must be known at compile-time.
391pub struct FixedHex<const N: usize>;
392
393impl<const N: usize> TextCodec<[u8; N]> for FixedHex<N> {
394    fn decode(&self, s: String) -> Result<[u8; N], Error> {
395        if s.len() != 2 * N {
396            return Err(Error::Other("Invalid length"));
397        }
398
399        let mut bytes = [0u8; N];
400        for i in 0..N {
401            bytes[i] =
402                u8::from_str_radix(&s[2 * i..2 * i + 2], 16).map_err(Error::text_parse_error)?;
403        }
404
405        Ok(bytes)
406    }
407
408    fn encode<'x>(&self, value: &'x [u8; N]) -> Result<Option<Cow<'x, str>>, Error> {
409        let mut bytes = String::with_capacity(N * 2);
410        for byte in value {
411            bytes.extend(format!("{:02x}", byte).chars());
412        }
413        Ok(Some(Cow::Owned(bytes)))
414    }
415}
416
417impl<T, const N: usize> TextCodec<Option<T>> for FixedHex<N>
418where
419    FixedHex<N>: TextCodec<T>,
420{
421    fn decode(&self, s: String) -> Result<Option<T>, Error> {
422        if s.is_empty() {
423            return Ok(None);
424        }
425        Ok(Some(self.decode(s)?))
426    }
427
428    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
429        decoded
430            .as_ref()
431            .map(|x| self.encode(x))
432            .transpose()
433            .map(Option::flatten)
434    }
435}
436
437/// Text codec for colon-separated bytes of uppercase hexadecimal.
438pub struct ColonSeparatedHex;
439
440impl TextCodec<Vec<u8>> for ColonSeparatedHex {
441    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
442        assert_eq!((s.len() + 1) % 3, 0);
443        let mut bytes = Vec::with_capacity((s.len() + 1) / 3);
444        for i in 0..(1 + s.len()) / 3 {
445            let byte =
446                u8::from_str_radix(&s[3 * i..3 * i + 2], 16).map_err(Error::text_parse_error)?;
447            if 3 * i + 2 < s.len() {
448                assert_eq!(&s[3 * i + 2..3 * i + 3], ":");
449            }
450            bytes.push(byte);
451        }
452        Ok(bytes)
453    }
454
455    fn encode<'x>(&self, decoded: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
456        // TODO: Super inefficient!
457        let mut bytes = Vec::with_capacity(decoded.len());
458        for byte in decoded {
459            bytes.push(format!("{:02X}", byte));
460        }
461        Ok(Some(Cow::Owned(bytes.join(":"))))
462    }
463}