xso_proc/field/
mod.rs

1// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7//! Compound (struct or enum variant) field types
8
9use proc_macro2::{Span, TokenStream};
10use syn::{spanned::Spanned, *};
11
12use rxml_validation::NcName;
13
14use crate::compound::Compound;
15use crate::error_message::ParentRef;
16use crate::meta::{
17    AmountConstraint, AttributeKind, Flag, NameRef, NamespaceRef, QNameRef, XmlFieldMeta,
18};
19use crate::scope::{AsItemsScope, FromEventsScope};
20
21mod attribute;
22mod child;
23#[cfg(feature = "minidom")]
24mod element;
25mod flag;
26mod text;
27
28use self::attribute::{AttributeField, AttributeFieldKind};
29use self::child::{ChildField, ExtractDef};
30#[cfg(feature = "minidom")]
31use self::element::ElementField;
32use self::flag::FlagField;
33use self::text::TextField;
34
35/// Code slices necessary for declaring and initializing a temporary variable
36/// for parsing purposes.
37pub(crate) struct FieldTempInit {
38    /// The type of the temporary variable.
39    pub(crate) ty: Type,
40
41    /// The initializer for the temporary variable.
42    pub(crate) init: TokenStream,
43}
44
45/// Configure how a nested field builder selects child elements.
46pub(crate) enum NestedMatcher {
47    /// Matches a specific child element fallabily.
48    Selective(
49        /// Expression which evaluates to `Result<T, FromEventsError>`,
50        /// consuming `name: rxml::QName` and `attrs: rxml::AttrMap`.
51        ///
52        /// If the `name` and `attrs` allow starting to parse the child
53        /// element as a value of this field, `Ok(_)` must be returned. If
54        /// the `name` and `attrs` are those of an element which *could*
55        /// be a value of this field, but they have invalid contents,
56        /// `Err(FromEventsError::Invalid(_))` must be returned. Depending
57        /// on the field kind, it may also be acceptable to return the
58        /// `Invalid` variant if the data is valid, but no further child
59        /// element can be accepted into the value.
60        ///
61        /// Otherwise, the `name` and `attrs` must be returned *unchanged* in
62        /// a `FromEventsError::Mismatch { .. }` variant. In that case, the
63        /// implementation in `Compound` will let the next field attempt to
64        /// parse the child element.
65        ///
66        /// `T` must be the type specified in the
67        /// [`FieldBuilderPart::Nested::builder`]  field.
68        TokenStream,
69    ),
70
71    #[cfg_attr(not(feature = "minidom"), allow(dead_code))]
72    /// Matches any child element not matched by another matcher.
73    ///
74    /// Only a single field may use this variant, otherwise an error is
75    /// raised during execution of the proc macro.
76    Fallback(
77        /// Expression which evaluates to `T` (or `return`s an error),
78        /// consuming `name: rxml::QName` and `attrs: rxml::AttrMap`.
79        ///
80        /// Unlike the [`Selective`][`Self::Selective`] variant, this
81        /// expression must always evaluate to an instance of `T`. If that is
82        /// not possible, the expression must diverge, most commonly using
83        /// `return` with a `Err::<_, xso::error::Error>(_)`.
84        ///
85        /// `T` must be the type specified in the
86        /// [`FieldBuilderPart::Nested::builder`]  field.
87        TokenStream,
88    ),
89}
90
91/// Describe how a struct or enum variant's member is parsed from XML data.
92///
93/// This struct is returned from [`FieldDef::make_builder_part`] and
94/// contains code snippets and instructions for
95/// [`Compound::make_from_events_statemachine`][`crate::compound::Compound::make_from_events_statemachine`]
96/// to parse the field's data from XML.
97pub(crate) enum FieldBuilderPart {
98    /// Parse a field from the item's element's start event.
99    Init {
100        /// Expression and type which extracts the field's data from the
101        /// element's start event.
102        value: FieldTempInit,
103    },
104
105    /// Parse a field from text events.
106    Text {
107        /// Expression and type which initializes a buffer to use during
108        /// parsing.
109        value: FieldTempInit,
110
111        /// Statement which takes text and accumulates it into the temporary
112        /// value declared via `value`.
113        collect: TokenStream,
114
115        /// Expression which evaluates to the field's type, consuming the
116        /// temporary value.
117        finalize: TokenStream,
118    },
119
120    /// Parse a field from child element events.
121    Nested {
122        /// Additional definition items which need to be inserted at module
123        /// level for the rest of the implementation to work.
124        extra_defs: TokenStream,
125
126        /// Expression and type which initializes a buffer to use during
127        /// parsing.
128        value: FieldTempInit,
129
130        /// Configure child matching behaviour for this field. See
131        /// [`NestedMatcher`] for options.
132        matcher: NestedMatcher,
133
134        /// Type implementing `xso::FromEventsBuilder` which parses the child
135        /// element.
136        ///
137        /// This type is returned by the expressions in
138        /// [`matcher`][`Self::Nested::matcher`].
139        builder: Type,
140
141        /// Expression which consumes the value stored in the identifier
142        /// [`crate::common::FromEventsScope::substate_result`][`FromEventsScope::substate_result`]
143        /// and somehow collects it into the field declared with
144        /// [`value`][`Self::Nested::value`].
145        collect: TokenStream,
146
147        /// Expression which consumes the data from the field declared with
148        /// [`value`][`Self::Nested::value`] and converts it into the field's
149        /// type.
150        finalize: TokenStream,
151    },
152}
153
154/// Describe how a struct or enum variant's member is converted to XML data.
155///
156/// This struct is returned from [`FieldDef::make_iterator_part`] and
157/// contains code snippets and instructions for
158/// [`Compound::make_into_events_statemachine`][`crate::compound::Compound::make_into_events_statemachine`]
159/// to convert the field's data into XML.
160pub(crate) enum FieldIteratorPart {
161    /// The field is emitted as part of StartElement.
162    Header {
163        /// An expression which consumes the field's value and returns a
164        /// `Item`.
165        generator: TokenStream,
166    },
167
168    /// The field is emitted as text item.
169    Text {
170        /// An expression which consumes the field's value and returns a
171        /// String, which is then emitted as text data.
172        generator: TokenStream,
173    },
174
175    /// The field is emitted as series of items which form a child element.
176    Content {
177        /// Additional definition items which need to be inserted at module
178        /// level for the rest of the implementation to work.
179        extra_defs: TokenStream,
180
181        /// Expression and type which initializes the nested iterator.
182        ///
183        /// Note that this is evaluated at construction time of the iterator.
184        /// Fields of this variant do not get access to their original data,
185        /// unless they carry it in the contents of this `value`.
186        value: FieldTempInit,
187
188        /// An expression which uses the value (mutably) and evaluates to
189        /// a Result<Option<Item>, Error>. Once the state returns None, the
190        /// processing will advance to the next state.
191        generator: TokenStream,
192    },
193}
194
195trait Field {
196    /// Construct the builder pieces for this field.
197    ///
198    /// `container_name` must be a reference to the compound's type, so that
199    /// it can be used for error messages.
200    ///
201    /// `member` and `ty` refer to the field itself.
202    fn make_builder_part(
203        &self,
204        scope: &FromEventsScope,
205        container_name: &ParentRef,
206        member: &Member,
207        ty: &Type,
208    ) -> Result<FieldBuilderPart>;
209
210    /// Construct the iterator pieces for this field.
211    ///
212    /// `bound_name` must be the name to which the field's value is bound in
213    /// the iterator code.
214    ///
215    /// `member` and `ty` refer to the field itself.
216    ///
217    /// `bound_name` is the name under which the field's value is accessible
218    /// in the various parts of the code.
219    fn make_iterator_part(
220        &self,
221        scope: &AsItemsScope,
222        container_name: &ParentRef,
223        bound_name: &Ident,
224        member: &Member,
225        ty: &Type,
226    ) -> Result<FieldIteratorPart>;
227
228    /// Return true if and only if this field captures text content.
229    fn captures_text(&self) -> bool {
230        false
231    }
232
233    /// Return a QNameRef if the field captures an attribute.
234    fn captures_attribute(&self) -> Option<QNameRef> {
235        None
236    }
237}
238
239fn default_name(span: Span, name: Option<NameRef>, field_ident: Option<&Ident>) -> Result<NameRef> {
240    match name {
241        Some(v) => Ok(v),
242        None => match field_ident {
243            None => Err(Error::new(
244                span,
245                "name must be explicitly specified with the `name` key on unnamed fields",
246            )),
247            Some(field_ident) => match NcName::try_from(field_ident.to_string()) {
248                Ok(value) => Ok(NameRef::Literal {
249                    span: field_ident.span(),
250                    value,
251                }),
252                Err(e) => Err(Error::new(
253                    field_ident.span(),
254                    format!("invalid XML name: {}", e),
255                )),
256            },
257        },
258    }
259}
260
261/// Construct a new field implementation from the meta attributes.
262///
263/// `field_ident` is, for some field types, used to infer an XML name if
264/// it is not specified explicitly.
265///
266/// `field_ty` is needed for type inference on extracted fields.
267///
268/// `container_namespace` is used in some cases to insert a default
269/// namespace.
270fn new_field(
271    meta: XmlFieldMeta,
272    field_ident: Option<&Ident>,
273    field_ty: &Type,
274    container_namespace: &NamespaceRef,
275) -> Result<Box<dyn Field>> {
276    match meta {
277        XmlFieldMeta::Attribute {
278            span,
279            kind: AttributeKind::Generic(QNameRef { name, namespace }),
280            default_,
281            type_,
282            codec,
283        } => {
284            let xml_name = default_name(span, name, field_ident)?;
285
286            // This would've been taken via `XmlFieldMeta::take_type` if
287            // this field was within an extract where a `type_` is legal
288            // to have.
289            if let Some(type_) = type_ {
290                return Err(Error::new_spanned(
291                    type_,
292                    "specifying `type_` on fields inside structs and enum variants is redundant and not allowed."
293                ));
294            }
295
296            Ok(Box::new(AttributeField {
297                kind: AttributeFieldKind::Generic {
298                    xml_name,
299                    xml_namespace: namespace,
300                },
301                default_,
302                codec,
303            }))
304        }
305
306        XmlFieldMeta::Attribute {
307            span: _,
308            kind: AttributeKind::XmlLang,
309            default_,
310            type_,
311            codec,
312        } => {
313            // This would've been taken via `XmlFieldMeta::take_type` if
314            // this field was within an extract where a `type_` is legal
315            // to have.
316            if let Some(type_) = type_ {
317                return Err(Error::new_spanned(
318                    type_,
319                    "specifying `type_` on fields inside structs and enum variants is redundant and not allowed."
320                ));
321            }
322
323            Ok(Box::new(AttributeField {
324                kind: AttributeFieldKind::XmlLang,
325                default_,
326                codec,
327            }))
328        }
329
330        XmlFieldMeta::Text {
331            span: _,
332            codec,
333            type_,
334        } => {
335            // This would've been taken via `XmlFieldMeta::take_type` if
336            // this field was within an extract where a `type_` is legal
337            // to have.
338            if let Some(type_) = type_ {
339                return Err(Error::new_spanned(
340                    type_,
341                    "specifying `type_` on fields inside structs and enum variants is redundant and not allowed."
342                ));
343            }
344
345            Ok(Box::new(TextField { codec }))
346        }
347
348        XmlFieldMeta::Child {
349            span: _,
350            default_,
351            amount,
352        } => {
353            if let Some(AmountConstraint::Any(ref amount_span)) = amount {
354                if let Flag::Present(ref flag_span) = default_ {
355                    let mut err =
356                        Error::new(*flag_span, "`default` has no meaning for child collections");
357                    err.combine(Error::new(
358                        *amount_span,
359                        "the field is treated as a collection because of this `n` value",
360                    ));
361                    return Err(err);
362                }
363            }
364
365            Ok(Box::new(ChildField {
366                default_,
367                amount: amount.unwrap_or(AmountConstraint::FixedSingle(Span::call_site())),
368                extract: None,
369            }))
370        }
371
372        XmlFieldMeta::Extract {
373            span,
374            default_,
375            qname: QNameRef { namespace, name },
376            amount,
377            fields,
378            on_unknown_attribute,
379            on_unknown_child,
380        } => {
381            let xml_namespace = namespace.unwrap_or_else(|| container_namespace.clone());
382            let xml_name = default_name(span, name, field_ident)?;
383
384            let amount = amount.unwrap_or(AmountConstraint::FixedSingle(Span::call_site()));
385            match amount {
386                AmountConstraint::Any(ref amount) => {
387                    if let Flag::Present(default_) = default_ {
388                        let mut err = Error::new(
389                            default_,
390                            "default cannot be set when collecting into a collection",
391                        );
392                        err.combine(Error::new(
393                            *amount,
394                            "`n` was set to a non-1 value here, which enables collection logic",
395                        ));
396                        return Err(err);
397                    }
398                }
399                AmountConstraint::FixedSingle(_) => (),
400            }
401
402            let mut field_defs = Vec::new();
403            let allow_inference =
404                matches!(amount, AmountConstraint::FixedSingle(_)) && fields.len() == 1;
405            for (i, mut field) in fields.into_iter().enumerate() {
406                let field_ty = match field.take_type() {
407                    Some(v) => v,
408                    None => {
409                        if allow_inference {
410                            field_ty.clone()
411                        } else {
412                            return Err(Error::new(
413                            field.span(),
414                            "extracted field must specify a type explicitly when extracting into a collection or when extracting more than one field."
415                        ));
416                        }
417                    }
418                };
419
420                field_defs.push(FieldDef::from_extract(
421                    field,
422                    i as u32,
423                    &field_ty,
424                    &xml_namespace,
425                ));
426            }
427            let parts = Compound::from_field_defs(
428                field_defs,
429                on_unknown_attribute,
430                on_unknown_child,
431                vec![],
432            )?;
433
434            Ok(Box::new(ChildField {
435                default_,
436                amount,
437                extract: Some(ExtractDef {
438                    xml_namespace,
439                    xml_name,
440                    parts,
441                }),
442            }))
443        }
444
445        #[cfg(feature = "minidom")]
446        XmlFieldMeta::Element {
447            span,
448            default_,
449            amount,
450        } => Ok(Box::new(ElementField {
451            default_,
452            amount: amount.unwrap_or(AmountConstraint::FixedSingle(span)),
453        })),
454
455        #[cfg(not(feature = "minidom"))]
456        XmlFieldMeta::Element {
457            span,
458            amount,
459            default_,
460        } => {
461            let _ = amount;
462            let _ = default_;
463            Err(Error::new(
464                span,
465                "#[xml(element)] requires xso to be built with the \"minidom\" feature.",
466            ))
467        }
468
469        XmlFieldMeta::Flag {
470            span,
471            qname: QNameRef { namespace, name },
472        } => {
473            let xml_namespace = namespace.unwrap_or_else(|| container_namespace.clone());
474            let xml_name = default_name(span, name, field_ident)?;
475            Ok(Box::new(FlagField {
476                xml_namespace,
477                xml_name,
478            }))
479        }
480    }
481}
482
483/// Definition of a single field in a compound.
484///
485/// See [`Compound`][`crate::compound::Compound`] for more information on
486/// compounds in general.
487pub(crate) struct FieldDef {
488    /// A span which refers to the field's definition.
489    span: Span,
490
491    /// The member identifying the field.
492    member: Member,
493
494    /// The type of the field.
495    ty: Type,
496
497    /// The way the field is mapped to XML.
498    inner: Box<dyn Field>,
499}
500
501impl FieldDef {
502    /// Create a new field definition from its declaration.
503    ///
504    /// The `index` must be the zero-based index of the field even for named
505    /// fields.
506    pub(crate) fn from_field(
507        field: &syn::Field,
508        index: u32,
509        container_namespace: &NamespaceRef,
510    ) -> Result<Self> {
511        let (member, ident) = match field.ident.as_ref() {
512            Some(v) => (Member::Named(v.clone()), Some(v)),
513            None => (
514                Member::Unnamed(Index {
515                    index,
516                    // We use the type's span here, because `field.span()`
517                    // will visually point at the `#[xml(..)]` meta, which is
518                    // not helpful when glancing at error messages referring
519                    // to the field itself.
520                    span: field.ty.span(),
521                }),
522                None,
523            ),
524        };
525        // This will either be the field's identifier's span (for named
526        // fields) or the field's type (for unnamed fields), which should give
527        // the user a good visual feedback about which field an error message
528        // is.
529        let field_span = member.span();
530        let meta = XmlFieldMeta::parse_from_attributes(&field.attrs, &field_span)?;
531        let ty = field.ty.clone();
532
533        Ok(Self {
534            span: field_span,
535            inner: new_field(meta, ident, &ty, container_namespace)?,
536            member,
537            ty,
538        })
539    }
540
541    /// Create a new field definition from its declaration.
542    ///
543    /// The `index` must be the zero-based index of the field even for named
544    /// fields.
545    pub(crate) fn from_extract(
546        meta: XmlFieldMeta,
547        index: u32,
548        ty: &Type,
549        container_namespace: &NamespaceRef,
550    ) -> Result<Self> {
551        let span = meta.span();
552        Ok(Self {
553            span,
554            member: Member::Unnamed(Index { index, span }),
555            ty: ty.clone(),
556            inner: new_field(meta, None, ty, container_namespace)?,
557        })
558    }
559
560    /// Access the [`syn::Member`] identifying this field in the original
561    /// type.
562    pub(crate) fn member(&self) -> &Member {
563        &self.member
564    }
565
566    /// Access the field's type.
567    pub(crate) fn ty(&self) -> &Type {
568        &self.ty
569    }
570
571    /// Construct the builder pieces for this field.
572    ///
573    /// `container_name` must be a reference to the compound's type, so that
574    /// it can be used for error messages.
575    pub(crate) fn make_builder_part(
576        &self,
577        scope: &FromEventsScope,
578        container_name: &ParentRef,
579    ) -> Result<FieldBuilderPart> {
580        self.inner
581            .make_builder_part(scope, container_name, &self.member, &self.ty)
582    }
583
584    /// Construct the iterator pieces for this field.
585    ///
586    /// `bound_name` must be the name to which the field's value is bound in
587    /// the iterator code.
588    pub(crate) fn make_iterator_part(
589        &self,
590        scope: &AsItemsScope,
591        container_name: &ParentRef,
592        bound_name: &Ident,
593    ) -> Result<FieldIteratorPart> {
594        self.inner
595            .make_iterator_part(scope, container_name, bound_name, &self.member, &self.ty)
596    }
597
598    /// Return true if this field's parsing consumes text data.
599    pub(crate) fn is_text_field(&self) -> bool {
600        self.inner.captures_text()
601    }
602
603    /// Return a QNameRef if the field captures an attribute.
604    pub(crate) fn captures_attribute(&self) -> Option<QNameRef> {
605        self.inner.captures_attribute()
606    }
607
608    /// Return a span which points at the field's definition.
609    pub(crate) fn span(&self) -> Span {
610        self.span
611    }
612}