xmpp_parsers/
xhtml.rs

1// Copyright (c) 2019 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7use crate::message::MessagePayload;
8use crate::ns;
9use alloc::collections::BTreeMap;
10use minidom::{Element, Node};
11use xso::exports::rxml;
12use xso::{
13    error::{Error, FromElementError},
14    exports::rxml::Namespace,
15};
16
// TODO: Use a proper lang type.
/// Language tag used as the key of the per-language body map; the empty
/// string is the key of a body that carries no `xml:lang` attribute.
type Lang = String;
19
20/// Container for formatted text.
21#[derive(Debug, Clone)]
22pub struct XhtmlIm {
23    /// Map of language to body element.
24    bodies: BTreeMap<Lang, Body>,
25}
26
27impl XhtmlIm {
28    /// Serialise formatted text to HTML.
29    pub fn into_html(self) -> String {
30        let mut html = Vec::new();
31        // TODO: use the best language instead.
32        // XXX: Remove this flag later when fixing the code below
33        #[allow(clippy::never_loop)]
34        for (lang, body) in self.bodies {
35            if lang.is_empty() {
36                assert!(body.xml_lang.is_none());
37            } else {
38                assert_eq!(Some(lang), body.xml_lang);
39            }
40            for tag in body.children {
41                html.push(tag.into_html());
42            }
43            break;
44        }
45        html.concat()
46    }
47
48    /// Removes all unknown elements.
49    fn flatten(self) -> XhtmlIm {
50        let mut bodies = BTreeMap::new();
51        for (lang, body) in self.bodies {
52            let children = body.children.into_iter().fold(vec![], |mut acc, child| {
53                match child {
54                    Child::Tag(Tag::Unknown(children)) => acc.extend(children),
55                    any => acc.push(any),
56                }
57                acc
58            });
59            let body = Body { children, ..body };
60            bodies.insert(lang, body);
61        }
62        XhtmlIm { bodies }
63    }
64}
65
66impl MessagePayload for XhtmlIm {}
67
68impl TryFrom<Element> for XhtmlIm {
69    type Error = FromElementError;
70
71    fn try_from(elem: Element) -> Result<XhtmlIm, FromElementError> {
72        check_self!(elem, "html", XHTML_IM);
73        check_no_attributes!(elem, "html");
74
75        let mut bodies = BTreeMap::new();
76        for child in elem.children() {
77            if child.is("body", ns::XHTML) {
78                let child = child.clone();
79                let lang = child
80                    .attr_ns(rxml::Namespace::xml(), "lang")
81                    .unwrap_or("")
82                    .to_string();
83                let body = Body::try_from(child)?;
84                match bodies.insert(lang, body) {
85                    None => (),
86                    Some(_) => {
87                        return Err(Error::Other(
88                            "Two identical language bodies found in XHTML-IM.",
89                        )
90                        .into())
91                    }
92                }
93            } else {
94                return Err(Error::Other("Unknown element in XHTML-IM.").into());
95            }
96        }
97
98        Ok(XhtmlIm { bodies }.flatten())
99    }
100}
101
102impl From<XhtmlIm> for Element {
103    fn from(wrapper: XhtmlIm) -> Element {
104        Element::builder("html", ns::XHTML_IM)
105            .append_all(wrapper.bodies.into_iter().map(|(lang, body)| {
106                if lang.is_empty() {
107                    assert!(body.xml_lang.is_none());
108                } else {
109                    assert_eq!(Some(lang), body.xml_lang);
110                }
111                Element::from(body)
112            }))
113            .build()
114    }
115}
116
117#[derive(Debug, Clone)]
118enum Child {
119    Tag(Tag),
120    Text(String),
121}
122
123impl Child {
124    fn into_html(self) -> String {
125        match self {
126            Child::Tag(tag) => tag.into_html(),
127            Child::Text(text) => text,
128        }
129    }
130}
131
/// A single CSS declaration taken from a `style` attribute.
#[derive(Debug, Clone)]
struct Property {
    /// Text before the colon.
    key: String,
    /// Text after the colon.
    value: String,
}
137
138type Css = Vec<Property>;
139
140fn get_style_string(style: Css) -> Option<String> {
141    let mut result = vec![];
142    for Property { key, value } in style {
143        result.push(format!("{}: {}", key, value));
144    }
145    if result.is_empty() {
146        return None;
147    }
148    Some(result.join("; "))
149}
150
151#[derive(Debug, Clone)]
152struct Body {
153    style: Css,
154    xml_lang: Option<String>,
155    children: Vec<Child>,
156}
157
158impl TryFrom<Element> for Body {
159    type Error = Error;
160
161    fn try_from(elem: Element) -> Result<Body, Error> {
162        let mut children = vec![];
163        for child in elem.nodes() {
164            match child {
165                Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)),
166                Node::Text(text) => children.push(Child::Text(text.clone())),
167            }
168        }
169
170        Ok(Body {
171            style: parse_css(elem.attr("style")),
172            xml_lang: elem.attr_ns("xml", "lang").map(ToString::to_string),
173            children,
174        })
175    }
176}
177
178impl From<Body> for Element {
179    fn from(body: Body) -> Element {
180        Element::builder("body", ns::XHTML)
181            .attr(
182                rxml::xml_ncname!("style").into(),
183                get_style_string(body.style),
184            )
185            .attr_ns(
186                Into::<Namespace>::into(String::from("xml")),
187                rxml::xml_ncname!("lang").into(),
188                body.xml_lang,
189            )
190            .append_all(children_to_nodes(body.children))
191            .build()
192    }
193}
194
195#[derive(Debug, Clone)]
196enum Tag {
197    A {
198        href: Option<String>,
199        style: Css,
200        type_: Option<String>,
201        children: Vec<Child>,
202    },
203    Blockquote {
204        style: Css,
205        children: Vec<Child>,
206    },
207    Br,
208    Cite {
209        style: Css,
210        children: Vec<Child>,
211    },
212    Em {
213        children: Vec<Child>,
214    },
215    Img {
216        src: Option<String>,
217        alt: Option<String>,
218    }, // TODO: height, width, style
219    Li {
220        style: Css,
221        children: Vec<Child>,
222    },
223    Ol {
224        style: Css,
225        children: Vec<Child>,
226    },
227    P {
228        style: Css,
229        children: Vec<Child>,
230    },
231    Span {
232        style: Css,
233        children: Vec<Child>,
234    },
235    Strong {
236        children: Vec<Child>,
237    },
238    Ul {
239        style: Css,
240        children: Vec<Child>,
241    },
242    Unknown(Vec<Child>),
243}
244
245impl Tag {
246    fn into_html(self) -> String {
247        match self {
248            Tag::A {
249                href,
250                style,
251                type_,
252                children,
253            } => {
254                let href = write_attr(href, "href");
255                let style = write_attr(get_style_string(style), "style");
256                let type_ = write_attr(type_, "type");
257                format!(
258                    "<a{}{}{}>{}</a>",
259                    href,
260                    style,
261                    type_,
262                    children_to_html(children)
263                )
264            }
265            Tag::Blockquote { style, children } => {
266                let style = write_attr(get_style_string(style), "style");
267                format!(
268                    "<blockquote{}>{}</blockquote>",
269                    style,
270                    children_to_html(children)
271                )
272            }
273            Tag::Br => String::from("<br>"),
274            Tag::Cite { style, children } => {
275                let style = write_attr(get_style_string(style), "style");
276                format!("<cite{}>{}</cite>", style, children_to_html(children))
277            }
278            Tag::Em { children } => format!("<em>{}</em>", children_to_html(children)),
279            Tag::Img { src, alt } => {
280                let src = write_attr(src, "src");
281                let alt = write_attr(alt, "alt");
282                format!("<img{}{}>", src, alt)
283            }
284            Tag::Li { style, children } => {
285                let style = write_attr(get_style_string(style), "style");
286                format!("<li{}>{}</li>", style, children_to_html(children))
287            }
288            Tag::Ol { style, children } => {
289                let style = write_attr(get_style_string(style), "style");
290                format!("<ol{}>{}</ol>", style, children_to_html(children))
291            }
292            Tag::P { style, children } => {
293                let style = write_attr(get_style_string(style), "style");
294                format!("<p{}>{}</p>", style, children_to_html(children))
295            }
296            Tag::Span { style, children } => {
297                let style = write_attr(get_style_string(style), "style");
298                format!("<span{}>{}</span>", style, children_to_html(children))
299            }
300            Tag::Strong { children } => format!("<strong>{}</strong>", children_to_html(children)),
301            Tag::Ul { style, children } => {
302                let style = write_attr(get_style_string(style), "style");
303                format!("<ul{}>{}</ul>", style, children_to_html(children))
304            }
305            Tag::Unknown(_) => {
306                panic!("No unknown element should be present in XHTML-IM after parsing.")
307            }
308        }
309    }
310}
311
312impl TryFrom<Element> for Tag {
313    type Error = Error;
314
315    fn try_from(elem: Element) -> Result<Tag, Error> {
316        let mut children = vec![];
317        for child in elem.nodes() {
318            match child {
319                Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)),
320                Node::Text(text) => children.push(Child::Text(text.clone())),
321            }
322        }
323
324        Ok(match elem.name() {
325            "a" => Tag::A {
326                href: elem.attr("href").map(ToString::to_string),
327                style: parse_css(elem.attr(rxml::xml_ncname!("style"))),
328                type_: elem.attr("type").map(ToString::to_string),
329                children,
330            },
331            "blockquote" => Tag::Blockquote {
332                style: parse_css(elem.attr("style")),
333                children,
334            },
335            "br" => Tag::Br,
336            "cite" => Tag::Cite {
337                style: parse_css(elem.attr("style")),
338                children,
339            },
340            "em" => Tag::Em { children },
341            "img" => Tag::Img {
342                src: elem.attr("src").map(ToString::to_string),
343                alt: elem.attr("alt").map(ToString::to_string),
344            },
345            "li" => Tag::Li {
346                style: parse_css(elem.attr("style")),
347                children,
348            },
349            "ol" => Tag::Ol {
350                style: parse_css(elem.attr("style")),
351                children,
352            },
353            "p" => Tag::P {
354                style: parse_css(elem.attr("style")),
355                children,
356            },
357            "span" => Tag::Span {
358                style: parse_css(elem.attr("style")),
359                children,
360            },
361            "strong" => Tag::Strong { children },
362            "ul" => Tag::Ul {
363                style: parse_css(elem.attr("style")),
364                children,
365            },
366            _ => Tag::Unknown(children),
367        })
368    }
369}
370
371impl From<Tag> for Element {
372    fn from(tag: Tag) -> Element {
373        let (name, attrs, children) = match tag {
374            Tag::A {
375                href,
376                style,
377                type_,
378                children,
379            } => (
380                "a",
381                {
382                    let mut attrs = vec![];
383                    if let Some(href) = href {
384                        attrs.push((rxml::xml_ncname!("href"), href));
385                    }
386                    if let Some(style) = get_style_string(style) {
387                        attrs.push((rxml::xml_ncname!("style"), style));
388                    }
389                    if let Some(type_) = type_ {
390                        attrs.push((rxml::xml_ncname!("type"), type_));
391                    }
392                    attrs
393                },
394                children,
395            ),
396            Tag::Blockquote { style, children } => (
397                "blockquote",
398                match get_style_string(style) {
399                    Some(style) => vec![(rxml::xml_ncname!("style"), style)],
400                    None => vec![],
401                },
402                children,
403            ),
404            Tag::Br => ("br", vec![], vec![]),
405            Tag::Cite { style, children } => (
406                "cite",
407                match get_style_string(style) {
408                    Some(style) => vec![(rxml::xml_ncname!("style"), style)],
409                    None => vec![],
410                },
411                children,
412            ),
413            Tag::Em { children } => ("em", vec![], children),
414            Tag::Img { src, alt } => {
415                let mut attrs = vec![];
416                if let Some(src) = src {
417                    attrs.push((rxml::xml_ncname!("src"), src));
418                }
419                if let Some(alt) = alt {
420                    attrs.push((rxml::xml_ncname!("alt"), alt));
421                }
422                ("img", attrs, vec![])
423            }
424            Tag::Li { style, children } => (
425                "li",
426                match get_style_string(style) {
427                    Some(style) => vec![(rxml::xml_ncname!("style"), style)],
428                    None => vec![],
429                },
430                children,
431            ),
432            Tag::Ol { style, children } => (
433                "ol",
434                match get_style_string(style) {
435                    Some(style) => vec![(rxml::xml_ncname!("style"), style)],
436                    None => vec![],
437                },
438                children,
439            ),
440            Tag::P { style, children } => (
441                "p",
442                match get_style_string(style) {
443                    Some(style) => vec![(rxml::xml_ncname!("style"), style)],
444                    None => vec![],
445                },
446                children,
447            ),
448            Tag::Span { style, children } => (
449                "span",
450                match get_style_string(style) {
451                    Some(style) => vec![(rxml::xml_ncname!("style"), style)],
452                    None => vec![],
453                },
454                children,
455            ),
456            Tag::Strong { children } => ("strong", vec![], children),
457            Tag::Ul { style, children } => (
458                "ul",
459                match get_style_string(style) {
460                    Some(style) => vec![(rxml::xml_ncname!("style"), style)],
461                    None => vec![],
462                },
463                children,
464            ),
465            Tag::Unknown(_) => {
466                panic!("No unknown element should be present in XHTML-IM after parsing.")
467            }
468        };
469        let mut builder = Element::builder(name, ns::XHTML).append_all(children_to_nodes(children));
470        for (key, value) in attrs {
471            builder = builder.attr(key.into(), value);
472        }
473        builder.build()
474    }
475}
476
477fn children_to_nodes(children: Vec<Child>) -> impl IntoIterator<Item = Node> {
478    children.into_iter().map(|child| match child {
479        Child::Tag(tag) => Node::Element(Element::from(tag)),
480        Child::Text(text) => Node::Text(text),
481    })
482}
483
484fn children_to_html(children: Vec<Child>) -> String {
485    children
486        .into_iter()
487        .map(|child| child.into_html())
488        .collect::<Vec<_>>()
489        .concat()
490}
491
/// Renders ` name='value'` (with a leading space) for a present attribute,
/// or an empty string for an absent one.
///
/// The value is escaped for use inside a quoted HTML attribute. It comes
/// from remote input, and an unescaped `'` would end the attribute early and
/// let the rest of the value inject further attributes or markup.
fn write_attr(attr: Option<String>, name: &str) -> String {
    match attr {
        Some(value) => {
            let mut escaped = String::with_capacity(value.len());
            for c in value.chars() {
                match c {
                    '&' => escaped.push_str("&amp;"),
                    '<' => escaped.push_str("&lt;"),
                    '>' => escaped.push_str("&gt;"),
                    '\'' => escaped.push_str("&#39;"),
                    '"' => escaped.push_str("&quot;"),
                    other => escaped.push(other),
                }
            }
            format!(" {}='{}'", name, escaped)
        }
        None => String::new(),
    }
}
498
499fn parse_css(style: Option<&str>) -> Css {
500    let mut properties = vec![];
501    if let Some(style) = style {
502        // TODO: make that parser a bit more resilient to things.
503        for declaration in style.split(';') {
504            let (key, value) = declaration.split_once(':').unwrap();
505            properties.push(Property {
506                key: key.to_string(),
507                value: value.to_string(),
508            });
509        }
510    }
511    properties
512}
513
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses an XML literal into an `Element`, panicking if it is malformed.
    fn parse(xml: &str) -> Element {
        xml.parse().unwrap()
    }

    // Type sizes are pinned per pointer width.
    #[cfg(target_pointer_width = "32")]
    #[test]
    fn test_size() {
        assert_size!(XhtmlIm, 12);
        assert_size!(Child, 48);
        assert_size!(Tag, 48);
    }

    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_size() {
        assert_size!(XhtmlIm, 24);
        assert_size!(Child, 96);
        assert_size!(Tag, 96);
    }

    /// Wrappers holding zero, one and two empty bodies.
    #[test]
    fn test_empty() {
        let cases = [
            ("<html xmlns='http://jabber.org/protocol/xhtml-im'/>", 0),
            ("<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'/></html>", 1),
            ("<html xmlns='http://jabber.org/protocol/xhtml-im' xmlns:html='http://www.w3.org/1999/xhtml'><html:body xml:lang='fr'/><html:body xml:lang='en'/></html>", 2),
        ];
        for &(xml, expected) in &cases {
            let xhtml = XhtmlIm::try_from(parse(xml)).unwrap();
            assert_eq!(xhtml.bodies.len(), expected);
        }
    }

    /// Two bodies sharing the same (absent) language are rejected.
    #[test]
    fn invalid_two_same_langs() {
        let elem = parse("<html xmlns='http://jabber.org/protocol/xhtml-im' xmlns:html='http://www.w3.org/1999/xhtml'><html:body/><html:body/></html>");
        match XhtmlIm::try_from(elem).unwrap_err() {
            FromElementError::Invalid(Error::Other(message)) => {
                assert_eq!(message, "Two identical language bodies found in XHTML-IM.")
            }
            _ => panic!(),
        }
    }

    /// A body parses into the expected tree of tags and text.
    #[test]
    fn test_tag() {
        let empty = Body::try_from(parse("<body xmlns='http://www.w3.org/1999/xhtml'/>")).unwrap();
        assert_eq!(empty.children.len(), 0);

        let body = Body::try_from(parse(
            "<body xmlns='http://www.w3.org/1999/xhtml'><p>Hello world!</p></body>",
        ))
        .unwrap();
        assert_eq!(body.style.len(), 0);
        assert_eq!(body.xml_lang, None);
        assert_eq!(body.children.len(), 1);

        let paragraph = match body.children.into_iter().next() {
            Some(Child::Tag(tag)) => tag,
            _ => panic!(),
        };
        let inner = match paragraph {
            Tag::P { style, children } => {
                assert_eq!(style.len(), 0);
                assert_eq!(children.len(), 1);
                children
            }
            _ => panic!(),
        };
        match inner.into_iter().next() {
            Some(Child::Text(text)) => assert_eq!(text, "Hello world!"),
            _ => panic!(),
        }
    }

    /// An unknown element is dropped while its content is kept.
    #[test]
    fn test_unknown_element() {
        let parsed = XhtmlIm::try_from(parse("<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><coucou>Hello world!</coucou></body></html>")).unwrap();

        let elem = Element::from(parsed.clone());
        assert_eq!(String::from(&elem), "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'>Hello world!</body></html>");

        assert_eq!(parsed.into_html(), "Hello world!");
    }

    /// HTML generation for a flat and a nested paragraph.
    #[test]
    fn test_generate_html() {
        let cases = [
            (
                "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><p>Hello world!</p></body></html>",
                "<p>Hello world!</p>",
            ),
            (
                "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><p>Hello <strong>world</strong>!</p></body></html>",
                "<p>Hello <strong>world</strong>!</p>",
            ),
        ];
        for &(xml, expected) in &cases {
            let html = XhtmlIm::try_from(parse(xml)).unwrap().into_html();
            assert_eq!(html, expected);
        }
    }

    /// A tree can be assembled by hand from the internal types.
    #[test]
    fn generate_tree() {
        let strong = Tag::Strong {
            children: vec![Child::Text("world".to_string())],
        };
        let paragraph = Tag::P {
            style: vec![],
            children: vec![
                Child::Text("Hello ".to_string()),
                Child::Tag(strong),
                Child::Text("!".to_string()),
            ],
        };

        let _tree = Body {
            style: vec![],
            xml_lang: Some("en".to_string()),
            children: vec![Child::Tag(paragraph)],
        };
    }
}