xmpp_parsers/
xhtml.rs

1// Copyright (c) 2019 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7use crate::message::MessagePayload;
8use crate::ns;
9use alloc::collections::BTreeMap;
10use minidom::{Element, Node};
11use xso::exports::rxml;
12use xso::{
13    error::{Error, FromElementError},
14    exports::rxml::Namespace,
15};
16
// TODO: Use a proper lang type.
/// Language tag used as the key of the body map in [`XhtmlIm`].
///
/// The empty string is the key of a body that has no `xml:lang` attribute.
type Lang = String;
19
/// Container for formatted text.
///
/// Wraps the `<body/>` elements found in an XHTML-IM `<html/>` payload, at
/// most one per language.
#[derive(Debug, Clone)]
pub struct XhtmlIm {
    /// Map of language to body element.
    ///
    /// A body without `xml:lang` is stored under the empty string.
    bodies: BTreeMap<Lang, Body>,
}
26
27impl XhtmlIm {
28    /// Serialise formatted text to HTML.
29    pub fn into_html(self) -> String {
30        let mut html = Vec::new();
31        // TODO: use the best language instead.
32        // XXX: Remove this flag later when fixing the code below
33        #[allow(clippy::never_loop)]
34        for (lang, body) in self.bodies {
35            if lang.is_empty() {
36                assert!(body.xml_lang.is_none());
37            } else {
38                assert_eq!(Some(lang), body.xml_lang);
39            }
40            for tag in body.children {
41                html.push(tag.into_html());
42            }
43            break;
44        }
45        html.concat()
46    }
47
48    /// Removes all unknown elements.
49    fn flatten(self) -> XhtmlIm {
50        let mut bodies = BTreeMap::new();
51        for (lang, body) in self.bodies {
52            let children = body.children.into_iter().fold(vec![], |mut acc, child| {
53                match child {
54                    Child::Tag(Tag::Unknown(children)) => acc.extend(children),
55                    any => acc.push(any),
56                }
57                acc
58            });
59            let body = Body { children, ..body };
60            bodies.insert(lang, body);
61        }
62        XhtmlIm { bodies }
63    }
64}
65
// Opts `XhtmlIm` into the crate's `MessagePayload` trait; no item of the
// trait is overridden here.
impl MessagePayload for XhtmlIm {}
67
68impl TryFrom<Element> for XhtmlIm {
69    type Error = FromElementError;
70
71    fn try_from(elem: Element) -> Result<XhtmlIm, FromElementError> {
72        check_self!(elem, "html", XHTML_IM);
73        check_no_attributes!(elem, "html");
74
75        let mut bodies = BTreeMap::new();
76        for child in elem.children() {
77            if child.is("body", ns::XHTML) {
78                let child = child.clone();
79                let lang = child
80                    .attr_ns(rxml::Namespace::xml(), "lang")
81                    .unwrap_or("")
82                    .to_string();
83                let body = Body::try_from(child)?;
84                match bodies.insert(lang, body) {
85                    None => (),
86                    Some(_) => {
87                        return Err(Error::Other(
88                            "Two identical language bodies found in XHTML-IM.",
89                        )
90                        .into())
91                    }
92                }
93            } else {
94                return Err(Error::Other("Unknown element in XHTML-IM.").into());
95            }
96        }
97
98        Ok(XhtmlIm { bodies }.flatten())
99    }
100}
101
102impl From<XhtmlIm> for Element {
103    fn from(wrapper: XhtmlIm) -> Element {
104        Element::builder("html", ns::XHTML_IM)
105            .append_all(wrapper.bodies.into_iter().map(|(lang, body)| {
106                if lang.is_empty() {
107                    assert!(body.xml_lang.is_none());
108                } else {
109                    assert_eq!(Some(lang), body.xml_lang);
110                }
111                Element::from(body)
112            }))
113            .build()
114    }
115}
116
/// A node found inside a body or a tag: either a nested element or text.
#[derive(Debug, Clone)]
enum Child {
    /// A nested element.
    Tag(Tag),
    /// A run of character data.
    Text(String),
}
122
123impl Child {
124    fn into_html(self) -> String {
125        match self {
126            Child::Tag(tag) => tag.into_html(),
127            Child::Text(text) => text,
128        }
129    }
130}
131
/// A single entry of a `style` attribute, serialised as `key: value`.
#[derive(Debug, Clone)]
struct Property {
    /// Name of the property.
    key: String,
    /// Value of the property.
    value: String,
}
137
/// Content of a `style` attribute: a list of properties, kept in order.
type Css = Vec<Property>;
139
140fn get_style_string(style: Css) -> Option<String> {
141    let mut result = vec![];
142    for Property { key, value } in style {
143        result.push(format!("{}: {}", key, value));
144    }
145    if result.is_empty() {
146        return None;
147    }
148    Some(result.join("; "))
149}
150
/// One `<body/>` element of an XHTML-IM payload.
#[derive(Debug, Clone)]
struct Body {
    /// Properties parsed from the `style` attribute.
    style: Css,
    /// Language of the body, if the element carries one.
    xml_lang: Option<String>,
    /// Elements and text found inside the body, in document order.
    children: Vec<Child>,
}
157
158impl TryFrom<Element> for Body {
159    type Error = Error;
160
161    fn try_from(elem: Element) -> Result<Body, Error> {
162        let mut children = vec![];
163        for child in elem.nodes() {
164            match child {
165                Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)),
166                Node::Text(text) => children.push(Child::Text(text.clone())),
167            }
168        }
169
170        Ok(Body {
171            style: parse_css(elem.attr("style")),
172            xml_lang: elem
173                .attr_ns("xml", "lang")
174                .map(|xml_lang| xml_lang.to_string()),
175            children,
176        })
177    }
178}
179
180impl From<Body> for Element {
181    fn from(body: Body) -> Element {
182        Element::builder("body", ns::XHTML)
183            .attr(
184                rxml::xml_ncname!("style").into(),
185                get_style_string(body.style),
186            )
187            .attr_ns(
188                Into::<Namespace>::into(String::from("xml")),
189                rxml::xml_ncname!("lang").into(),
190                body.xml_lang,
191            )
192            .append_all(children_to_nodes(body.children))
193            .build()
194    }
195}
196
/// An element found inside a body, together with the attributes that are
/// kept for it.
#[derive(Debug, Clone)]
enum Tag {
    /// `<a/>`, with its `href`, `style` and `type` attributes.
    A {
        href: Option<String>,
        style: Css,
        type_: Option<String>,
        children: Vec<Child>,
    },
    /// `<blockquote/>`.
    Blockquote {
        style: Css,
        children: Vec<Child>,
    },
    /// `<br/>`; neither attributes nor children are kept.
    Br,
    /// `<cite/>`.
    Cite {
        style: Css,
        children: Vec<Child>,
    },
    /// `<em/>`.
    Em {
        children: Vec<Child>,
    },
    /// `<img/>`, with its `src` and `alt` attributes; children are not kept.
    Img {
        src: Option<String>,
        alt: Option<String>,
    }, // TODO: height, width, style
    /// `<li/>`.
    Li {
        style: Css,
        children: Vec<Child>,
    },
    /// `<ol/>`.
    Ol {
        style: Css,
        children: Vec<Child>,
    },
    /// `<p/>`.
    P {
        style: Css,
        children: Vec<Child>,
    },
    /// `<span/>`.
    Span {
        style: Css,
        children: Vec<Child>,
    },
    /// `<strong/>`.
    Strong {
        children: Vec<Child>,
    },
    /// `<ul/>`.
    Ul {
        style: Css,
        children: Vec<Child>,
    },
    /// Any other element; only its children are kept. Both serialisers panic
    /// on this variant, so it has to be removed before serialising.
    Unknown(Vec<Child>),
}
246
247impl Tag {
248    fn into_html(self) -> String {
249        match self {
250            Tag::A {
251                href,
252                style,
253                type_,
254                children,
255            } => {
256                let href = write_attr(href, "href");
257                let style = write_attr(get_style_string(style), "style");
258                let type_ = write_attr(type_, "type");
259                format!(
260                    "<a{}{}{}>{}</a>",
261                    href,
262                    style,
263                    type_,
264                    children_to_html(children)
265                )
266            }
267            Tag::Blockquote { style, children } => {
268                let style = write_attr(get_style_string(style), "style");
269                format!(
270                    "<blockquote{}>{}</blockquote>",
271                    style,
272                    children_to_html(children)
273                )
274            }
275            Tag::Br => String::from("<br>"),
276            Tag::Cite { style, children } => {
277                let style = write_attr(get_style_string(style), "style");
278                format!("<cite{}>{}</cite>", style, children_to_html(children))
279            }
280            Tag::Em { children } => format!("<em>{}</em>", children_to_html(children)),
281            Tag::Img { src, alt } => {
282                let src = write_attr(src, "src");
283                let alt = write_attr(alt, "alt");
284                format!("<img{}{}>", src, alt)
285            }
286            Tag::Li { style, children } => {
287                let style = write_attr(get_style_string(style), "style");
288                format!("<li{}>{}</li>", style, children_to_html(children))
289            }
290            Tag::Ol { style, children } => {
291                let style = write_attr(get_style_string(style), "style");
292                format!("<ol{}>{}</ol>", style, children_to_html(children))
293            }
294            Tag::P { style, children } => {
295                let style = write_attr(get_style_string(style), "style");
296                format!("<p{}>{}</p>", style, children_to_html(children))
297            }
298            Tag::Span { style, children } => {
299                let style = write_attr(get_style_string(style), "style");
300                format!("<span{}>{}</span>", style, children_to_html(children))
301            }
302            Tag::Strong { children } => format!("<strong>{}</strong>", children_to_html(children)),
303            Tag::Ul { style, children } => {
304                let style = write_attr(get_style_string(style), "style");
305                format!("<ul{}>{}</ul>", style, children_to_html(children))
306            }
307            Tag::Unknown(_) => {
308                panic!("No unknown element should be present in XHTML-IM after parsing.")
309            }
310        }
311    }
312}
313
314impl TryFrom<Element> for Tag {
315    type Error = Error;
316
317    fn try_from(elem: Element) -> Result<Tag, Error> {
318        let mut children = vec![];
319        for child in elem.nodes() {
320            match child {
321                Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)),
322                Node::Text(text) => children.push(Child::Text(text.clone())),
323            }
324        }
325
326        Ok(match elem.name() {
327            "a" => Tag::A {
328                href: elem.attr("href").map(|href| href.to_string()),
329                style: parse_css(elem.attr(rxml::xml_ncname!("style"))),
330                type_: elem.attr("type").map(|type_| type_.to_string()),
331                children,
332            },
333            "blockquote" => Tag::Blockquote {
334                style: parse_css(elem.attr("style")),
335                children,
336            },
337            "br" => Tag::Br,
338            "cite" => Tag::Cite {
339                style: parse_css(elem.attr("style")),
340                children,
341            },
342            "em" => Tag::Em { children },
343            "img" => Tag::Img {
344                src: elem.attr("src").map(|src| src.to_string()),
345                alt: elem.attr("alt").map(|alt| alt.to_string()),
346            },
347            "li" => Tag::Li {
348                style: parse_css(elem.attr("style")),
349                children,
350            },
351            "ol" => Tag::Ol {
352                style: parse_css(elem.attr("style")),
353                children,
354            },
355            "p" => Tag::P {
356                style: parse_css(elem.attr("style")),
357                children,
358            },
359            "span" => Tag::Span {
360                style: parse_css(elem.attr("style")),
361                children,
362            },
363            "strong" => Tag::Strong { children },
364            "ul" => Tag::Ul {
365                style: parse_css(elem.attr("style")),
366                children,
367            },
368            _ => Tag::Unknown(children),
369        })
370    }
371}
372
373impl From<Tag> for Element {
374    fn from(tag: Tag) -> Element {
375        let (name, attrs, children) = match tag {
376            Tag::A {
377                href,
378                style,
379                type_,
380                children,
381            } => (
382                "a",
383                {
384                    let mut attrs = vec![];
385                    if let Some(href) = href {
386                        attrs.push((rxml::xml_ncname!("href"), href));
387                    }
388                    if let Some(style) = get_style_string(style) {
389                        attrs.push((rxml::xml_ncname!("style"), style));
390                    }
391                    if let Some(type_) = type_ {
392                        attrs.push((rxml::xml_ncname!("type"), type_));
393                    }
394                    attrs
395                },
396                children,
397            ),
398            Tag::Blockquote { style, children } => (
399                "blockquote",
400                match get_style_string(style) {
401                    Some(style) => vec![(rxml::xml_ncname!("style"), style)],
402                    None => vec![],
403                },
404                children,
405            ),
406            Tag::Br => ("br", vec![], vec![]),
407            Tag::Cite { style, children } => (
408                "cite",
409                match get_style_string(style) {
410                    Some(style) => vec![(rxml::xml_ncname!("style"), style)],
411                    None => vec![],
412                },
413                children,
414            ),
415            Tag::Em { children } => ("em", vec![], children),
416            Tag::Img { src, alt } => {
417                let mut attrs = vec![];
418                if let Some(src) = src {
419                    attrs.push((rxml::xml_ncname!("src"), src));
420                }
421                if let Some(alt) = alt {
422                    attrs.push((rxml::xml_ncname!("alt"), alt));
423                }
424                ("img", attrs, vec![])
425            }
426            Tag::Li { style, children } => (
427                "li",
428                match get_style_string(style) {
429                    Some(style) => vec![(rxml::xml_ncname!("style"), style)],
430                    None => vec![],
431                },
432                children,
433            ),
434            Tag::Ol { style, children } => (
435                "ol",
436                match get_style_string(style) {
437                    Some(style) => vec![(rxml::xml_ncname!("style"), style)],
438                    None => vec![],
439                },
440                children,
441            ),
442            Tag::P { style, children } => (
443                "p",
444                match get_style_string(style) {
445                    Some(style) => vec![(rxml::xml_ncname!("style"), style)],
446                    None => vec![],
447                },
448                children,
449            ),
450            Tag::Span { style, children } => (
451                "span",
452                match get_style_string(style) {
453                    Some(style) => vec![(rxml::xml_ncname!("style"), style)],
454                    None => vec![],
455                },
456                children,
457            ),
458            Tag::Strong { children } => ("strong", vec![], children),
459            Tag::Ul { style, children } => (
460                "ul",
461                match get_style_string(style) {
462                    Some(style) => vec![(rxml::xml_ncname!("style"), style)],
463                    None => vec![],
464                },
465                children,
466            ),
467            Tag::Unknown(_) => {
468                panic!("No unknown element should be present in XHTML-IM after parsing.")
469            }
470        };
471        let mut builder = Element::builder(name, ns::XHTML).append_all(children_to_nodes(children));
472        for (key, value) in attrs {
473            builder = builder.attr(key.into(), value);
474        }
475        builder.build()
476    }
477}
478
479fn children_to_nodes(children: Vec<Child>) -> impl IntoIterator<Item = Node> {
480    children.into_iter().map(|child| match child {
481        Child::Tag(tag) => Node::Element(Element::from(tag)),
482        Child::Text(text) => Node::Text(text),
483    })
484}
485
486fn children_to_html(children: Vec<Child>) -> String {
487    children
488        .into_iter()
489        .map(|child| child.into_html())
490        .collect::<Vec<_>>()
491        .concat()
492}
493
/// Renders an optional attribute as ` name='value'`, with a leading space.
///
/// Returns an empty string when `attr` is `None`. The value is escaped so
/// that it cannot end the single-quoted attribute early or inject markup:
/// `&`, `<`, `>`, `"` and `'` are replaced with character references.
fn write_attr(attr: Option<String>, name: &str) -> String {
    let value = match attr {
        Some(value) => value,
        None => return String::new(),
    };

    // " name='" + value + "'" — the capacity is a lower bound, escaping may
    // need more.
    let mut out = String::with_capacity(name.len() + value.len() + 4);
    out.push(' ');
    out.push_str(name);
    out.push_str("='");
    for c in value.chars() {
        match c {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&#39;"),
            other => out.push(other),
        }
    }
    out.push('\'');
    out
}
500
501fn parse_css(style: Option<&str>) -> Css {
502    let mut properties = vec![];
503    if let Some(style) = style {
504        // TODO: make that parser a bit more resilient to things.
505        for part in style.split(';') {
506            let mut part = part
507                .splitn(2, ':')
508                .map(|a| a.to_string())
509                .collect::<Vec<_>>();
510            let key = part.pop().unwrap();
511            let value = part.pop().unwrap();
512            properties.push(Property { key, value });
513        }
514    }
515    properties
516}
517
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the size of the main types on 32-bit targets, through the
    /// `assert_size!` macro.
    #[cfg(target_pointer_width = "32")]
    #[test]
    fn test_size() {
        assert_size!(XhtmlIm, 12);
        assert_size!(Child, 48);
        assert_size!(Tag, 48);
    }

    /// Pins the size of the main types on 64-bit targets, through the
    /// `assert_size!` macro.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_size() {
        assert_size!(XhtmlIm, 24);
        assert_size!(Child, 96);
        assert_size!(Tag, 96);
    }

    /// Payloads with zero, one and two empty bodies parse to as many entries
    /// in the language map.
    #[test]
    fn test_empty() {
        // No body at all.
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'/>"
            .parse()
            .unwrap();
        let xhtml = XhtmlIm::try_from(elem).unwrap();
        assert_eq!(xhtml.bodies.len(), 0);

        // A single body without a language.
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'/></html>"
            .parse()
            .unwrap();
        let xhtml = XhtmlIm::try_from(elem).unwrap();
        assert_eq!(xhtml.bodies.len(), 1);

        // Two bodies with distinct languages.
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im' xmlns:html='http://www.w3.org/1999/xhtml'><html:body xml:lang='fr'/><html:body xml:lang='en'/></html>"
            .parse()
            .unwrap();
        let xhtml = XhtmlIm::try_from(elem).unwrap();
        assert_eq!(xhtml.bodies.len(), 2);
    }

    /// Two bodies sharing the same (here: absent) language are rejected.
    #[test]
    fn invalid_two_same_langs() {
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im' xmlns:html='http://www.w3.org/1999/xhtml'><html:body/><html:body/></html>"
            .parse()
            .unwrap();
        let error = XhtmlIm::try_from(elem).unwrap_err();
        let message = match error {
            FromElementError::Invalid(Error::Other(string)) => string,
            _ => panic!(),
        };
        assert_eq!(message, "Two identical language bodies found in XHTML-IM.");
    }

    /// Parses bodies directly and walks the resulting tree down to the text
    /// of a `<p/>`.
    #[test]
    fn test_tag() {
        let elem: Element = "<body xmlns='http://www.w3.org/1999/xhtml'/>"
            .parse()
            .unwrap();
        let body = Body::try_from(elem).unwrap();
        assert_eq!(body.children.len(), 0);

        let elem: Element = "<body xmlns='http://www.w3.org/1999/xhtml'><p>Hello world!</p></body>"
            .parse()
            .unwrap();
        let mut body = Body::try_from(elem).unwrap();
        assert_eq!(body.style.len(), 0);
        assert_eq!(body.xml_lang, None);
        assert_eq!(body.children.len(), 1);
        // The only child of the body is the paragraph…
        let p = match body.children.pop() {
            Some(Child::Tag(tag)) => tag,
            _ => panic!(),
        };
        let mut children = match p {
            Tag::P { style, children } => {
                assert_eq!(style.len(), 0);
                assert_eq!(children.len(), 1);
                children
            }
            _ => panic!(),
        };
        // …whose only child is the text.
        let text = match children.pop() {
            Some(Child::Text(text)) => text,
            _ => panic!(),
        };
        assert_eq!(text, "Hello world!");
    }

    /// An unknown element directly under the body is dropped while its text
    /// is kept, in both the HTML and the XML output.
    #[test]
    fn test_unknown_element() {
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><coucou>Hello world!</coucou></body></html>"
            .parse()
            .unwrap();
        let parsed = XhtmlIm::try_from(elem).unwrap();
        let parsed2 = parsed.clone();
        let html = parsed.into_html();
        assert_eq!(html, "Hello world!");

        let elem = Element::from(parsed2);
        assert_eq!(String::from(&elem), "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'>Hello world!</body></html>");
    }

    /// HTML generation for a plain paragraph and for one with a nested tag.
    #[test]
    fn test_generate_html() {
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><p>Hello world!</p></body></html>"
            .parse()
            .unwrap();
        let xhtml_im = XhtmlIm::try_from(elem).unwrap();
        let html = xhtml_im.into_html();
        assert_eq!(html, "<p>Hello world!</p>");

        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><p>Hello <strong>world</strong>!</p></body></html>"
            .parse()
            .unwrap();
        let xhtml_im = XhtmlIm::try_from(elem).unwrap();
        let html = xhtml_im.into_html();
        assert_eq!(html, "<p>Hello <strong>world</strong>!</p>");
    }

    /// Builds a body tree by hand. Nothing is asserted: the test only shows
    /// that such a value can be constructed.
    #[test]
    fn generate_tree() {
        let world = "world".to_string();

        Body {
            style: vec![],
            xml_lang: Some("en".to_string()),
            children: vec![Child::Tag(Tag::P {
                style: vec![],
                children: vec![
                    Child::Text("Hello ".to_string()),
                    Child::Tag(Tag::Strong {
                        children: vec![Child::Text(world)],
                    }),
                    Child::Text("!".to_string()),
                ],
            })],
        };
    }
}
654        };
655    }
656}