xmpp_parsers/
xhtml.rs

1// Copyright (c) 2019 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7use crate::message::MessagePayload;
8use crate::ns;
9use alloc::collections::BTreeMap;
10use minidom::{Element, Node};
11use xso::error::{Error, FromElementError};
12
13// TODO: Use a proper lang type.
14type Lang = String;
15
16/// Container for formatted text.
17#[derive(Debug, Clone)]
18pub struct XhtmlIm {
19    /// Map of language to body element.
20    bodies: BTreeMap<Lang, Body>,
21}
22
23impl XhtmlIm {
24    /// Serialise formatted text to HTML.
25    pub fn into_html(self) -> String {
26        let mut html = Vec::new();
27        // TODO: use the best language instead.
28        for (lang, body) in self.bodies {
29            if lang.is_empty() {
30                assert!(body.xml_lang.is_none());
31            } else {
32                assert_eq!(Some(lang), body.xml_lang);
33            }
34            for tag in body.children {
35                html.push(tag.into_html());
36            }
37            break;
38        }
39        html.concat()
40    }
41
42    /// Removes all unknown elements.
43    fn flatten(self) -> XhtmlIm {
44        let mut bodies = BTreeMap::new();
45        for (lang, body) in self.bodies {
46            let children = body.children.into_iter().fold(vec![], |mut acc, child| {
47                match child {
48                    Child::Tag(Tag::Unknown(children)) => acc.extend(children),
49                    any => acc.push(any),
50                }
51                acc
52            });
53            let body = Body { children, ..body };
54            bodies.insert(lang, body);
55        }
56        XhtmlIm { bodies }
57    }
58}
59
60impl MessagePayload for XhtmlIm {}
61
62impl TryFrom<Element> for XhtmlIm {
63    type Error = FromElementError;
64
65    fn try_from(elem: Element) -> Result<XhtmlIm, FromElementError> {
66        check_self!(elem, "html", XHTML_IM);
67        check_no_attributes!(elem, "html");
68
69        let mut bodies = BTreeMap::new();
70        for child in elem.children() {
71            if child.is("body", ns::XHTML) {
72                let child = child.clone();
73                let lang = child.attr("xml:lang").unwrap_or("").to_string();
74                let body = Body::try_from(child)?;
75                match bodies.insert(lang, body) {
76                    None => (),
77                    Some(_) => {
78                        return Err(Error::Other(
79                            "Two identical language bodies found in XHTML-IM.",
80                        )
81                        .into())
82                    }
83                }
84            } else {
85                return Err(Error::Other("Unknown element in XHTML-IM.").into());
86            }
87        }
88
89        Ok(XhtmlIm { bodies }.flatten())
90    }
91}
92
93impl From<XhtmlIm> for Element {
94    fn from(wrapper: XhtmlIm) -> Element {
95        Element::builder("html", ns::XHTML_IM)
96            .append_all(wrapper.bodies.into_iter().map(|(lang, body)| {
97                if lang.is_empty() {
98                    assert!(body.xml_lang.is_none());
99                } else {
100                    assert_eq!(Some(lang), body.xml_lang);
101                }
102                Element::from(body)
103            }))
104            .build()
105    }
106}
107
108#[derive(Debug, Clone)]
109enum Child {
110    Tag(Tag),
111    Text(String),
112}
113
114impl Child {
115    fn into_html(self) -> String {
116        match self {
117            Child::Tag(tag) => tag.into_html(),
118            Child::Text(text) => text,
119        }
120    }
121}
122
/// One CSS declaration, i.e. a `key: value` pair of a `style` attribute.
#[derive(Clone, Debug)]
struct Property {
    key: String,
    value: String,
}

/// Ordered list of CSS declarations.
type Css = Vec<Property>;

/// Formats declarations as `key: value` pairs joined by `"; "`.
///
/// Returns `None` when there is no declaration, so that callers can omit
/// the attribute altogether.
fn get_style_string(style: Css) -> Option<String> {
    if style.is_empty() {
        return None;
    }
    let declarations: Vec<String> = style
        .into_iter()
        .map(|Property { key, value }| format!("{}: {}", key, value))
        .collect();
    Some(declarations.join("; "))
}
141
142#[derive(Debug, Clone)]
143struct Body {
144    style: Css,
145    xml_lang: Option<String>,
146    children: Vec<Child>,
147}
148
149impl TryFrom<Element> for Body {
150    type Error = Error;
151
152    fn try_from(elem: Element) -> Result<Body, Error> {
153        let mut children = vec![];
154        for child in elem.nodes() {
155            match child {
156                Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)),
157                Node::Text(text) => children.push(Child::Text(text.clone())),
158            }
159        }
160
161        Ok(Body {
162            style: parse_css(elem.attr("style")),
163            xml_lang: elem.attr("xml:lang").map(|xml_lang| xml_lang.to_string()),
164            children,
165        })
166    }
167}
168
169impl From<Body> for Element {
170    fn from(body: Body) -> Element {
171        Element::builder("body", ns::XHTML)
172            .attr("style", get_style_string(body.style))
173            .attr("xml:lang", body.xml_lang)
174            .append_all(children_to_nodes(body.children))
175            .build()
176    }
177}
178
179#[derive(Debug, Clone)]
180enum Tag {
181    A {
182        href: Option<String>,
183        style: Css,
184        type_: Option<String>,
185        children: Vec<Child>,
186    },
187    Blockquote {
188        style: Css,
189        children: Vec<Child>,
190    },
191    Br,
192    Cite {
193        style: Css,
194        children: Vec<Child>,
195    },
196    Em {
197        children: Vec<Child>,
198    },
199    Img {
200        src: Option<String>,
201        alt: Option<String>,
202    }, // TODO: height, width, style
203    Li {
204        style: Css,
205        children: Vec<Child>,
206    },
207    Ol {
208        style: Css,
209        children: Vec<Child>,
210    },
211    P {
212        style: Css,
213        children: Vec<Child>,
214    },
215    Span {
216        style: Css,
217        children: Vec<Child>,
218    },
219    Strong {
220        children: Vec<Child>,
221    },
222    Ul {
223        style: Css,
224        children: Vec<Child>,
225    },
226    Unknown(Vec<Child>),
227}
228
229impl Tag {
230    fn into_html(self) -> String {
231        match self {
232            Tag::A {
233                href,
234                style,
235                type_,
236                children,
237            } => {
238                let href = write_attr(href, "href");
239                let style = write_attr(get_style_string(style), "style");
240                let type_ = write_attr(type_, "type");
241                format!(
242                    "<a{}{}{}>{}</a>",
243                    href,
244                    style,
245                    type_,
246                    children_to_html(children)
247                )
248            }
249            Tag::Blockquote { style, children } => {
250                let style = write_attr(get_style_string(style), "style");
251                format!(
252                    "<blockquote{}>{}</blockquote>",
253                    style,
254                    children_to_html(children)
255                )
256            }
257            Tag::Br => String::from("<br>"),
258            Tag::Cite { style, children } => {
259                let style = write_attr(get_style_string(style), "style");
260                format!("<cite{}>{}</cite>", style, children_to_html(children))
261            }
262            Tag::Em { children } => format!("<em>{}</em>", children_to_html(children)),
263            Tag::Img { src, alt } => {
264                let src = write_attr(src, "src");
265                let alt = write_attr(alt, "alt");
266                format!("<img{}{}>", src, alt)
267            }
268            Tag::Li { style, children } => {
269                let style = write_attr(get_style_string(style), "style");
270                format!("<li{}>{}</li>", style, children_to_html(children))
271            }
272            Tag::Ol { style, children } => {
273                let style = write_attr(get_style_string(style), "style");
274                format!("<ol{}>{}</ol>", style, children_to_html(children))
275            }
276            Tag::P { style, children } => {
277                let style = write_attr(get_style_string(style), "style");
278                format!("<p{}>{}</p>", style, children_to_html(children))
279            }
280            Tag::Span { style, children } => {
281                let style = write_attr(get_style_string(style), "style");
282                format!("<span{}>{}</span>", style, children_to_html(children))
283            }
284            Tag::Strong { children } => format!("<strong>{}</strong>", children_to_html(children)),
285            Tag::Ul { style, children } => {
286                let style = write_attr(get_style_string(style), "style");
287                format!("<ul{}>{}</ul>", style, children_to_html(children))
288            }
289            Tag::Unknown(_) => {
290                panic!("No unknown element should be present in XHTML-IM after parsing.")
291            }
292        }
293    }
294}
295
296impl TryFrom<Element> for Tag {
297    type Error = Error;
298
299    fn try_from(elem: Element) -> Result<Tag, Error> {
300        let mut children = vec![];
301        for child in elem.nodes() {
302            match child {
303                Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)),
304                Node::Text(text) => children.push(Child::Text(text.clone())),
305            }
306        }
307
308        Ok(match elem.name() {
309            "a" => Tag::A {
310                href: elem.attr("href").map(|href| href.to_string()),
311                style: parse_css(elem.attr("style")),
312                type_: elem.attr("type").map(|type_| type_.to_string()),
313                children,
314            },
315            "blockquote" => Tag::Blockquote {
316                style: parse_css(elem.attr("style")),
317                children,
318            },
319            "br" => Tag::Br,
320            "cite" => Tag::Cite {
321                style: parse_css(elem.attr("style")),
322                children,
323            },
324            "em" => Tag::Em { children },
325            "img" => Tag::Img {
326                src: elem.attr("src").map(|src| src.to_string()),
327                alt: elem.attr("alt").map(|alt| alt.to_string()),
328            },
329            "li" => Tag::Li {
330                style: parse_css(elem.attr("style")),
331                children,
332            },
333            "ol" => Tag::Ol {
334                style: parse_css(elem.attr("style")),
335                children,
336            },
337            "p" => Tag::P {
338                style: parse_css(elem.attr("style")),
339                children,
340            },
341            "span" => Tag::Span {
342                style: parse_css(elem.attr("style")),
343                children,
344            },
345            "strong" => Tag::Strong { children },
346            "ul" => Tag::Ul {
347                style: parse_css(elem.attr("style")),
348                children,
349            },
350            _ => Tag::Unknown(children),
351        })
352    }
353}
354
355impl From<Tag> for Element {
356    fn from(tag: Tag) -> Element {
357        let (name, attrs, children) = match tag {
358            Tag::A {
359                href,
360                style,
361                type_,
362                children,
363            } => (
364                "a",
365                {
366                    let mut attrs = vec![];
367                    if let Some(href) = href {
368                        attrs.push(("href", href));
369                    }
370                    if let Some(style) = get_style_string(style) {
371                        attrs.push(("style", style));
372                    }
373                    if let Some(type_) = type_ {
374                        attrs.push(("type", type_));
375                    }
376                    attrs
377                },
378                children,
379            ),
380            Tag::Blockquote { style, children } => (
381                "blockquote",
382                match get_style_string(style) {
383                    Some(style) => vec![("style", style)],
384                    None => vec![],
385                },
386                children,
387            ),
388            Tag::Br => ("br", vec![], vec![]),
389            Tag::Cite { style, children } => (
390                "cite",
391                match get_style_string(style) {
392                    Some(style) => vec![("style", style)],
393                    None => vec![],
394                },
395                children,
396            ),
397            Tag::Em { children } => ("em", vec![], children),
398            Tag::Img { src, alt } => {
399                let mut attrs = vec![];
400                if let Some(src) = src {
401                    attrs.push(("src", src));
402                }
403                if let Some(alt) = alt {
404                    attrs.push(("alt", alt));
405                }
406                ("img", attrs, vec![])
407            }
408            Tag::Li { style, children } => (
409                "li",
410                match get_style_string(style) {
411                    Some(style) => vec![("style", style)],
412                    None => vec![],
413                },
414                children,
415            ),
416            Tag::Ol { style, children } => (
417                "ol",
418                match get_style_string(style) {
419                    Some(style) => vec![("style", style)],
420                    None => vec![],
421                },
422                children,
423            ),
424            Tag::P { style, children } => (
425                "p",
426                match get_style_string(style) {
427                    Some(style) => vec![("style", style)],
428                    None => vec![],
429                },
430                children,
431            ),
432            Tag::Span { style, children } => (
433                "span",
434                match get_style_string(style) {
435                    Some(style) => vec![("style", style)],
436                    None => vec![],
437                },
438                children,
439            ),
440            Tag::Strong { children } => ("strong", vec![], children),
441            Tag::Ul { style, children } => (
442                "ul",
443                match get_style_string(style) {
444                    Some(style) => vec![("style", style)],
445                    None => vec![],
446                },
447                children,
448            ),
449            Tag::Unknown(_) => {
450                panic!("No unknown element should be present in XHTML-IM after parsing.")
451            }
452        };
453        let mut builder = Element::builder(name, ns::XHTML).append_all(children_to_nodes(children));
454        for (key, value) in attrs {
455            builder = builder.attr(key, value);
456        }
457        builder.build()
458    }
459}
460
461fn children_to_nodes(children: Vec<Child>) -> impl IntoIterator<Item = Node> {
462    children.into_iter().map(|child| match child {
463        Child::Tag(tag) => Node::Element(Element::from(tag)),
464        Child::Text(text) => Node::Text(text),
465    })
466}
467
468fn children_to_html(children: Vec<Child>) -> String {
469    children
470        .into_iter()
471        .map(|child| child.into_html())
472        .collect::<Vec<_>>()
473        .concat()
474}
475
/// Formats an optional attribute as ` name='value'` (with a leading space),
/// or as an empty string when the attribute is absent.
///
/// The value is escaped: it is written between single quotes, so an
/// unescaped `'` would end the attribute early and let the rest of the value
/// be read as additional markup.
fn write_attr(attr: Option<String>, name: &str) -> String {
    match attr {
        Some(value) => {
            let mut out = String::with_capacity(name.len() + value.len() + 4);
            out.push(' ');
            out.push_str(name);
            out.push_str("='");
            for c in value.chars() {
                match c {
                    '&' => out.push_str("&amp;"),
                    '<' => out.push_str("&lt;"),
                    '>' => out.push_str("&gt;"),
                    '\'' => out.push_str("&#39;"),
                    '"' => out.push_str("&quot;"),
                    other => out.push(other),
                }
            }
            out.push('\'');
            out
        }
        None => String::new(),
    }
}
482
483fn parse_css(style: Option<&str>) -> Css {
484    let mut properties = vec![];
485    if let Some(style) = style {
486        // TODO: make that parser a bit more resilient to things.
487        for part in style.split(';') {
488            let mut part = part
489                .splitn(2, ':')
490                .map(|a| a.to_string())
491                .collect::<Vec<_>>();
492            let key = part.pop().unwrap();
493            let value = part.pop().unwrap();
494            properties.push(Property { key, value });
495        }
496    }
497    properties
498}
499
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses an XML snippet into an element, panicking on malformed input.
    fn parse(xml: &str) -> Element {
        xml.parse().unwrap()
    }

    /// Pins the in-memory size of the main types on 32-bit targets.
    #[cfg(target_pointer_width = "32")]
    #[test]
    fn test_size() {
        assert_size!(XhtmlIm, 12);
        assert_size!(Child, 48);
        assert_size!(Tag, 48);
    }

    /// Pins the in-memory size of the main types on 64-bit targets.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_size() {
        assert_size!(XhtmlIm, 24);
        assert_size!(Child, 96);
        assert_size!(Tag, 96);
    }

    /// Zero, one and two empty bodies are all accepted.
    #[test]
    fn test_empty() {
        let no_body = parse("<html xmlns='http://jabber.org/protocol/xhtml-im'/>");
        assert_eq!(XhtmlIm::try_from(no_body).unwrap().bodies.len(), 0);

        let one_body = parse("<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'/></html>");
        assert_eq!(XhtmlIm::try_from(one_body).unwrap().bodies.len(), 1);

        let two_bodies = parse("<html xmlns='http://jabber.org/protocol/xhtml-im' xmlns:html='http://www.w3.org/1999/xhtml'><html:body xml:lang='fr'/><html:body xml:lang='en'/></html>");
        assert_eq!(XhtmlIm::try_from(two_bodies).unwrap().bodies.len(), 2);
    }

    /// Two bodies with the same (absent) language are rejected.
    #[test]
    fn invalid_two_same_langs() {
        let elem = parse("<html xmlns='http://jabber.org/protocol/xhtml-im' xmlns:html='http://www.w3.org/1999/xhtml'><html:body/><html:body/></html>");
        match XhtmlIm::try_from(elem).unwrap_err() {
            FromElementError::Invalid(Error::Other(message)) => {
                assert_eq!(message, "Two identical language bodies found in XHTML-IM.");
            }
            _ => panic!(),
        }
    }

    /// A body holding a single paragraph parses into the expected tree.
    #[test]
    fn test_tag() {
        let empty = Body::try_from(parse("<body xmlns='http://www.w3.org/1999/xhtml'/>")).unwrap();
        assert_eq!(empty.children.len(), 0);

        let mut body = Body::try_from(parse(
            "<body xmlns='http://www.w3.org/1999/xhtml'><p>Hello world!</p></body>",
        ))
        .unwrap();
        assert_eq!(body.style.len(), 0);
        assert_eq!(body.xml_lang, None);
        assert_eq!(body.children.len(), 1);

        let mut paragraph_children = match body.children.pop() {
            Some(Child::Tag(Tag::P { style, children })) => {
                assert_eq!(style.len(), 0);
                assert_eq!(children.len(), 1);
                children
            }
            _ => panic!(),
        };
        match paragraph_children.pop() {
            Some(Child::Text(text)) => assert_eq!(text, "Hello world!"),
            _ => panic!(),
        }
    }

    /// An unknown element is dropped while its text content is kept.
    #[test]
    fn test_unknown_element() {
        let elem = parse("<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><coucou>Hello world!</coucou></body></html>");
        let parsed = XhtmlIm::try_from(elem).unwrap();
        let for_element = parsed.clone();

        assert_eq!(parsed.into_html(), "Hello world!");

        let serialised = Element::from(for_element);
        assert_eq!(String::from(&serialised), "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'>Hello world!</body></html>");
    }

    /// Known tags are written back out as HTML.
    #[test]
    fn test_generate_html() {
        let simple = parse("<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><p>Hello world!</p></body></html>");
        let html = XhtmlIm::try_from(simple).unwrap().into_html();
        assert_eq!(html, "<p>Hello world!</p>");

        let nested = parse("<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><p>Hello <strong>world</strong>!</p></body></html>");
        let html = XhtmlIm::try_from(nested).unwrap().into_html();
        assert_eq!(html, "<p>Hello <strong>world</strong>!</p>");
    }

    /// A tree can be assembled by hand from the private types.
    #[test]
    fn generate_tree() {
        let world = "world".to_string();

        let strong = Child::Tag(Tag::Strong {
            children: vec![Child::Text(world)],
        });
        let paragraph = Child::Tag(Tag::P {
            style: vec![],
            children: vec![
                Child::Text("Hello ".to_string()),
                strong,
                Child::Text("!".to_string()),
            ],
        });

        let _ = Body {
            style: vec![],
            xml_lang: Some("en".to_string()),
            children: vec![paragraph],
        };
    }
}