xmpp_parsers/
xhtml.rs

1// Copyright (c) 2019 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7use crate::message::MessagePayload;
8use crate::ns;
9use alloc::collections::BTreeMap;
10use minidom::{Element, Node};
11use xso::error::{Error, FromElementError};
12
// TODO: Use a proper lang type.
/// Language tag used as the key of the body map.
///
/// The empty string stands for a body which carries no `xml:lang` attribute.
type Lang = String;
15
/// Container for formatted text.
///
/// Holds the `<body/>` children of an XHTML-IM `<html/>` element, keyed by
/// language, so at most one body is kept per language.
#[derive(Debug, Clone)]
pub struct XhtmlIm {
    /// Map of language to body element.
    ///
    /// The key is the `xml:lang` value of the body, or the empty string when
    /// the body has no such attribute.
    bodies: BTreeMap<Lang, Body>,
}
22
23impl XhtmlIm {
24    /// Serialise formatted text to HTML.
25    pub fn into_html(self) -> String {
26        let mut html = Vec::new();
27        // TODO: use the best language instead.
28        // XXX: Remove this flag later when fixing the code below
29        #[allow(clippy::never_loop)]
30        for (lang, body) in self.bodies {
31            if lang.is_empty() {
32                assert!(body.xml_lang.is_none());
33            } else {
34                assert_eq!(Some(lang), body.xml_lang);
35            }
36            for tag in body.children {
37                html.push(tag.into_html());
38            }
39            break;
40        }
41        html.concat()
42    }
43
44    /// Removes all unknown elements.
45    fn flatten(self) -> XhtmlIm {
46        let mut bodies = BTreeMap::new();
47        for (lang, body) in self.bodies {
48            let children = body.children.into_iter().fold(vec![], |mut acc, child| {
49                match child {
50                    Child::Tag(Tag::Unknown(children)) => acc.extend(children),
51                    any => acc.push(any),
52                }
53                acc
54            });
55            let body = Body { children, ..body };
56            bodies.insert(lang, body);
57        }
58        XhtmlIm { bodies }
59    }
60}
61
// Opts `XhtmlIm` into the `MessagePayload` trait; no items are provided here.
impl MessagePayload for XhtmlIm {}
63
64impl TryFrom<Element> for XhtmlIm {
65    type Error = FromElementError;
66
67    fn try_from(elem: Element) -> Result<XhtmlIm, FromElementError> {
68        check_self!(elem, "html", XHTML_IM);
69        check_no_attributes!(elem, "html");
70
71        let mut bodies = BTreeMap::new();
72        for child in elem.children() {
73            if child.is("body", ns::XHTML) {
74                let child = child.clone();
75                let lang = child.attr("xml:lang").unwrap_or("").to_string();
76                let body = Body::try_from(child)?;
77                match bodies.insert(lang, body) {
78                    None => (),
79                    Some(_) => {
80                        return Err(Error::Other(
81                            "Two identical language bodies found in XHTML-IM.",
82                        )
83                        .into())
84                    }
85                }
86            } else {
87                return Err(Error::Other("Unknown element in XHTML-IM.").into());
88            }
89        }
90
91        Ok(XhtmlIm { bodies }.flatten())
92    }
93}
94
95impl From<XhtmlIm> for Element {
96    fn from(wrapper: XhtmlIm) -> Element {
97        Element::builder("html", ns::XHTML_IM)
98            .append_all(wrapper.bodies.into_iter().map(|(lang, body)| {
99                if lang.is_empty() {
100                    assert!(body.xml_lang.is_none());
101                } else {
102                    assert_eq!(Some(lang), body.xml_lang);
103                }
104                Element::from(body)
105            }))
106            .build()
107    }
108}
109
/// A node found inside a body or inside another tag.
#[derive(Debug, Clone)]
enum Child {
    /// A nested element.
    Tag(Tag),
    /// A run of character data.
    Text(String),
}
115
116impl Child {
117    fn into_html(self) -> String {
118        match self {
119            Child::Tag(tag) => tag.into_html(),
120            Child::Text(text) => text,
121        }
122    }
123}
124
/// A single declaration of a `style` attribute.
#[derive(Debug, Clone)]
struct Property {
    /// Text written before the `: ` separator when serialising.
    key: String,
    /// Text written after the `: ` separator when serialising.
    value: String,
}
130
/// Content of a `style` attribute, as an ordered list of declarations.
type Css = Vec<Property>;
132
133fn get_style_string(style: Css) -> Option<String> {
134    let mut result = vec![];
135    for Property { key, value } in style {
136        result.push(format!("{}: {}", key, value));
137    }
138    if result.is_empty() {
139        return None;
140    }
141    Some(result.join("; "))
142}
143
/// A `<body/>` element of an XHTML-IM payload.
#[derive(Debug, Clone)]
struct Body {
    /// Parsed content of the `style` attribute; empty when it is absent.
    style: Css,
    /// Value of the `xml:lang` attribute, if there is one.
    xml_lang: Option<String>,
    /// Elements and text found directly inside the body, in document order.
    children: Vec<Child>,
}
150
151impl TryFrom<Element> for Body {
152    type Error = Error;
153
154    fn try_from(elem: Element) -> Result<Body, Error> {
155        let mut children = vec![];
156        for child in elem.nodes() {
157            match child {
158                Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)),
159                Node::Text(text) => children.push(Child::Text(text.clone())),
160            }
161        }
162
163        Ok(Body {
164            style: parse_css(elem.attr("style")),
165            xml_lang: elem.attr("xml:lang").map(|xml_lang| xml_lang.to_string()),
166            children,
167        })
168    }
169}
170
171impl From<Body> for Element {
172    fn from(body: Body) -> Element {
173        Element::builder("body", ns::XHTML)
174            .attr("style", get_style_string(body.style))
175            .attr("xml:lang", body.xml_lang)
176            .append_all(children_to_nodes(body.children))
177            .build()
178    }
179}
180
/// An element recognised inside an XHTML-IM body.
///
/// Each variant keeps only the attributes listed in it; anything else found
/// on the parsed element is dropped.
#[derive(Debug, Clone)]
enum Tag {
    /// An `<a/>` element.
    A {
        /// Value of the `href` attribute.
        href: Option<String>,
        /// Parsed `style` attribute.
        style: Css,
        /// Value of the `type` attribute.
        type_: Option<String>,
        /// Content of the element.
        children: Vec<Child>,
    },
    /// A `<blockquote/>` element.
    Blockquote {
        /// Parsed `style` attribute.
        style: Css,
        /// Content of the element.
        children: Vec<Child>,
    },
    /// A `<br/>` element; its content, if any, is not kept.
    Br,
    /// A `<cite/>` element.
    Cite {
        /// Parsed `style` attribute.
        style: Css,
        /// Content of the element.
        children: Vec<Child>,
    },
    /// An `<em/>` element.
    Em {
        /// Content of the element.
        children: Vec<Child>,
    },
    /// An `<img/>` element; its content, if any, is not kept.
    Img {
        /// Value of the `src` attribute.
        src: Option<String>,
        /// Value of the `alt` attribute.
        alt: Option<String>,
    }, // TODO: height, width, style
    /// A `<li/>` element.
    Li {
        /// Parsed `style` attribute.
        style: Css,
        /// Content of the element.
        children: Vec<Child>,
    },
    /// An `<ol/>` element.
    Ol {
        /// Parsed `style` attribute.
        style: Css,
        /// Content of the element.
        children: Vec<Child>,
    },
    /// A `<p/>` element.
    P {
        /// Parsed `style` attribute.
        style: Css,
        /// Content of the element.
        children: Vec<Child>,
    },
    /// A `<span/>` element.
    Span {
        /// Parsed `style` attribute.
        style: Css,
        /// Content of the element.
        children: Vec<Child>,
    },
    /// A `<strong/>` element.
    Strong {
        /// Content of the element.
        children: Vec<Child>,
    },
    /// A `<ul/>` element.
    Ul {
        /// Parsed `style` attribute.
        style: Css,
        /// Content of the element.
        children: Vec<Child>,
    },
    /// Any element with another name; only its content is kept.
    Unknown(Vec<Child>),
}
230
231impl Tag {
232    fn into_html(self) -> String {
233        match self {
234            Tag::A {
235                href,
236                style,
237                type_,
238                children,
239            } => {
240                let href = write_attr(href, "href");
241                let style = write_attr(get_style_string(style), "style");
242                let type_ = write_attr(type_, "type");
243                format!(
244                    "<a{}{}{}>{}</a>",
245                    href,
246                    style,
247                    type_,
248                    children_to_html(children)
249                )
250            }
251            Tag::Blockquote { style, children } => {
252                let style = write_attr(get_style_string(style), "style");
253                format!(
254                    "<blockquote{}>{}</blockquote>",
255                    style,
256                    children_to_html(children)
257                )
258            }
259            Tag::Br => String::from("<br>"),
260            Tag::Cite { style, children } => {
261                let style = write_attr(get_style_string(style), "style");
262                format!("<cite{}>{}</cite>", style, children_to_html(children))
263            }
264            Tag::Em { children } => format!("<em>{}</em>", children_to_html(children)),
265            Tag::Img { src, alt } => {
266                let src = write_attr(src, "src");
267                let alt = write_attr(alt, "alt");
268                format!("<img{}{}>", src, alt)
269            }
270            Tag::Li { style, children } => {
271                let style = write_attr(get_style_string(style), "style");
272                format!("<li{}>{}</li>", style, children_to_html(children))
273            }
274            Tag::Ol { style, children } => {
275                let style = write_attr(get_style_string(style), "style");
276                format!("<ol{}>{}</ol>", style, children_to_html(children))
277            }
278            Tag::P { style, children } => {
279                let style = write_attr(get_style_string(style), "style");
280                format!("<p{}>{}</p>", style, children_to_html(children))
281            }
282            Tag::Span { style, children } => {
283                let style = write_attr(get_style_string(style), "style");
284                format!("<span{}>{}</span>", style, children_to_html(children))
285            }
286            Tag::Strong { children } => format!("<strong>{}</strong>", children_to_html(children)),
287            Tag::Ul { style, children } => {
288                let style = write_attr(get_style_string(style), "style");
289                format!("<ul{}>{}</ul>", style, children_to_html(children))
290            }
291            Tag::Unknown(_) => {
292                panic!("No unknown element should be present in XHTML-IM after parsing.")
293            }
294        }
295    }
296}
297
298impl TryFrom<Element> for Tag {
299    type Error = Error;
300
301    fn try_from(elem: Element) -> Result<Tag, Error> {
302        let mut children = vec![];
303        for child in elem.nodes() {
304            match child {
305                Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)),
306                Node::Text(text) => children.push(Child::Text(text.clone())),
307            }
308        }
309
310        Ok(match elem.name() {
311            "a" => Tag::A {
312                href: elem.attr("href").map(|href| href.to_string()),
313                style: parse_css(elem.attr("style")),
314                type_: elem.attr("type").map(|type_| type_.to_string()),
315                children,
316            },
317            "blockquote" => Tag::Blockquote {
318                style: parse_css(elem.attr("style")),
319                children,
320            },
321            "br" => Tag::Br,
322            "cite" => Tag::Cite {
323                style: parse_css(elem.attr("style")),
324                children,
325            },
326            "em" => Tag::Em { children },
327            "img" => Tag::Img {
328                src: elem.attr("src").map(|src| src.to_string()),
329                alt: elem.attr("alt").map(|alt| alt.to_string()),
330            },
331            "li" => Tag::Li {
332                style: parse_css(elem.attr("style")),
333                children,
334            },
335            "ol" => Tag::Ol {
336                style: parse_css(elem.attr("style")),
337                children,
338            },
339            "p" => Tag::P {
340                style: parse_css(elem.attr("style")),
341                children,
342            },
343            "span" => Tag::Span {
344                style: parse_css(elem.attr("style")),
345                children,
346            },
347            "strong" => Tag::Strong { children },
348            "ul" => Tag::Ul {
349                style: parse_css(elem.attr("style")),
350                children,
351            },
352            _ => Tag::Unknown(children),
353        })
354    }
355}
356
357impl From<Tag> for Element {
358    fn from(tag: Tag) -> Element {
359        let (name, attrs, children) = match tag {
360            Tag::A {
361                href,
362                style,
363                type_,
364                children,
365            } => (
366                "a",
367                {
368                    let mut attrs = vec![];
369                    if let Some(href) = href {
370                        attrs.push(("href", href));
371                    }
372                    if let Some(style) = get_style_string(style) {
373                        attrs.push(("style", style));
374                    }
375                    if let Some(type_) = type_ {
376                        attrs.push(("type", type_));
377                    }
378                    attrs
379                },
380                children,
381            ),
382            Tag::Blockquote { style, children } => (
383                "blockquote",
384                match get_style_string(style) {
385                    Some(style) => vec![("style", style)],
386                    None => vec![],
387                },
388                children,
389            ),
390            Tag::Br => ("br", vec![], vec![]),
391            Tag::Cite { style, children } => (
392                "cite",
393                match get_style_string(style) {
394                    Some(style) => vec![("style", style)],
395                    None => vec![],
396                },
397                children,
398            ),
399            Tag::Em { children } => ("em", vec![], children),
400            Tag::Img { src, alt } => {
401                let mut attrs = vec![];
402                if let Some(src) = src {
403                    attrs.push(("src", src));
404                }
405                if let Some(alt) = alt {
406                    attrs.push(("alt", alt));
407                }
408                ("img", attrs, vec![])
409            }
410            Tag::Li { style, children } => (
411                "li",
412                match get_style_string(style) {
413                    Some(style) => vec![("style", style)],
414                    None => vec![],
415                },
416                children,
417            ),
418            Tag::Ol { style, children } => (
419                "ol",
420                match get_style_string(style) {
421                    Some(style) => vec![("style", style)],
422                    None => vec![],
423                },
424                children,
425            ),
426            Tag::P { style, children } => (
427                "p",
428                match get_style_string(style) {
429                    Some(style) => vec![("style", style)],
430                    None => vec![],
431                },
432                children,
433            ),
434            Tag::Span { style, children } => (
435                "span",
436                match get_style_string(style) {
437                    Some(style) => vec![("style", style)],
438                    None => vec![],
439                },
440                children,
441            ),
442            Tag::Strong { children } => ("strong", vec![], children),
443            Tag::Ul { style, children } => (
444                "ul",
445                match get_style_string(style) {
446                    Some(style) => vec![("style", style)],
447                    None => vec![],
448                },
449                children,
450            ),
451            Tag::Unknown(_) => {
452                panic!("No unknown element should be present in XHTML-IM after parsing.")
453            }
454        };
455        let mut builder = Element::builder(name, ns::XHTML).append_all(children_to_nodes(children));
456        for (key, value) in attrs {
457            builder = builder.attr(key, value);
458        }
459        builder.build()
460    }
461}
462
463fn children_to_nodes(children: Vec<Child>) -> impl IntoIterator<Item = Node> {
464    children.into_iter().map(|child| match child {
465        Child::Tag(tag) => Node::Element(Element::from(tag)),
466        Child::Text(text) => Node::Text(text),
467    })
468}
469
470fn children_to_html(children: Vec<Child>) -> String {
471    children
472        .into_iter()
473        .map(|child| child.into_html())
474        .collect::<Vec<_>>()
475        .concat()
476}
477
/// Renders an optional attribute as ` name='value'`, with a leading space.
///
/// Returns an empty string when `attr` is `None`, so that the result can be
/// spliced directly after an element name.
///
/// The value is escaped for use inside a single-quoted attribute: `&`, `<`,
/// `>`, `'` and `"` are replaced by character references, so that a value can
/// neither end the attribute nor open a new tag. `name` is written as is.
fn write_attr(attr: Option<String>, name: &str) -> String {
    let value = match attr {
        Some(value) => value,
        None => return String::new(),
    };

    // One space, the `='` opener and the closing quote add four characters.
    let mut rendered = String::with_capacity(name.len() + value.len() + 4);
    rendered.push(' ');
    rendered.push_str(name);
    rendered.push_str("='");
    for c in value.chars() {
        match c {
            '&' => rendered.push_str("&amp;"),
            '<' => rendered.push_str("&lt;"),
            '>' => rendered.push_str("&gt;"),
            '\'' => rendered.push_str("&#39;"),
            '"' => rendered.push_str("&quot;"),
            other => rendered.push(other),
        }
    }
    rendered.push('\'');
    rendered
}
484
485fn parse_css(style: Option<&str>) -> Css {
486    let mut properties = vec![];
487    if let Some(style) = style {
488        // TODO: make that parser a bit more resilient to things.
489        for part in style.split(';') {
490            let mut part = part
491                .splitn(2, ':')
492                .map(|a| a.to_string())
493                .collect::<Vec<_>>();
494            let key = part.pop().unwrap();
495            let value = part.pop().unwrap();
496            properties.push(Property { key, value });
497        }
498    }
499    properties
500}
501
// Unit tests for parsing, flattening and serialising XHTML-IM payloads.
#[cfg(test)]
mod tests {
    use super::*;

    // Pins the in-memory size of the main types on 32-bit targets.
    #[cfg(target_pointer_width = "32")]
    #[test]
    fn test_size() {
        assert_size!(XhtmlIm, 12);
        assert_size!(Child, 48);
        assert_size!(Tag, 48);
    }

    // Pins the in-memory size of the main types on 64-bit targets.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_size() {
        assert_size!(XhtmlIm, 24);
        assert_size!(Child, 96);
        assert_size!(Tag, 96);
    }

    // A wrapper may hold zero, one, or several bodies of distinct languages.
    #[test]
    fn test_empty() {
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'/>"
            .parse()
            .unwrap();
        let xhtml = XhtmlIm::try_from(elem).unwrap();
        assert_eq!(xhtml.bodies.len(), 0);

        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'/></html>"
            .parse()
            .unwrap();
        let xhtml = XhtmlIm::try_from(elem).unwrap();
        assert_eq!(xhtml.bodies.len(), 1);

        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im' xmlns:html='http://www.w3.org/1999/xhtml'><html:body xml:lang='fr'/><html:body xml:lang='en'/></html>"
            .parse()
            .unwrap();
        let xhtml = XhtmlIm::try_from(elem).unwrap();
        assert_eq!(xhtml.bodies.len(), 2);
    }

    // Two bodies without `xml:lang` share the empty language and are rejected.
    #[test]
    fn invalid_two_same_langs() {
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im' xmlns:html='http://www.w3.org/1999/xhtml'><html:body/><html:body/></html>"
            .parse()
            .unwrap();
        let error = XhtmlIm::try_from(elem).unwrap_err();
        let message = match error {
            FromElementError::Invalid(Error::Other(string)) => string,
            _ => panic!(),
        };
        assert_eq!(message, "Two identical language bodies found in XHTML-IM.");
    }

    // Parses a body directly and walks down to the text of its paragraph.
    #[test]
    fn test_tag() {
        let elem: Element = "<body xmlns='http://www.w3.org/1999/xhtml'/>"
            .parse()
            .unwrap();
        let body = Body::try_from(elem).unwrap();
        assert_eq!(body.children.len(), 0);

        let elem: Element = "<body xmlns='http://www.w3.org/1999/xhtml'><p>Hello world!</p></body>"
            .parse()
            .unwrap();
        let mut body = Body::try_from(elem).unwrap();
        assert_eq!(body.style.len(), 0);
        assert_eq!(body.xml_lang, None);
        assert_eq!(body.children.len(), 1);
        let p = match body.children.pop() {
            Some(Child::Tag(tag)) => tag,
            _ => panic!(),
        };
        let mut children = match p {
            Tag::P { style, children } => {
                assert_eq!(style.len(), 0);
                assert_eq!(children.len(), 1);
                children
            }
            _ => panic!(),
        };
        let text = match children.pop() {
            Some(Child::Text(text)) => text,
            _ => panic!(),
        };
        assert_eq!(text, "Hello world!");
    }

    // An unknown element directly inside a body is replaced by its content,
    // both in the generated HTML and in the re-serialised element.
    #[test]
    fn test_unknown_element() {
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><coucou>Hello world!</coucou></body></html>"
            .parse()
            .unwrap();
        let parsed = XhtmlIm::try_from(elem).unwrap();
        let parsed2 = parsed.clone();
        let html = parsed.into_html();
        assert_eq!(html, "Hello world!");

        let elem = Element::from(parsed2);
        assert_eq!(String::from(&elem), "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'>Hello world!</body></html>");
    }

    // HTML generation for a plain paragraph and for nested inline markup.
    #[test]
    fn test_generate_html() {
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><p>Hello world!</p></body></html>"
            .parse()
            .unwrap();
        let xhtml_im = XhtmlIm::try_from(elem).unwrap();
        let html = xhtml_im.into_html();
        assert_eq!(html, "<p>Hello world!</p>");

        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><p>Hello <strong>world</strong>!</p></body></html>"
            .parse()
            .unwrap();
        let xhtml_im = XhtmlIm::try_from(elem).unwrap();
        let html = xhtml_im.into_html();
        assert_eq!(html, "<p>Hello <strong>world</strong>!</p>");
    }

    // Builds a tree by hand; the value is only constructed, nothing is
    // asserted on it.
    #[test]
    fn generate_tree() {
        let world = "world".to_string();

        Body {
            style: vec![],
            xml_lang: Some("en".to_string()),
            children: vec![Child::Tag(Tag::P {
                style: vec![],
                children: vec![
                    Child::Text("Hello ".to_string()),
                    Child::Tag(Tag::Strong {
                        children: vec![Child::Text(world)],
                    }),
                    Child::Text("!".to_string()),
                ],
            })],
        };
    }
}