From ba93f43dc2b29d1f6f6313885435adba8986a00c Mon Sep 17 00:00:00 2001 From: Juliana Rodrigueiro Date: Wed, 19 Sep 2018 17:03:24 +0200 Subject: [PATCH] Handle tag validation according to its white list group Make the whole validation process dependent of the group type and not of specific tags and their particularities. --- src/restricted_html.cpp | 227 ++++++++++++++++++++++++++++++++++++----------- 1 files changed, 173 insertions(+), 54 deletions(-) diff --git a/src/restricted_html.cpp b/src/restricted_html.cpp index 0aeab2d..ee91156 100644 --- a/src/restricted_html.cpp +++ b/src/restricted_html.cpp @@ -92,6 +92,157 @@ const pcrecpp::RE SAFE_URL("^(http(s?):\\/\\/)(([a-zA-Z0-9\\.\\-\\_]+(\\.[a-zA-" } // eo namespace +template < typename CONT > +bool is_contained(const string &name, const CONT &container) +{ + return container.find(name) != container.end(); +} + +class HtmlTag +{ +private: + const string FullTag; + string Name; + string Attributes; + bool IsAllowed; + + /** + * @brief Checks if this fits in one of the three groups of white-listed + * tags and populate private variables on the way. + * + * @return bool True if tag and its attributes are valid. + */ + bool process_tag() + { + string name = FullTag; + + if (is_end_tag()) + name = remove_prefix(name, "")) + { + name = remove_suffix(name, "/>"); + trim_mod(name); + + if (is_contained(name, ALLOWED_VOID)) + { + Name = name; + return true; + } + + return false; + } + + name = remove_suffix(name, ">"); + trim_mod(name); + + // Try to match any allowed tag. + if (is_contained(name, ALLOWED_NORMAL) + || is_contained(name, ALLOWED_VOID) + || is_contained(name, ALLOWED_WITH_ATTR)) + { + Name = name; + return true; + } + + // It may be a tag with attributes which requires special handling. + string tmp_name, tmp_attr; + if (!pair_split(name, tmp_name, tmp_attr, ' ')) + return false; + + // But end tags should not have attributes. + if (is_end_tag()) + return false; + + if (!is_contained(tmp_name, ALLOWED_WITH_ATTR)) + return false; + + Name = tmp_name; + + // The tag is contained in the allowed tags list, but all attributes + // and their content should also be allowed and valid. + list attrs = split_string(tmp_attr, " ", true, Whitespaces); + + // Map of allowed attributes of the given tag. + const map< string, AttributeHandler> &attrs_handlers + = ALLOWED_WITH_ATTR.find(tmp_name)->second; + + BOOST_FOREACH(const string &attribute, attrs) + { + string attr_name = "", attr_value = ""; + if (!pair_split(attribute, attr_name, attr_value, '=')) + attr_name = attribute; + + if (!is_contained(attr_name, attrs_handlers)) + return false; + + // Execute the AttributeHandler for the given attribute. + if (!attrs_handlers.find(attr_name)->second(attr_value)) + return false; + + if (!Attributes.empty()) + Attributes += " "; + + Attributes += attr_name + "=" + attr_value; + } + + return true; + } + +public: + HtmlTag(const string &tag) + : FullTag(tag) + , Name("") + , Attributes("") + { + IsAllowed = process_tag(); + } + + bool is_allowed() const + { + return IsAllowed; + } + + const string get_name() const + { + return Name; + } + + bool is_end_tag() const + { + if (has_prefix(FullTag, ""; + } + +}; + /** * @brief Verifies if a html "a" tag has a valid link and sanitize it if necessary. * Modify tag and add redirector prefix if link has a valid protocol. @@ -150,26 +301,6 @@ bool link_sanitizer(string &tag, const std::string &redirect_prefix) } /** - * @brief Check if tag is in a whitelist of allowed tags. - * Does not accept a tag containing attributes. - * Example: - *

or

returns true. - *
returns false. - * - * @param tag html tag to be verified. - * @return true if is an allowed tag, false otherwise. - */ -bool is_tag_allowed(string tag) -{ - replace_all(tag, "<", ""); - replace_all(tag, ">", ""); - replace_all(tag, "/", ""); - to_lower_mod(tag); - - return ALLOWED_TAGS.find(tag) != ALLOWED_TAGS.end(); -} - -/** * @brief Restricts html code to a small list of allowed tags. * The attribute "href" from the tag "a" has its value sanitized and if it * contains unsafe characters, the tag is stripped. @@ -201,55 +332,43 @@ const string restrict_html(const string &html_code_orig, { if (!s.second) { - result = result + html_entities(s.first); + result = result + html_entities(s.first, true); continue; } - if (is_tag_allowed(s.first) ) + + HtmlTag tag(s.first); + + if (tag.is_allowed()) { - // Checks if this tag (s.first) is not a closing tag. - if (s.first.compare(0, 2, "" does not have a closing tag, so don't push it - // into the expected tags vector. - if (to_lower(s.first) != "
") - expected_tags.push_back(" 0 && expected_tags.back() == s.first) + if (tag.is_end_tag()) { - // When the closing tag is indeed what we expect, pop it out - // from the LIFO queue. - result = result + s.first; - expected_tags.pop_back(); - continue; + if (expected_tags.size() > 0 && expected_tags.back() == tag.get_name()) + expected_tags.pop_back(); + else if (!strip) + throw runtime_error("Invalid tag: " + s.first); + else + continue; + } + else if (!tag.is_void()) + { + expected_tags.push_back(tag.get_name()); } - } - // Copy s.first to a string since it might be modified by link_sanitizer. - string tag = s.first; - if (to_lower_mod(tag).compare(0, AHREF.size(), AHREF) == 0 - && link_sanitizer(tag, redirect_prefix)) - { - result = result + tag; - expected_tags.push_back(""); + // Add a sanitized version of the tag to the result string. + result += tag.sanitized(); continue; } if (!strip) throw runtime_error("Invalid tag: " + s.first); - - } - if (expected_tags.size() > 0) //One or more tags were not closed. - { - BOOST_REVERSE_FOREACH(const string &s, expected_tags) - { - result = result + s; - } } + // One or more tags were not closed. + BOOST_REVERSE_FOREACH(const string &close_tag, expected_tags) + result = result + ""; + return result.c_str(); } -- 1.7.1