} // eo namespace <anonymous>
+/**
+ * @brief Tells whether a key is present in an associative container.
+ *
+ * @param name Key to look up.
+ * @param container Any container offering find() and end(), e.g. a set or
+ * a map keyed by string.
+ * @return bool True if the key was found, false otherwise.
+ */
+template < typename CONT >
+bool is_contained(const string &name, const CONT &container)
+{
+    if (container.find(name) == container.end())
+        return false;
+
+    return true;
+}
+
+/**
+ * @brief Parses a single html tag and validates it against the white-lists
+ * ALLOWED_NORMAL, ALLOWED_VOID and ALLOWED_WITH_ATTR.
+ *
+ * The tag is parsed once, at construction time. Afterwards the object only
+ * answers queries about the parsed tag and can emit a sanitized version
+ * of it.
+ */
+class HtmlTag
+{
+private:
+    /// The tag exactly as it was handed to the constructor.
+    const string FullTag;
+    /// Lower-cased tag name without brackets, slashes or attributes.
+    string Name;
+    /// Validated attributes, joined by single spaces as name=value pairs.
+    string Attributes;
+    /// Result of process_tag(), cached by the constructor.
+    bool IsAllowed;
+
+    /**
+     * @brief Checks if this fits in one of the three groups of white-listed
+     * tags and populate private variables on the way.
+     *
+     * Name may already be set when false is returned because an attribute
+     * was rejected; callers should consult is_allowed() first.
+     *
+     * @return bool True if tag and its attributes are valid.
+     */
+    bool process_tag()
+    {
+        string name = FullTag;
+
+        if (is_end_tag())
+            name = remove_prefix(name, "</");
+        else
+            name = remove_prefix(name, "<");
+
+        // Spaces after "<" or "</" are invalid.
+        if (name.substr(0,1).find_first_of(I2n::Whitespaces) != string::npos)
+            return false;
+
+        to_lower_mod(name);
+
+        // If this tag has the self-closing notation, it must be contained in
+        // the allowed void tags list.
+        if (has_suffix(FullTag, "/>"))
+        {
+            name = remove_suffix(name, "/>");
+            trim_mod(name);
+
+            if (is_contained(name, ALLOWED_VOID))
+            {
+                Name = name;
+                return true;
+            }
+
+            return false;
+        }
+
+        name = remove_suffix(name, ">");
+        trim_mod(name);
+
+        // Try to match any allowed tag. An attribute-capable tag written
+        // without any attribute is accepted here as well.
+        if (is_contained(name, ALLOWED_NORMAL)
+            || is_contained(name, ALLOWED_VOID)
+            || is_contained(name, ALLOWED_WITH_ATTR))
+        {
+            Name = name;
+            return true;
+        }
+
+        // It may be a tag with attributes which requires special handling.
+        string tmp_name, tmp_attr;
+        if (!pair_split(name, tmp_name, tmp_attr, ' '))
+            return false;
+
+        // But end tags should not have attributes.
+        if (is_end_tag())
+            return false;
+
+        if (!is_contained(tmp_name, ALLOWED_WITH_ATTR))
+            return false;
+
+        Name = tmp_name;
+
+        // The tag is contained in the allowed tags list, but all attributes
+        // and their content should also be allowed and valid.
+        list<string> attrs = split_string(tmp_attr, " ", true, Whitespaces);
+
+        // Map of allowed attributes of the given tag. The lookup cannot fail
+        // because membership was verified just above.
+        const map< string, AttributeHandler> &attrs_handlers
+            = ALLOWED_WITH_ATTR.find(tmp_name)->second;
+
+        BOOST_FOREACH(const string &attribute, attrs)
+        {
+            // A token without '=' is treated as an attribute name whose
+            // value is empty (assuming pair_split leaves its outputs
+            // untouched on failure -- TODO confirm).
+            string attr_name = "", attr_value = "";
+            if (!pair_split(attribute, attr_name, attr_value, '='))
+                attr_name = attribute;
+
+            // Look the handler up once; a missing entry means the attribute
+            // is not white-listed for this tag.
+            const map< string, AttributeHandler>::const_iterator handler
+                = attrs_handlers.find(attr_name);
+            if (handler == attrs_handlers.end())
+                return false;
+
+            // Execute the AttributeHandler for the given attribute.
+            if (!handler->second(attr_value))
+                return false;
+
+            if (!Attributes.empty())
+                Attributes += " ";
+
+            Attributes += attr_name + "=" + attr_value;
+        }
+
+        return true;
+    }
+
+public:
+    /**
+     * @brief Parses and validates the given tag.
+     *
+     * @param tag Complete html tag including the angle brackets.
+     */
+    HtmlTag(const string &tag)
+        : FullTag(tag)
+        , Name("")
+        , Attributes("")
+    {
+        IsAllowed = process_tag();
+    }
+
+    /**
+     * @brief Tells if the tag and all of its attributes passed validation.
+     *
+     * @return bool True if the tag is white-listed and valid.
+     */
+    bool is_allowed() const
+    {
+        return IsAllowed;
+    }
+
+    /**
+     * @brief Returns the lower-cased tag name without brackets and
+     * attributes.
+     *
+     * @return string The name; only meaningful if is_allowed() is true.
+     */
+    const string get_name() const
+    {
+        return Name;
+    }
+
+    /**
+     * @brief Tells if the original tag starts with the characters "</".
+     *
+     * @return bool True for end tags.
+     */
+    bool is_end_tag() const
+    {
+        return has_prefix(FullTag, "</");
+    }
+
+    /**
+     * @brief Tells if the tag name is listed in ALLOWED_VOID.
+     *
+     * @return bool True for void tags.
+     */
+    bool is_void() const
+    {
+        return is_contained(Name, ALLOWED_VOID);
+    }
+
+    /**
+     * @brief Builds the text that should be emitted for this tag.
+     *
+     * End tags and tags that are not in ALLOWED_WITH_ATTR are returned
+     * as the lower-cased original. Tags from ALLOWED_WITH_ATTR are rebuilt
+     * from the name and the validated attributes.
+     *
+     * @return string Sanitized tag.
+     */
+    const string sanitized() const
+    {
+        if (is_end_tag()
+            || !is_contained(Name, ALLOWED_WITH_ATTR))
+            return to_lower(FullTag);
+
+        // Do not emit a stray blank ("<a >") if there are no attributes.
+        if (Attributes.empty())
+            return "<" + Name + ">";
+
+        return "<" + Name + " " + Attributes + ">";
+    }
+
+};
+
/**
* @brief Verifies if a html "a" tag has a valid link and sanitize it if necessary.
* Modify tag and add redirector prefix if link has a valid protocol.
}
/**
- * @brief Check if tag is in a whitelist of allowed tags.
- * Does not accept a tag containing attributes.
- * Example:
- * <h1> or </p> returns true.
- * <br size="param"> returns false.
- *
- * @param tag html tag to be verified.
- * @return true if is an allowed tag, false otherwise.
- */
-bool is_tag_allowed(string tag)
-{
- replace_all(tag, "<", "");
- replace_all(tag, ">", "");
- replace_all(tag, "/", "");
- to_lower_mod(tag);
-
- return ALLOWED_TAGS.find(tag) != ALLOWED_TAGS.end();
-}
-
-/**
* @brief Restricts html code to a small list of allowed tags.
* The attribute "href" from the tag "a" has its value sanitized and if it
* contains unsafe characters, the tag is stripped.
{
if (!s.second)
{
- result = result + html_entities(s.first);
+ result = result + html_entities(s.first, true);
continue;
}
- if (is_tag_allowed(s.first) )
+
+ HtmlTag tag(s.first);
+
+ if (tag.is_allowed())
{
- // Checks if this tag (s.first) is not a closing tag.
- if (s.first.compare(0, 2, "</") != 0)
- {
- result = result + s.first;
- // The tag "<br>" does not have a closing tag, so don't push it
- // into the expected tags vector.
- if (to_lower(s.first) != "<br>")
- expected_tags.push_back("</"+s.first.substr(1));
- continue;
- }
// If it is a closing tag, check if this is the expected one.
// We can't accept an closing tag that was never opened.
- else if (expected_tags.size() > 0 && expected_tags.back() == s.first)
+ if (tag.is_end_tag())
{
- // When the closing tag is indeed what we expect, pop it out
- // from the LIFO queue.
- result = result + s.first;
- expected_tags.pop_back();
- continue;
+ if (expected_tags.size() > 0 && expected_tags.back() == tag.get_name())
+ expected_tags.pop_back();
+ else if (!strip)
+ throw runtime_error("Invalid tag: " + s.first);
+ else
+ continue;
+ }
+ else if (!tag.is_void())
+ {
+ expected_tags.push_back(tag.get_name());
}
- }
- // Copy s.first to a string since it might be modified by link_sanitizer.
- string tag = s.first;
- if (to_lower_mod(tag).compare(0, AHREF.size(), AHREF) == 0
- && link_sanitizer(tag, redirect_prefix))
- {
- result = result + tag;
- expected_tags.push_back("</a>");
+ // Add a sanitized version of the tag to the result string.
+ result += tag.sanitized();
continue;
}
if (!strip)
throw runtime_error("Invalid tag: " + s.first);
-
- }
- if (expected_tags.size() > 0) //One or more tags were not closed.
- {
- BOOST_REVERSE_FOREACH(const string &s, expected_tags)
- {
- result = result + s;
- }
}
+ // One or more tags were not closed.
+ BOOST_REVERSE_FOREACH(const string &close_tag, expected_tags)
+ result = result + "</" + close_tag + ">";
+
return result.c_str();
}