// Reconstructed from git patch 3f5c5ccd81eff5e730447c1e74140ea7ab019dd4
//   Author:  Christian Herdtweck
//   Date:    Wed, 8 Feb 2017 16:22:34 +0100
//   Subject: Create functions find/remove_html_comments
//
//   Similar functions existed in two places in UI but these were not able
//   to deal with nested comments.
//
// The patch added the typedef and the three declarations below to
// src/stringfunc.hxx and the three definitions to src/stringfunc.cpp.
//
// NOTE(review): the patch text this was recovered from had lost everything
// written between angle brackets (template arguments, include targets) and
// everything between the first "<!--" and the following "-->" (the signature
// of find_html_comments and its tag constants). Those parts were restored from
// the way the surviving code uses them -- confirm against the repository.
// NOTE(review): the original iterated with BOOST_REVERSE_FOREACH (presumably
// via <boost/foreach.hpp>); a standard reverse iterator is used here instead.

#include <string>
#include <utility>
#include <vector>

// One comment region inside a string: [first, second).
// first  == npos : a closing tag was found that has no opening tag.
// second == npos : an opening tag was found that has no closing tag.
typedef std::pair<std::string::size_type, std::string::size_type> CommentZone;

void find_html_comments(const std::string &str, std::vector<CommentZone> &result);
void remove_html_comments(std::string &str);
void remove_html_comments(std::string &str, const std::vector<CommentZone> &comments);

// find_html_comments + remove_html_comments(str, comments)
void remove_html_comments(std::string &str)
{
    std::vector<CommentZone> comments;
    find_html_comments(str, comments);
    remove_html_comments(str, comments);
}

// Find all html comments in str and append them to comments, behaving
// correctly if they are nested.
//
// Every zone is (index of its "<!--", index just behind its "-->").
// Zones are appended in the order of their closing tags, so a comment that
// contains other comments comes AFTER the comments it contains.
//
// Invalid input is reported instead of dropped:
//   - a "-->" without an open comment yields (npos, index behind the "-->")
//   - a "<!--" that is never closed yields (index of the "<!--", npos); these
//     are appended last, innermost (right-most) first
//
// Where the two tags overlap, as in "<!-->", the opening tag wins because it
// begins first; the overlapping "-->" is not counted as a closing tag.
void find_html_comments(const std::string &str, std::vector<CommentZone> &comments)
{
    static const char START[] = "<!--";
    static const char CLOSE[] = "-->";
    const std::string::size_type START_LEN = sizeof(START) - 1;
    const std::string::size_type CLOSE_LEN = sizeof(CLOSE) - 1;
    const std::string::size_type npos = std::string::npos;

    // in order to find nested comments, need either recursion or a stack
    std::vector<std::string::size_type> starts;   // stack of open start tags

    // Each search result stays valid until the tag it points at is consumed,
    // so a tag is only searched again once the scan has moved past it. This
    // avoids re-scanning the rest of the string for a missing tag every round.
    std::string::size_type next_start = str.find(START);
    std::string::size_type next_close = str.find(CLOSE);

    while (next_start != npos || next_close != npos)
    {
        // The two tags start with different characters and therefore can never
        // be found at the same index. npos is the largest size_type value, so
        // this comparison is also true if only a closing tag is left.
        if (next_close < next_start)   // close one comment (pop)
        {
            const std::string::size_type close_end = next_close + CLOSE_LEN;

            if (starts.empty())   // closing tag without a start
                comments.push_back(CommentZone(npos, close_end));
            else
            {
                comments.push_back(CommentZone(starts.back(), close_end));
                starts.pop_back();
            }

            // next_start lies behind this closing tag (it cannot begin inside
            // "-->"), so only the closing tag has to be searched again
            next_close = str.find(CLOSE, close_end);
        }
        else   // start a new comment (push)
        {
            const std::string::size_type start_end = next_start + START_LEN;

            starts.push_back(next_start);
            next_start = str.find(START, start_end);

            // a "-->" that overlaps the start tag just consumed ("<!-->")
            // does not count; look for the next one behind the start tag
            if (next_close < start_end)
                next_close = str.find(CLOSE, start_end);
        }
    }

    // add comments that have no closing tag from back to front (important for
    // remove_html_comments!)
    while (!starts.empty())
    {
        comments.push_back(CommentZone(starts.back(), npos));
        starts.pop_back();
    }
}

// Remove all html comments found by find_html_comments from str.
//
// comments must describe str as it is passed in and must be ordered the way
// find_html_comments orders its result (containing comments after the
// comments they contain). A zone that starts behind the end of str makes
// std::string::erase throw std::out_of_range.
void remove_html_comments(std::string &str, const std::vector<CommentZone> &comments)
{
    const std::string::size_type npos = std::string::npos;

    // remember position where last removal started
    std::string::size_type last_removal_start = str.length();

    // Go from back to front to not mess up indices: erasing a zone only moves
    // text behind it, and everything behind it has been handled already.
    for (std::vector<CommentZone>::const_reverse_iterator zone = comments.rbegin();
         zone != comments.rend(); ++zone)
    {
        if (zone->first == npos)
        {
            // comment starts "before" str --> delete from start
            str.erase(0, zone->second);
            break;   // there can be no more: all remaining zones end earlier
        }

        if (zone->first >= last_removal_start)
            continue;   // inside another comment that we have removed already

        if (zone->second == npos)   // comment ends "after" str --> delete until end
            str.erase(zone->first);
        else
            str.erase(zone->first, zone->second - zone->first);

        last_removal_start = zone->first;
    }
}