#include <i18n.h>
#include <boost/numeric/conversion/cast.hpp>
+#include <boost/foreach.hpp>
#include <stringfunc.hxx>
return str;
}
+// Convenience overload: collects the comment ranges of str with find_html_comments
+// and then strips them from str in place with remove_html_comments(str, comments).
+void remove_html_comments(string &str)
+{
+    vector<CommentZone> zones;
+    find_html_comments(str, zones);   // fill zones with the comment ranges found in str
+    remove_html_comments(str, zones); // erase exactly those ranges from str
+}
+
+// Locate all HTML comments ("<!-- ... -->") in str and append one CommentZone per
+// comment to comments. Only the two tags are searched for; the text between them is
+// not interpreted. Nested comments are matched like parentheses: each "-->" closes the
+// most recently opened "<!--" that is still unclosed.
+//
+// A zone is (index of the first character of "<!--", first index after the matching "-->").
+// Unbalanced input is reported with string::npos standing in for the missing side:
+//   - "-->" while no comment is open -> (string::npos, first index after that "-->")
+//   - "<!--" that is never closed    -> (index of that "<!--", string::npos)
+//
+// Order of the appended zones: zones that have a closing tag come first, in the order
+// of their closing tags (so an enclosing comment follows the comments nested inside it);
+// the never-closed zones follow, right-most opening tag first.
+// remove_html_comments(str, comments) depends on this order.
+//
+// Entries already present in comments are left untouched.
+void find_html_comments(const std::string &str, vector<CommentZone> &comments)
+{
+    static const string START = "<!--";
+    static const string CLOSE = "-->";
+    static const string::size_type START_LEN = START.length();
+    static const string::size_type CLOSE_LEN = CLOSE.length();
+
+    // Stack of opening-tag positions that have not been closed yet (needed for nesting).
+    vector<string::size_type> open_starts;
+
+    const string::size_type len = str.length();
+    string::size_type pos = 0;
+
+    // Cached search results. A cached hit stays valid as long as it is not behind pos,
+    // and a miss (npos) stays a miss for every later pos. Re-searching only when a hit
+    // has fallen behind pos avoids rescanning the rest of the string on every iteration,
+    // which would be quadratic for input with many unmatched tags.
+    string::size_type next_start = str.find(START, pos);
+    string::size_type next_close = str.find(CLOSE, pos);
+
+    while (pos < len)
+    {
+        if (next_start != string::npos && next_start < pos)
+            next_start = str.find(START, pos);
+        if (next_close != string::npos && next_close < pos)
+            next_close = str.find(CLOSE, pos);
+
+        if (next_start == string::npos && next_close == string::npos)
+            break; // no tags left
+
+        if (next_start == string::npos || next_close < next_start)
+        {
+            // The closing tag comes first: it ends the innermost open comment (pop).
+            const string::size_type zone_end = next_close + CLOSE_LEN;
+            if (open_starts.empty())
+            {
+                // closing tag without an opening tag
+                comments.push_back(CommentZone(string::npos, zone_end));
+            }
+            else
+            {
+                comments.push_back(CommentZone(open_starts.back(), zone_end));
+                open_starts.pop_back();
+            }
+            pos = zone_end;
+        }
+        else
+        {
+            // The opening tag comes first (the two tags can never start at the same
+            // index because they begin with different characters): open a comment (push).
+            open_starts.push_back(next_start);
+            pos = next_start + START_LEN;
+        }
+    }
+
+    // Report comments that were never closed, right-most opening tag first, so that
+    // the outermost one ends up last in comments (required by remove_html_comments).
+    while (!open_starts.empty())
+    {
+        comments.push_back(CommentZone(open_starts.back(), string::npos));
+        open_starts.pop_back();
+    }
+}
+
+// Erase from str every comment range listed in comments (as found by find_html_comments).
+//
+// The ranges are processed from the last vector element to the first, so that erasing one
+// range never shifts the indices of the ranges still to be processed. This requires that a
+// comment which encloses other comments is stored AFTER the comments it encloses, i.e. the
+// vector is ordered by closing tag, not by opening tag. Results of find_html_comments
+// satisfy this.
+void remove_html_comments(std::string &str, const vector<CommentZone> &comments)
+{
+    // Start index of the most recent erase; initially "nothing erased yet".
+    string::size_type erased_from = str.length();
+
+    typedef vector<CommentZone>::const_reverse_iterator ZoneRevIter;
+    for (ZoneRevIter zone = comments.rbegin(); zone != comments.rend(); ++zone)
+    {
+        const string::size_type zone_begin = zone->first;
+        const string::size_type zone_end = zone->second;
+
+        if (zone_begin == string::npos)
+        {
+            // Closing tag without an opening tag: the comment starts "before" str,
+            // so everything from the start of str up to the end of that tag goes.
+            str.erase(0, zone_end);
+            break; // zones ordered by closing tag: all earlier ones ended inside that prefix
+        }
+
+        if (zone_begin >= erased_from)
+            continue; // nested in a comment that has already been erased
+
+        // A missing closing tag (npos) means the comment runs "past" the end of str:
+        // erase up to the end. Otherwise erase exactly the zone.
+        const string::size_type count =
+            (zone_end == string::npos) ? string::npos : zone_end - zone_begin;
+        str.erase(zone_begin, count);
+        erased_from = zone_begin;
+    }
+}
+
bool replace_all(string &base, const char *ist, const char *soll)
{
string i=ist;