2 The software in this package is distributed under the GNU General
3 Public License version 2 (with a special exception described below).
5 A copy of GNU General Public License (GPL) is included in this distribution,
6 in the file COPYING.GPL.
8 As a special exception, if other files instantiate templates or use macros
9 or inline functions from this file, or you compile this file and link it
10 with other works to produce a work based on this file, this file
11 does not by itself cause the resulting work to be covered
12 by the GNU General Public License.
14 However the source code for this file must still be made available
15 in accordance with section (3) of the GNU General Public License.
17 This exception does not invalidate any other reasons why a work based
18 on this file might be covered by the GNU General Public License.
22 * (c) Copyright 2007-2008 by Intra2net AG
30 #include <cmath> // for round()
37 #include <boost/numeric/conversion/cast.hpp>
38 #include <boost/foreach.hpp>
40 #include <stringfunc.hxx>
namespace
{

/// Lookup tables mapping a nibble value (0..15) to its hex digit.
const std::string hexDigitsLower("0123456789abcdef");
const std::string hexDigitsUpper("0123456789ABCDEF");

/**
 * @brief functor converting a single character to upper case.
 */
struct UpperFunc
{
    char operator() (char c)
    {
        // std::toupper is only defined for values representable as unsigned char (or EOF),
        // so a plain (possibly negative) char must be converted first.
        return static_cast<char>( std::toupper( static_cast<unsigned char>(c) ) );
    }
}; // eo struct UpperFunc

/**
 * @brief functor converting a single character to lower case.
 */
struct LowerFunc
{
    char operator() (char c)
    {
        // see UpperFunc: avoid passing negative values to std::tolower
        return static_cast<char>( std::tolower( static_cast<unsigned char>(c) ) );
    }
}; // eo struct LowerFunc

} // eo namespace <anonymous>
/**
 * default list of Whitespaces (" \t\r\n");
 */
const std::string Whitespaces = " \t\r\n";

/**
 * default list of lineendings ("\r\n");
 */
const std::string LineEndings= "\r\n";
/**
 * @brief checks if a string begins with a given prefix.
 * @param str the string which is tested
 * @param prefix the prefix which should be tested for.
 * @return @a true iff the prefix is not empty and the string begins with that prefix.
 */
bool has_prefix(const std::string& str, const std::string& prefix)
{
    // an empty prefix never matches; a prefix longer than the string cannot match
    if (prefix.empty() || str.empty() || str.size() < prefix.size() )
    {
        return false;
    }
    return str.compare(0, prefix.size(), prefix) == 0;
} // eo has_prefix(const std::string&,const std::string&)
/**
 * @brief checks if a string ends with a given suffix.
 * @param str the string which is tested
 * @param suffix the suffix which should be tested for.
 * @return @a true iff the suffix is not empty and the string ends with that suffix.
 */
bool has_suffix(const std::string& str, const std::string& suffix)
{
    // an empty suffix never matches; a suffix longer than the string cannot match
    if (suffix.empty() || str.empty() || str.size() < suffix.size() )
    {
        return false;
    }
    return str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
} // eo has_suffix(const std::string&,const std::string&)
/**
 * cut off characters from a given list from front and end of a string.
 * @param[in,out] str the string which should be trimmed.
 * @param charlist the list of characters to remove from beginning and end of string
 * @return the result string.
 */
std::string trim_mod(std::string& str, const std::string& charlist)
{
    // first: trim the beginning:
    std::string::size_type pos= str.find_first_not_of (charlist);
    if (pos == std::string::npos)
    {
        // whole string consists of charlist (or is already empty)
        str.clear();
        return str;
    }
    else if (pos > 0)
    {
        // str starts with charlist
        str.erase(0, pos);
    }
    // now let's look at the tail:
    pos= str.find_last_not_of(charlist) +1; // note: we already know there is at least one other char!
    if ( pos < str.size() )
    {
        str.erase(pos, str.size()-pos);
    }
    return str;
} // eo trim_mod(std::string&,const std::string&)
/**
 * removes last character from a string when it is in a list of chars to be removed.
 * @param[in,out] str the string.
 * @param what the list of chars which will be tested for.
 * @return the resulting string with last char removed (if applicable)
 */
std::string chomp_mod(std::string& str, const std::string& what)
{
    if (str.empty() || what.empty() )
    {
        return str;
    }
    // only a single trailing character is ever removed
    if (what.find(str.at (str.size()-1) ) != std::string::npos)
    {
        str.erase(str.size() - 1);
    }
    return str;
} // eo chomp_mod(std::string&,const std::string&)
174 * @brief converts a string to lower case.
175 * @param[in,out] str the string to modify.
178 std::string to_lower_mod(std::string& str)
180 std::transform(str.begin(), str.end(), str.begin(), LowerFunc() );
182 } // eo to_lower_mod(std::string&)
186 * @brief converts a string to upper case.
187 * @param[in,out] str the string to modify.
190 std::string to_upper_mod(std::string& str)
192 std::transform( str.begin(), str.end(), str.begin(), UpperFunc() );
194 } // eo to_upper_mod(std::string&)
/**
 * cut off characters from a given list from front and end of a string.
 * @param str the string which should be trimmed.
 * @param charlist the list of characters to remove from beginning and end of string
 * @return the result string.
 */
std::string trim (const std::string& str, const std::string& charlist)
{
    // first: trim the beginning:
    std::string::size_type pos0= str.find_first_not_of(charlist);
    if (pos0 == std::string::npos)
    {
        // whole string consists of charlist (or is already empty)
        return std::string();
    }
    // now let's look at the end:
    // (cannot be npos here since at least the char at pos0 is not in charlist)
    std::string::size_type pos1= str.find_last_not_of(charlist);
    return str.substr(pos0, pos1 - pos0 + 1);
} // eo trim(const std::string&,const std::string&)
/**
 * removes last character from a string when it is in a list of chars to be removed.
 * @param str the string.
 * @param what the list of chars which will be tested for.
 * @return the resulting string with last char removed (if applicable)
 */
std::string chomp (const std::string& str, const std::string& what)
{
    if (str.empty() || what.empty() )
    {
        return str;
    }
    // only a single trailing character is ever removed
    if (what.find(str.at (str.size()-1) ) != std::string::npos)
    {
        return str.substr(0, str.size()-1);
    }
    return str;
} // eo chomp(const std::string&,const std::string&)
240 * @brief returns a lower case version of a given string.
241 * @param str the string
242 * @return the lower case version of the string
244 std::string to_lower (const std::string& str)
246 std::string result(str);
247 return to_lower_mod(result);
248 } // eo to_lower(const std::string&)
252 * @brief returns a upper case version of a given string.
253 * @param str the string
254 * @return the upper case version of the string
256 std::string to_upper(const std::string& str)
258 std::string result(str);
259 return to_upper_mod(result);
260 } // eo to_upper(const std::string&)
265 * @brief removes a given suffix from a string.
266 * @param str the string.
267 * @param suffix the suffix which should be removed if the string ends with it.
268 * @return the string without the suffix.
270 * If the string ends with the suffix, it is removed. If the the string doesn't end
271 * with the suffix the original string is returned.
273 std::string remove_suffix(const std::string& str, const std::string& suffix)
275 if (has_suffix(str,suffix) )
277 return str.substr(0, str.size()-suffix.size() );
280 } // eo remove_suffix(const std::string&,const std::string&)
285 * @brief removes a given prefix from a string.
286 * @param str the string.
287 * @param prefix the prefix which should be removed if the string begins with it.
288 * @return the string without the prefix.
290 * If the string begins with the prefix, it is removed. If the the string doesn't begin
291 * with the prefix the original string is returned.
293 std::string remove_prefix(const std::string& str, const std::string& prefix)
295 if (has_prefix(str,prefix) )
297 return str.substr( prefix.size() );
300 } // eo remove_prefix(const std::string&,const std::string&)
304 * split a string to key and value delimited by a given delimiter.
305 * The resulting key and value strings are trimmed (Whitespaces removed at beginning and end).
306 * @param str the string which should be splitted.
307 * @param[out] key the resulting key
308 * @param[out] value the resulting value
309 * @param delimiter the delimiter between key and value; default is '='.
310 * @return @a true if the split was successful.
313 const std::string& str,
318 std::string::size_type pos = str.find (delimiter);
319 if (pos == std::string::npos) return false;
320 key= str.substr(0,pos);
321 value= str.substr(pos+1);
325 } // eo pair_split(const std::string&,std::string&,std::string&,char)
329 * splits a string by given delimiter
331 * @param[in] str the string which should be splitted.
332 * @param[out] result the list resulting from splitting @a str.
333 * @param[in] delimiter the delimiter (word/phrase) at which @a str should be splitted.
334 * @param[in] omit_empty should empty parts not be stored?
335 * @param[in] trim_list list of characters the parts should be trimmed by.
336 * (empty string results in no trim)
339 const std::string& str,
340 std::list<std::string>& result,
341 const std::string& delimiter,
343 const std::string& trim_list
346 std::string::size_type pos, last_pos=0;
347 bool delimiter_found= false;
348 while ( last_pos < str.size() && last_pos != std::string::npos)
350 pos= str.find(delimiter, last_pos);
352 if (pos == std::string::npos)
354 part= str.substr(last_pos);
355 delimiter_found= false;
359 part= str.substr(last_pos, pos-last_pos);
360 delimiter_found=true;
362 if (pos != std::string::npos)
364 last_pos= pos+ delimiter.size();
368 last_pos= std::string::npos;
370 if (!trim_list.empty() ) trim_mod (part, trim_list);
371 if (omit_empty && part.empty() ) continue;
372 result.push_back( part );
374 // if the string ends with a delimiter we need to append an empty string if no omit_empty
376 // (this way we keep the split result consistent to a join operation)
377 if (delimiter_found && !omit_empty)
379 result.push_back("");
381 } // eo split_string(const std::string&,std::list< std::string >&,const std::string&,bool,const std::string&)
384 /** call split_string with list<string>, converts result to vector; vector is clear()-ed first
386 * Note: Uses 3 O(n)-operations: list.size, vector.resize and std::swap_ranges;
387 * not sure whether there is a better way to do this
390 const std::string& str,
391 std::vector<std::string>& result,
392 const std::string& delimiter,
394 const std::string& trim_list
397 std::list<std::string> tmp;
398 split_string(str, tmp, delimiter, omit_empty, trim_list);
399 std::size_t size = tmp.size(); // this is O(n)
401 result.resize(size); // also O(n)
402 std::swap_ranges(tmp.begin(), tmp.end(), result.begin()); // also O(n)
406 * splits a string by a given delimiter
407 * @param str the string which should be splitted.
408 * @param delimiter delimiter the delimiter (word/phrase) at which @a str should be splitted.
409 * @param[in] omit_empty should empty parts not be stored?
410 * @param[in] trim_list list of characters the parts should be trimmed by.
411 * (empty string results in no trim)
412 * @return the list resulting from splitting @a str.
414 std::list<std::string> split_string(
415 const std::string& str,
416 const std::string& delimiter,
418 const std::string& trim_list
421 std::list<std::string> result;
422 split_string(str, result, delimiter, omit_empty, trim_list);
424 } // eo split_string(const std::string&,const std::string&,bool,const std::string&)
/**
 * @brief joins a list of strings into a single string.
 *
 * This function is (basically) the reverse operation of @a split_string.
 *
 * @param parts the list of strings.
 * @param delimiter the delimiter which is inserted between the strings.
 * @return the joined string (empty if @a parts is empty).
 */
std::string join_string(
    const std::list< std::string >& parts,
    const std::string& delimiter
)
{
    std::string result;
    if (! parts.empty() )
    {
        std::list< std::string >::const_iterator it= parts.begin();
        // the first part goes in without a leading delimiter
        result = *it;
        while ( ++it != parts.end() )
        {
            result+= delimiter;
            result+= *it;
        }
    }
    return result;
} // eo join_string(const std::list< std::string >&,const std::string&)
/**
 * @brief same as join_string for list, except uses a vector
 * @param parts the vector of strings.
 * @param delimiter the delimiter which is inserted between the strings.
 * @return the joined string (empty if @a parts is empty).
 */
std::string join_string(
    const std::vector< std::string >& parts,
    const std::string& delimiter
)
{
    std::string result;
    if (! parts.empty() )
    {
        std::vector< std::string >::const_iterator it= parts.begin();
        // the first part goes in without a leading delimiter
        result = *it;
        while ( ++it != parts.end() )
        {
            result+= delimiter;
            result+= *it;
        }
    }
    return result;
} // eo join_string(const std::vector< std::string >&,const std::string&)
484 * @brief returns a hex string from a binary string.
485 * @param str the (binary) string
486 * @param upper_case_digits determine whether to use upper case characters for digits A-F.
487 * @return the string in hex notation.
489 std::string convert_binary_to_hex(
490 const std::string& str,
491 bool upper_case_digits
495 std::string hexDigits(upper_case_digits ? hexDigitsUpper : hexDigitsLower);
496 for ( std::string::const_iterator it= str.begin();
500 result.push_back( hexDigits[ ( (*it) >> 4) & 0x0f ] );
501 result.push_back( hexDigits[ (*it) & 0x0f ] );
504 } // eo convert_binary_to_hex(const std::string&,bool)
// NOTE(review): this listing is incomplete - the lines carrying braces, the
// declarations of `result` and `c`, and several statements are not visible here,
// so the code below is left untouched.
// Visible behaviour: walks `str` character by character, looks each character up in
// hexDigitsLower and then hexDigitsUpper, accepts characters from Whitespaces and ':'
// as delimiters, and throws runtime_error for any other character.
// NOTE(review): dynamic exception specifications (`throw (std::runtime_error)`) were
// removed in C++17; this needs attention if the file is built with a newer standard.
508 * @brief converts a hex digit string to binary string.
509 * @param str hex digit string
510 * @return the binary string.
512 * The hex digit string may contains white spaces or colons which are treated
513 * as delimiters between hex digit groups.
515 * @todo rework the handling of half nibbles (consistency)!
517 std::string convert_hex_to_binary(
518 const std::string& str
520 throw (std::runtime_error)
524 bool hasNibble= false;
525 bool lastWasWS= true;
526 for ( std::string::const_iterator it= str.begin();
// p is the digit's index in the table; it stays npos for a non-hex character
530 std::string::size_type p = hexDigitsLower.find( *it );
531 if (p== std::string::npos)
533 p= hexDigitsUpper.find( *it );
535 if (p == std::string::npos)
537 if ( ( Whitespaces.find( *it ) != std::string::npos) // is it a whitespace?
538 or ( *it == ':') // or a colon?
541 // we treat that as a valid delimiter:
544 // 1 nibble before WS is treate as lower part:
// neither a hex digit nor an accepted delimiter: reject the whole input
553 if (p == std::string::npos )
555 throw runtime_error("illegal character in hex digit string: " + str);
569 //we already had a nibble, so a char is complete now:
570 result.push_back( c );
575 // this is the first nibble of a new char:
// NOTE(review): the condition guarding the trailing-nibble handling below is not
// visible; presumably it tests hasNibble - confirm.
581 //well, there is one nibble left
582 // let's do some heuristics:
585 // if the preceeding character was a white space (or a colon)
586 // we treat the nibble as lower part:
587 //( this is consistent with shortened hex notations where leading zeros are not noted)
588 result.push_back( c );
592 // if it was part of a hex digit chain, we treat it as UPPER part (!!)
593 result.push_back( c << 4 );
597 } // eo convert_hex_to_binary(const std::string&)
/**
 * @brief names of the container templates whose allocator argument shorten_stl_types() hides.
 * @return reference to a function-static list which is filled on first use.
 *
 * NOTE(review): the populate-once guard was rebuilt from the gaps in the listing;
 * confirm that no further template names were listed.
 */
static std::list<std::string>& alloc_template_starts()
{
    static std::list<std::string> result;
    if (result.empty())
    {
        result.push_back("std::list");
        result.push_back("std::vector");
    }
    return result;
}
// NOTE(review): this listing is incomplete - braces, the second argument of the first
// replace_all() call, the initialisation of `start`, `comma`, `end` and
// `n_open_brackets`, the loop bodies and the return statement are not visible here,
// so the code below is left untouched.
// Visible behaviour: works on a copy of `input`; for every name returned by
// alloc_template_starts() it searches "<name><", scans for the top-level ',' and the
// matching '>' while counting nested angle brackets, and replaces the allocator
// argument with "_alloc_" if it equals "std::allocator<TYPE> " for the element type.
611 string shorten_stl_types(const string &input)
613 string output = input;
615 // first: replace fixed string for std::string
616 replace_all(output, "std::basic_string<char, std::char_traits<char>, std::allocator<char> >",
619 // loop over list/vector/... that have an allocator, e.g.
620 // std::list< some_type_here, std::allocator<some_type_here> >
621 string::size_type start, comma, end, len, start_text_len;
623 string allocator_text;
624 BOOST_FOREACH(const string &start_text, alloc_template_starts())
629 start_text_len = start_text.length();
630 while( (start=output.find(start_text+"<", start)) != string::npos )
632 len = output.length();
633 start += start_text_len+1; // start next iter and tests here after opening bracket
635 // now comes the tricky part: find matching ',' and the closing '>' even if "subtype" is template again
637 n_open_brackets = 1; // the bracket right after start_text counts as first
// first scan: a ',' only counts while exactly one bracket is open (top level)
638 while (comma < len && n_open_brackets > 0)
640 if (output[comma] == ',' && n_open_brackets == 1)
642 else if (output[comma] == '<')
644 else if (output[comma] == '>')
// second scan: look for the '>' that closes the bracket opened after start_text
649 while (end < len && n_open_brackets > 0)
651 if (output[end] == '<')
653 else if (output[end] == '>')
656 if (n_open_brackets == 0)
657 break; // do not increment end
662 // check that start < comma < end < len && n_open_brackets == 0
663 if (start >= comma || comma >= end || end >= len || n_open_brackets != 0)
664 continue; // input seems to be of unexpected form
666 // check that type in allocator is same as until comma
// start < comma was checked above, so `type` is never empty here
667 string type = output.substr(start, comma-start);
// a type ending in '>' gets a blank before the allocator's closing '>'
668 if (type[type.length()-1] == '>')
669 allocator_text = string("std::allocator<") + type + " > ";
671 allocator_text = string("std::allocator<") + type + "> ";
// comma+2 skips the ',' and the character following it (presumably a blank)
672 if (output.substr(comma+2, end-comma-2) == allocator_text)
673 output.replace(comma+2, end-comma-2, "_alloc_");
680 } // eo namespace I2n
/**
 * @brief converts a string from ISO-8859-1 to UTF-8 using iconv.
 * @param isostring the ISO-8859-1 encoded string.
 * @return the UTF-8 encoded string; the result ends at the first NUL byte of the
 *         converted data.
 * @throw std::runtime_error if the conversion is not available or memory is exhausted.
 *
 * The return value of iconv() itself is not checked (best effort conversion).
 */
std::string iso_to_utf8(const std::string& isostring)
{
    std::string utf8;

    iconv_t i2utf8 = iconv_open("UTF-8", "ISO-8859-1");

    // check the conversion descriptor (the old code compared the function name instead)
    if (i2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from ISO-8859-1 to UTF-8");

    size_t in_size=isostring.size();
    size_t out_size=in_size*4;

    char *buf = (char *)malloc(out_size+1);
    if (buf == NULL)
    {
        // don't leak the conversion descriptor
        iconv_close(i2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = (char *)isostring.c_str();
    char *out = buf;
    iconv(i2utf8, &in, &in_size, &out, &out_size);

    // out_size now holds the unused rest of the buffer: terminate after the written bytes
    buf[isostring.size()*4-out_size]=0;

    utf8=buf;

    free(buf);
    iconv_close(i2utf8);

    return utf8;
}
/**
 * @brief converts a string from UTF-8 to ISO-8859-1 using iconv.
 * @param utf8string the UTF-8 encoded string.
 * @return the ISO-8859-1 encoded string; the result ends at the first NUL byte of the
 *         converted data.
 * @throw std::runtime_error if the conversion is not available or memory is exhausted.
 *
 * The return value of iconv() itself is not checked (best effort conversion).
 */
std::string utf8_to_iso(const std::string& utf8string)
{
    std::string iso;

    iconv_t utf82iso = iconv_open("ISO-8859-1","UTF-8");

    if (utf82iso == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to ISO-8859-1");

    size_t in_size=utf8string.size();
    size_t out_size=in_size;

    char *buf = (char *)malloc(out_size+1);
    if (buf == NULL)
    {
        // don't leak the conversion descriptor
        iconv_close(utf82iso);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = (char *)utf8string.c_str();
    char *out = buf;
    iconv(utf82iso, &in, &in_size, &out, &out_size);

    // out_size now holds the unused rest of the buffer: terminate after the written bytes
    buf[utf8string.size()-out_size]=0;

    iso=buf;

    free(buf);
    iconv_close(utf82iso);

    return iso;
}
/**
 * @brief converts an UTF-8 string to a zero terminated buffer of UCS-4 code points.
 * @param utf8string the UTF-8 encoded string.
 * @return buffer allocated with malloc(); the caller has to free() it.
 * @throw std::runtime_error if the conversion is not available, fails or memory is
 *        exhausted.
 *
 * NOTE(review): the target encoding is fixed to "UCS-4LE" while the buffer is accessed
 * as wchar_t; this presumably assumes a 4 byte little endian wchar_t - confirm for
 * other platforms.
 */
wchar_t* utf8_to_wbuf(const std::string& utf8string)
{
    iconv_t utf82wstr = iconv_open("UCS-4LE","UTF-8");

    if (utf82wstr == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UCS-4");

    size_t in_size=utf8string.size();
    size_t out_size= (in_size+1)*sizeof(wchar_t);

    wchar_t *buf = (wchar_t *)malloc(out_size);
    if (buf == NULL)
    {
        // don't leak the conversion descriptor
        iconv_close(utf82wstr);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = (char *)utf8string.c_str();
    char *out = (char*) buf;
    if (iconv(utf82wstr, &in, &in_size, &out, &out_size) == (size_t)-1)
    {
        // release buffer and descriptor before reporting the failure
        free(buf);
        iconv_close(utf82wstr);
        throw std::runtime_error("error converting char encodings");
    }

    // out_size now holds the unused rest of the buffer: terminate after the written chars
    buf[ ( (utf8string.size()+1)*sizeof(wchar_t)-out_size) /sizeof(wchar_t) ]=0;

    iconv_close(utf82wstr);

    return buf;
}
/**
 * @brief converts a string from UTF-7-IMAP (modified UTF-7) to UTF-8 using iconv.
 * @param utf7imapstring the UTF-7-IMAP encoded string.
 * @return the UTF-8 encoded string; the result ends at the first NUL byte of the
 *         converted data.
 * @throw std::runtime_error if iconv does not know the "UTF-7-IMAP" encoding or
 *        memory is exhausted.
 *
 * The return value of iconv() itself is not checked (best effort conversion).
 */
std::string utf7imap_to_utf8(const std::string& utf7imapstring)
{
    std::string utf8;

    iconv_t utf7imap2utf8 = iconv_open("UTF-8","UTF-7-IMAP");

    if (utf7imap2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-7-IMAP to UTF-8");

    size_t in_size=utf7imapstring.size();
    size_t out_size=in_size*4;

    char *buf = (char *)malloc(out_size+1);
    if (buf == NULL)
    {
        // don't leak the conversion descriptor
        iconv_close(utf7imap2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = (char *)utf7imapstring.c_str();
    char *out = buf;
    iconv(utf7imap2utf8, &in, &in_size, &out, &out_size);

    // out_size now holds the unused rest of the buffer: terminate after the written bytes
    buf[utf7imapstring.size()*4-out_size]=0;

    utf8=buf;

    free(buf);
    iconv_close(utf7imap2utf8);

    return utf8;
}
/**
 * @brief converts a string from UTF-8 to UTF-7-IMAP (modified UTF-7) using iconv.
 * @param utf8string the UTF-8 encoded string.
 * @return the UTF-7-IMAP encoded string; the result ends at the first NUL byte of the
 *         converted data.
 * @throw std::runtime_error if iconv does not know the "UTF-7-IMAP" encoding or
 *        memory is exhausted.
 *
 * The return value of iconv() itself is not checked (best effort conversion).
 */
std::string utf8_to_utf7imap(const std::string& utf8string)
{
    std::string utf7imap;

    iconv_t utf82utf7imap = iconv_open("UTF-7-IMAP", "UTF-8");

    // message now names the direction that actually failed
    if (utf82utf7imap == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UTF-7-IMAP");

    // UTF-7 is base64 encoded, a buffer 10x as large
    // as the utf-8 buffer should be enough. If not the string will be truncated.
    size_t in_size=utf8string.size();
    size_t out_size=in_size*10;

    char *buf = (char *)malloc(out_size+1);
    if (buf == NULL)
    {
        // don't leak the conversion descriptor
        iconv_close(utf82utf7imap);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = (char *)utf8string.c_str();
    char *out = buf;
    iconv(utf82utf7imap, &in, &in_size, &out, &out_size);

    // out_size now holds the unused rest of the buffer: terminate after the written bytes
    buf[utf8string.size()*10-out_size]= 0;

    utf7imap=buf;

    free(buf);
    iconv_close(utf82utf7imap);

    return utf7imap;
}
/**
 * @brief Tokenize string by (html) tags
 * @param[out] tokenized the tokens are appended here; first: text, second: is it a tag?
 * @param input the string to tokenize.
 *
 * Every '<' starts a new token. A token is only flagged as tag once its closing '>' is
 * seen; an unterminated "<..." at the end of the input is stored as normal text.
 */
void tokenize_by_tag(std::vector<std::pair<std::string,bool> > &tokenized, const std::string &input)
{
    std::string::size_type pos, len = input.size();
    bool inside_tag = false;
    std::string current;

    for (pos = 0; pos < len; pos++)
    {
        if (input[pos] == '<')
        {
            inside_tag = true;

            // flush the text collected before the tag
            if (!current.empty() )
            {
                tokenized.push_back( std::make_pair(current, false) );
                current.clear();
            }

            current += input[pos];
        }
        else if (input[pos] == '>' && inside_tag)
        {
            current += input[pos];
            inside_tag = false;
            if (!current.empty() )
            {
                tokenized.push_back( std::make_pair(current, true) );
                current.clear();
            }
        }
        else
            current += input[pos];
    }

    // String left over in buffer?
    if (!current.empty() )
        tokenized.push_back( std::make_pair(current, false) );
} // eo tokenize_by_tag
874 std::string strip_html_tags(const std::string &input)
876 // Pair first: string, second: isTag
877 vector<pair<string,bool> > tokenized;
878 tokenize_by_tag (tokenized, input);
881 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
882 for (token = tokenized.begin(); token != tokens_end; ++token)
884 output += token->first;
887 } // eo strip_html_tags
890 // Smart-encode HTML en
891 string smart_html_entities(const std::string &input)
893 // Pair first: string, second: isTag
894 vector<pair<string,bool> > tokenized;
895 tokenize_by_tag (tokenized, input);
898 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
899 for (token = tokenized.begin(); token != tokens_end; ++token)
901 // keep HTML tags as they are
903 output += token->first;
905 output += html_entities(token->first);
/**
 * @brief finds the first character outside the 7 bit ASCII range.
 * @param str the string to search.
 * @return position of the first byte with a value > 127 or string::npos if there is none.
 */
std::string::size_type find_8bit(const std::string &str)
{
    std::string::size_type l=str.size();
    for (std::string::size_type p=0; p < l; p++)
        if (static_cast<unsigned char>(str[p]) > 127)
            return p;

    return std::string::npos;
}
922 // encoded UTF-8 chars into HTML entities
923 string html_entities(std::string str)
926 replace_all (str, "&", "&");
927 replace_all (str, "<", "<");
928 replace_all (str, ">", ">");
929 replace_all (str, "\"", """);
930 replace_all (str, "'", "'");
931 replace_all (str, "/", "/");
934 replace_all (str, "\xC3\xA4", "ä");
935 replace_all (str, "\xC3\xB6", "ö");
936 replace_all (str, "\xC3\xBC", "ü");
937 replace_all (str, "\xC3\x84", "Ä");
938 replace_all (str, "\xC3\x96", "Ö");
939 replace_all (str, "\xC3\x9C", "Ü");
942 replace_all (str, "\xC3\x9F", "ß");
944 // conversion of remaining non-ASCII chars needed?
945 // just do if needed because of performance
946 if (find_8bit(str) != string::npos)
948 // convert to fixed-size encoding UTF-32
949 wchar_t* wbuf=utf8_to_wbuf(str);
950 ostringstream target;
952 // replace all non-ASCII chars with HTML representation
953 for (int p=0; wbuf[p] != 0; p++)
955 unsigned int c=wbuf[p];
958 target << static_cast<unsigned char>(c);
960 target << "&#" << c << ';';
969 } // eo html_entities(std::string)
971 // convert HTML entities to something that can be viewed on a basic text console (restricted to ASCII-7)
972 string html_entities_to_console(std::string str)
975 replace_all (str, "&", "&");
976 replace_all (str, "<", "<");
977 replace_all (str, ">", ">");
978 replace_all (str, """, "\"");
979 replace_all (str, "'", "'");
980 replace_all (str, "/", "/");
983 replace_all (str, "ä", "ae");
984 replace_all (str, "ö", "oe");
985 replace_all (str, "ü", "ue");
986 replace_all (str, "Ä", "Ae");
987 replace_all (str, "Ö", "Oe");
988 replace_all (str, "Ü", "Ue");
991 replace_all (str, "ß", "ss");
996 // find_html_comments + remove_html_comments(str, comments)
997 void remove_html_comments(string &str)
999 vector<CommentZone> comments = find_html_comments(str);
1000 remove_html_comments(str, comments);
1003 // find all html comments, behaving correctly if they are nested; ignores comment tags ("<!--FOO .... BAR-->")
1004 // If there are invalid comments ("-->" before "<!--" or different number of closing and opening tags),
1005 // then the unknown index of corresponding start/end tag will be represented by a string::npos
1006 // Indices are from start of start tag until first index after closing tag
1007 vector<CommentZone> find_html_comments(const std::string &str)
1009 static const string START = "<!--";
1010 static const string CLOSE = "-->";
1011 static const string::size_type START_LEN = START.length();
1012 static const string::size_type CLOSE_LEN = CLOSE.length();
1014 vector<CommentZone> comments;
1016 // in order to find nested comments, need either recursion or a stack
1017 vector<string::size_type> starts; // stack of start tags
1019 string::size_type pos = 0;
1020 string::size_type len = str.length();
1021 string::size_type next_start, next_close;
1023 while (pos < len) // not really needed but just in case
1025 next_start = str.find(START, pos);
1026 next_close = str.find(CLOSE, pos);
1028 if ( (next_start == string::npos) && (next_close == string::npos) )
1029 break; // we are done
1031 else if ( (next_start == string::npos) || (next_close < next_start) ) // close one comment (pop)
1033 if (starts.empty()) // closing tag without a start
1034 comments.push_back(CommentZone(string::npos, next_close+CLOSE_LEN));
1037 comments.push_back(CommentZone(starts.back(), next_close+CLOSE_LEN));
1040 pos = next_close + CLOSE_LEN;
1043 else if ( (next_close == string::npos) || (next_start < next_close) ) // start a new comment (push)
1045 starts.push_back(next_start);
1046 pos = next_start + START_LEN;
1050 // add comments that have no closing tag from back to front (important for remove_html_comments!)
1051 while (!starts.empty())
1053 comments.push_back(CommentZone(starts.back(), string::npos));
1060 // remove all html comments foundby find_html_comments
1061 void remove_html_comments(std::string &str, const vector<CommentZone> &comments)
1063 // remember position where last removal started
1064 string::size_type last_removal_start = str.length();
1066 // Go from back to front to not mess up indices.
1067 // This requires that bigger comments, that contain smaller comments, come AFTER
1068 // the small contained comments in the comments vector (i.e. comments are ordered by
1069 // their closing tag, not their opening tag). This is true for results from find_html_comments
1070 BOOST_REVERSE_FOREACH(const CommentZone &comment, comments)
1072 if (comment.first == string::npos)
1074 str = str.replace(0, comment.second, ""); // comment starts "before" str --> delete from start
1075 break; // there can be no more
1077 else if (comment.first >= last_removal_start)
1079 continue; // this comment is inside another comment that we have removed already
1081 else if (comment.second == string::npos) // comment ends "after" str --> delete until end
1083 str = str.replace(comment.first, string::npos, "");
1084 last_removal_start = comment.first;
1088 str = str.replace(comment.first, comment.second-comment.first, "");
1089 last_removal_start = comment.first;
/**
 * @brief replaces all occurrences of a string within another string (in place).
 * @param[in,out] base the string to modify.
 * @param ist the text to search for ("is"); must not be empty.
 * @param soll the replacement text ("shall be").
 * @return @a true if at least one occurrence was replaced.
 * @throw std::runtime_error if @a ist is empty.
 *
 * Replaced text is not searched again, so @a soll may contain @a ist.
 */
bool replace_all(std::string &base, const std::string &ist, const std::string &soll)
{
    bool found_ist = false;
    std::string::size_type a=0;

    if (ist.empty() )
        throw std::runtime_error ("replace_all called with empty search string");

    while ( (a=base.find(ist,a) ) != std::string::npos)
    {
        base.replace(a,ist.size(),soll);
        // continue behind the inserted text
        a=a+soll.size();
        found_ist = true;
    }

    return found_ist;
}

/// convenience overload of replace_all taking pointers to strings (must not be NULL)
bool replace_all(std::string &base, const std::string *ist, const std::string *soll)
{
    return replace_all(base,*ist,*soll);
}

/// convenience overload of replace_all taking C strings
bool replace_all(std::string &base, const char *ist, const char *soll)
{
    std::string i=ist;
    std::string s=soll;
    return replace_all(base,&i,&s);
}

/// convenience overload of replace_all taking a C string as replacement
bool replace_all(std::string &base, const std::string &ist, const char *soll)
{
    std::string s=soll;
    return replace_all(base,&ist,&s);
}

/// convenience overload of replace_all taking a C string as search text
bool replace_all(std::string &base, const char *ist, const std::string *soll)
{
    std::string i=ist;
    return replace_all(base,&i,soll);
}
/**
 * @brief replaces all characters that could be problematic or impose a security risk when being logged
 * @param str the original string
 * @param replace_with the character to replace the unsafe chars with
 * @return a string that is safe to send to syslog or other logfiles
 *
 * All chars between 0x20 (space) and 0x7E (~) (including) are considered safe for logging.
 * See e.g. RFC 5424, section 8.2 or the posix character class "printable".
 * This eliminates all possible problems with NUL, control characters, 8 bit chars, UTF8.
 */
std::string sanitize_for_logging(const std::string &str, const char replace_with)
{
    std::string output=str;

    const std::string::size_type len = output.size();
    for (std::string::size_type p=0; p < len; p++)
    {
        // compare as unsigned so the result does not depend on the signedness of char
        const unsigned char c = static_cast<unsigned char>(output[p]);
        if (c < 0x20 || c > 0x7E)
            output[p]=replace_with;
    }

    return output;
}
/**
 * @brief returns a lower case version of a given string.
 * @param src the string
 * @return the lower case version of the string
 */
std::string to_lower(const std::string &src)
{
    std::string dst = src;

    std::string::size_type pos, end = dst.size();
    for (pos = 0; pos < end; pos++)
        // tolower is only defined for values representable as unsigned char (or EOF)
        dst[pos] = static_cast<char>( std::tolower( static_cast<unsigned char>(dst[pos]) ) );

    return dst;
}
/**
 * @brief returns an upper case version of a given string.
 * @param src the string
 * @return the upper case version of the string
 */
std::string to_upper(const std::string &src)
{
    std::string dst = src;

    std::string::size_type pos, end = dst.size();
    for (pos = 0; pos < end; pos++)
        // toupper is only defined for values representable as unsigned char (or EOF)
        dst[pos] = static_cast<char>( std::toupper( static_cast<unsigned char>(dst[pos]) ) );

    return dst;
}
// Number of entries in each of the unit symbol tables below.
1183 const int MAX_UNIT_FORMAT_SYMBOLS = 6;
// NOTE(review): the entries of shortUnitFormatSymbols and the closing "};" lines of both
// tables are not visible in this listing.
1185 const string shortUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
// Long unit names, indexed by the number of divisions done in nice_unit_format();
// each entry starts with a blank so it can be appended directly to the number.
1194 const string longUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
1195 i18n_noop(" Bytes"),
1196 i18n_noop(" KBytes"),
1197 i18n_noop(" MBytes"),
1198 i18n_noop(" GBytes"),
1199 i18n_noop(" TBytes"),
1200 i18n_noop(" PBytes")
/**
 * @brief rounds a number "half up" to a fraction given by a multiplier.
 * @param number the number to round.
 * @param rounding_multiplier the reciprocal of the rounding step, e.g. 10 rounds to
 *        one decimal place.
 * @return the rounded number.
 *
 * The cast to int64_t truncates towards zero, so "half up" only holds for
 * non-negative numbers.
 */
long double rounding_upwards(
    const long double number,
    const int rounding_multiplier
)
{
    long double rounded_number;
    rounded_number = number * rounding_multiplier;
    rounded_number += 0.5;
    // cut off the remaining fraction
    rounded_number = (int64_t) (rounded_number);
    rounded_number = (long double) (rounded_number) / (long double) (rounding_multiplier);

    return rounded_number;
}
// NOTE(review): this listing is incomplete - the last parameter (`base`), the
// declarations of `multiple`, `sizecount` and `out`, all braces and the return
// statement are not visible here, so the code below is left untouched.
// Visible behaviour: repeatedly divides the input by `multiple` (chosen depending on
// `base`), rounds the result with rounding_upwards() and appends the translated unit
// symbol selected by `sizecount`.
1219 string nice_unit_format(
1220 const int64_t input,
1221 const UnitFormat format,
1225 // select the system of units (decimal or binary)
1227 if (base == UnitBase1000)
1236 long double size = input;
1238 // check the size of the input number to fit in the appropriate symbol
// strictly greater: a value equal to `multiple` is not divided
1240 while (size > multiple)
1242 size = size / multiple;
1245 // rollback to the previous values and stop the loop when cannot
1246 // represent the number length.
1247 if (sizecount >= MAX_UNIT_FORMAT_SYMBOLS)
1249 size = size * multiple;
// with a multiplier of 10 rounding_upwards() keeps one decimal place
1255 // round the input number "half up" to multiples of 10
1256 const int rounding_multiplier = 10;
1257 size = rounding_upwards(size, rounding_multiplier);
1259 // format the input number, placing the appropriate symbol
1261 out.setf (ios::fixed);
1262 if (format == ShortUnitFormat)
1265 out << size << i18n( shortUnitFormatSymbols[sizecount].c_str() );
1270 out << size << i18n( longUnitFormatSymbols[sizecount].c_str() );
1274 } // eo nice_unit_format(int input)
// NOTE(review): the `input` and `base` parameter lines and the braces are not visible
// in this listing; per the trailing "eo" comment `input` is a double.
// Rounds the floating point input to the nearest integer and delegates to the int64_t
// overload of nice_unit_format().
1277 string nice_unit_format(
1279 const UnitFormat format,
1283 // round as double and cast to int64_t
1284 // cast raised overflow error near max val of int64_t (~9.2e18, see unittest)
// boost::numeric_cast is a range-checked conversion
1285 int64_t input_casted_and_rounded =
1286 boost::numeric_cast<int64_t>( round(input) );
1289 return nice_unit_format( input_casted_and_rounded, format, base );
1290 } // eo nice_unit_format(double input)
/**
 * @brief escapes a string so it can be stored as a quoted, single line value.
 * @param s the string to escape.
 * @return the string with '"' and '\' prefixed by a backslash and CR / LF replaced by
 *         the two character sequences "\r" / "\n".
 */
std::string escape(const std::string &s)
{
    std::string out(s);
    std::string::size_type p;

    // quotes and backslashes get a backslash in front of them
    p=0;
    while ( (p=out.find_first_of("\"\\",p) ) !=out.npos)
    {
        out.insert (p,"\\");
        // skip the inserted backslash and the escaped char
        p+=2;
    }

    p=0;
    while ( (p=out.find_first_of("\r",p) ) !=out.npos)
    {
        out.replace (p,1,"\\r");
        p+=2;
    }

    p=0;
    while ( (p=out.find_first_of("\n",p) ) !=out.npos)
    {
        out.replace (p,1,"\\n");
        p+=2;
    }

    return out;
} // eo escape(const std::string&)
// NOTE(review): this listing is incomplete - the declarations of `out`, `e` and
// `escaped`, the handling when no closing quote is found, the remaining switch cases,
// all braces and the return statement are not visible here, so the code below is left
// untouched.
// Visible behaviour: expects a '"' at s[startpos], searches the next '"' that is not
// escaped by a backslash, reports the position via `endpos` and replaces the escape
// sequences inside the extracted string.
1325 string descape(const string &s, int startpos, int &endpos)
// s.at() itself throws out_of_range if startpos is outside of s
1329 if (s.at(startpos) != '"')
1330 throw out_of_range("value not type escaped string");
1332 out=s.substr(startpos+1);
1333 string::size_type p=0;
1335 // search for the end of the string
1336 while ( (p=out.find("\"",p) ) !=out.npos)
1341 // the " might be escaped with a backslash
1342 while (e>=0 && out.at (e) =='\\')
1344 if (escaped == false)
1358 // we now have the end of the string
1359 out=out.substr(0,p);
1361 // tell calling prog about the endposition
// p is relative to `out`, which starts one char behind startpos
1362 endpos=startpos+p+1;
1364 // descape all \ stuff inside the string now
1366 while ( (p=out.find_first_of("\\",p) ) !=out.npos)
// NOTE(review): out.at(p+1) throws out_of_range if a backslash is the last char of `out`
1368 switch (out.at(p+1) )
1371 out.replace(p,2,"\r");
1374 out.replace(p,2,"\n");
1383 } // eo descape(const std::string&,int,int&)
1386 string escape_shellarg(const string &input)
1388 string output = "'";
1389 string::const_iterator it, it_end = input.end();
1390 for (it = input.begin(); it != it_end; ++it)