2 The software in this package is distributed under the GNU General
3 Public License version 2 (with a special exception described below).
5 A copy of GNU General Public License (GPL) is included in this distribution,
6 in the file COPYING.GPL.
8 As a special exception, if other files instantiate templates or use macros
9 or inline functions from this file, or you compile this file and link it
10 with other works to produce a work based on this file, this file
11 does not by itself cause the resulting work to be covered
12 by the GNU General Public License.
14 However the source code for this file must still be made available
15 in accordance with section (3) of the GNU General Public License.
17 This exception does not invalidate any other reasons why a work based
18 on this file might be covered by the GNU General Public License.
22 * (c) Copyright 2007-2008 by Intra2net AG
30 #include <cmath> // for round()
37 #include <boost/numeric/conversion/cast.hpp>
38 #include <boost/foreach.hpp>
40 #include <stringfunc.hxx>
namespace
{

/** hex digit table (lower case), indexed by nibble value 0..15. */
const std::string hexDigitsLower("0123456789abcdef");

/** hex digit table (upper case), indexed by nibble value 0..15. */
const std::string hexDigitsUpper("0123456789ABCDEF");


/**
 * @brief functor which converts a single character to upper case.
 */
struct UpperFunc
{
    char operator() (char c)
    {
        // std::toupper is only defined for values representable as
        // unsigned char (or EOF); a negative plain char must be converted first.
        return static_cast<char>( std::toupper( static_cast<unsigned char>(c) ) );
    }
}; // eo struct UpperFunc


/**
 * @brief functor which converts a single character to lower case.
 */
struct LowerFunc
{
    char operator() (char c)
    {
        // same unsigned char requirement as in UpperFunc
        return static_cast<char>( std::tolower( static_cast<unsigned char>(c) ) );
    }
}; // eo struct LowerFunc


} // eo namespace <anonymous>
/**
 * default list of whitespace characters (" \t\r\n").
 */
const std::string Whitespaces = " \t\r\n";

/**
 * default list of line ending characters ("\r\n").
 */
const std::string LineEndings = "\r\n";
/**
 * @brief checks if a string begins with a given prefix.
 * @param str the string which is tested.
 * @param prefix the prefix which should be tested for.
 * @return @a true iff the prefix is not empty and the string begins with that prefix.
 */
bool has_prefix(const std::string& str, const std::string& prefix)
{
    // an empty prefix never matches (by contract) and a prefix longer than
    // the string cannot match; this also covers the empty string.
    if (prefix.empty() || str.size() < prefix.size() )
    {
        return false;
    }
    return str.compare(0, prefix.size(), prefix) == 0;
} // eo has_prefix(const std::string&,const std::string&)
/**
 * @brief checks if a string ends with a given suffix.
 * @param str the string which is tested.
 * @param suffix the suffix which should be tested for.
 * @return @a true iff the suffix is not empty and the string ends with that suffix.
 */
bool has_suffix(const std::string& str, const std::string& suffix)
{
    // an empty suffix never matches (by contract) and a suffix longer than
    // the string cannot match; this also covers the empty string.
    if (suffix.empty() || str.size() < suffix.size() )
    {
        return false;
    }
    const std::string::size_type offset = str.size() - suffix.size();
    return str.compare(offset, suffix.size(), suffix) == 0;
} // eo has_suffix(const std::string&,const std::string&)
/**
 * cut off characters from a given list from front and end of a string.
 * @param[in,out] str the string which should be trimmed (modified in place).
 * @param charlist the list of characters to remove from beginning and end of string
 * @return the result string (a copy of the modified @a str).
 */
std::string trim_mod(std::string& str, const std::string& charlist)
{
    const std::string::size_type first = str.find_first_not_of(charlist);
    if (first == std::string::npos)
    {
        // whole string consists of charlist (or is already empty)
        str.clear();
        return str;
    }

    // cut the tail first so that the index of the head stays valid;
    // we already know there is at least one character which is kept.
    const std::string::size_type last = str.find_last_not_of(charlist);
    str.erase(last + 1);

    // now remove the leading characters (no-op when first == 0)
    str.erase(0, first);

    return str;
} // eo trim_mod(std::string&,const std::string&)
/**
 * removes last character from a string when it is in a list of chars to be removed.
 * At most one character is removed per call.
 * @param[in,out] str the string (modified in place).
 * @param what the list of chars which will be tested for.
 * @return the resulting string with last char removed (if applicable)
 */
std::string chomp_mod(std::string& str, const std::string& what)
{
    if (!str.empty() && !what.empty() )
    {
        const std::string::size_type last = str.size() - 1;
        if (what.find( str[last] ) != std::string::npos)
        {
            str.erase(last);
        }
    }
    return str;
} // eo chomp_mod(std::string&,const std::string&)
174 * @brief converts a string to lower case.
175 * @param[in,out] str the string to modify.
178 std::string to_lower_mod(std::string& str)
180 std::transform(str.begin(), str.end(), str.begin(), LowerFunc() );
182 } // eo to_lower_mod(std::string&)
186 * @brief converts a string to upper case.
187 * @param[in,out] str the string to modify.
190 std::string to_upper_mod(std::string& str)
192 std::transform( str.begin(), str.end(), str.begin(), UpperFunc() );
194 } // eo to_upper_mod(std::string&)
/**
 * cut off characters from a given list from front and end of a string.
 * @param str the string which should be trimmed (not modified).
 * @param charlist the list of characters to remove from beginning and end of string
 * @return the result string.
 */
std::string trim (const std::string& str, const std::string& charlist)
{
    const std::string::size_type first = str.find_first_not_of(charlist);
    if (first == std::string::npos)
    {
        // whole string consists of charlist (or is already empty)
        return std::string();
    }

    // at least one character is kept, so this search always succeeds
    const std::string::size_type last = str.find_last_not_of(charlist);
    return str.substr(first, last - first + 1);
} // eo trim(const std::string&,const std::string&)
/**
 * removes last character from a string when it is in a list of chars to be removed.
 * At most one character is removed.
 * @param str the string (not modified).
 * @param what the list of chars which will be tested for.
 * @return the resulting string with last char removed (if applicable)
 */
std::string chomp (const std::string& str, const std::string& what)
{
    if (!str.empty() && !what.empty() )
    {
        const std::string::size_type last = str.size() - 1;
        if (what.find( str[last] ) != std::string::npos)
        {
            return str.substr(0, last);
        }
    }
    return str;
} // eo chomp(const std::string&,const std::string&)
240 * @brief returns a lower case version of a given string.
241 * @param str the string
242 * @return the lower case version of the string
244 std::string to_lower (const std::string& str)
246 std::string result(str);
247 return to_lower_mod(result);
248 } // eo to_lower(const std::string&)
252 * @brief returns a upper case version of a given string.
253 * @param str the string
254 * @return the upper case version of the string
256 std::string to_upper(const std::string& str)
258 std::string result(str);
259 return to_upper_mod(result);
260 } // eo to_upper(const std::string&)
265 * @brief removes a given suffix from a string.
266 * @param str the string.
267 * @param suffix the suffix which should be removed if the string ends with it.
268 * @return the string without the suffix.
270 * If the string ends with the suffix, it is removed. If the the string doesn't end
271 * with the suffix the original string is returned.
273 std::string remove_suffix(const std::string& str, const std::string& suffix)
275 if (has_suffix(str,suffix) )
277 return str.substr(0, str.size()-suffix.size() );
280 } // eo remove_suffix(const std::string&,const std::string&)
285 * @brief removes a given prefix from a string.
286 * @param str the string.
287 * @param prefix the prefix which should be removed if the string begins with it.
288 * @return the string without the prefix.
290 * If the string begins with the prefix, it is removed. If the the string doesn't begin
291 * with the prefix the original string is returned.
293 std::string remove_prefix(const std::string& str, const std::string& prefix)
295 if (has_prefix(str,prefix) )
297 return str.substr( prefix.size() );
300 } // eo remove_prefix(const std::string&,const std::string&)
304 * split a string to key and value delimited by a given delimiter.
305 * The resulting key and value strings are trimmed (Whitespaces removed at beginning and end).
306 * @param str the string which should be splitted.
307 * @param[out] key the resulting key
308 * @param[out] value the resulting value
309 * @param delimiter the delimiter between key and value; default is '='.
310 * @return @a true if the split was successful.
313 const std::string& str,
318 std::string::size_type pos = str.find (delimiter);
319 if (pos == std::string::npos) return false;
320 key= str.substr(0,pos);
321 value= str.substr(pos+1);
325 } // eo pair_split(const std::string&,std::string&,std::string&,char)
329 * splits a string by given delimiter
331 * @param[in] str the string which should be splitted.
332 * @param[out] result the list resulting from splitting @a str.
333 * @param[in] delimiter the delimiter (word/phrase) at which @a str should be splitted.
334 * @param[in] omit_empty should empty parts not be stored?
335 * @param[in] trim_list list of characters the parts should be trimmed by.
336 * (empty string results in no trim)
339 const std::string& str,
340 std::list<std::string>& result,
341 const std::string& delimiter,
343 const std::string& trim_list
346 std::string::size_type pos, last_pos=0;
347 bool delimiter_found= false;
348 while ( last_pos < str.size() && last_pos != std::string::npos)
350 pos= str.find(delimiter, last_pos);
352 if (pos == std::string::npos)
354 part= str.substr(last_pos);
355 delimiter_found= false;
359 part= str.substr(last_pos, pos-last_pos);
360 delimiter_found=true;
362 if (pos != std::string::npos)
364 last_pos= pos+ delimiter.size();
368 last_pos= std::string::npos;
370 if (!trim_list.empty() ) trim_mod (part, trim_list);
371 if (omit_empty && part.empty() ) continue;
372 result.push_back( part );
374 // if the string ends with a delimiter we need to append an empty string if no omit_empty
376 // (this way we keep the split result consistent to a join operation)
377 if (delimiter_found && !omit_empty)
379 result.push_back("");
381 } // eo split_string(const std::string&,std::list< std::string >&,const std::string&,bool,const std::string&)
384 /** call split_string with list<string>, converts result to vector; vector is clear()-ed first
386 * Note: Uses 3 O(n)-operations: list.size, vector.resize and std::swap_ranges;
387 * not sure whether there is a better way to do this
390 const std::string& str,
391 std::vector<std::string>& result,
392 const std::string& delimiter,
394 const std::string& trim_list
397 std::list<std::string> tmp;
398 split_string(str, tmp, delimiter, omit_empty, trim_list);
399 std::size_t size = tmp.size(); // this is O(n)
401 result.resize(size); // also O(n)
402 std::swap_ranges(tmp.begin(), tmp.end(), result.begin()); // also O(n)
406 * splits a string by a given delimiter
407 * @param str the string which should be splitted.
408 * @param delimiter delimiter the delimiter (word/phrase) at which @a str should be splitted.
409 * @param[in] omit_empty should empty parts not be stored?
410 * @param[in] trim_list list of characters the parts should be trimmed by.
411 * (empty string results in no trim)
412 * @return the list resulting from splitting @a str.
414 std::list<std::string> split_string(
415 const std::string& str,
416 const std::string& delimiter,
418 const std::string& trim_list
421 std::list<std::string> result;
422 split_string(str, result, delimiter, omit_empty, trim_list);
424 } // eo split_string(const std::string&,const std::string&,bool,const std::string&)
/**
 * @brief joins a list of strings into a single string.
 *
 * This function is (basically) the reverse operation of @a split_string.
 *
 * @param parts the list of strings.
 * @param delimiter the delimiter which is inserted between the strings.
 * @return the joined string; empty when @a parts is empty.
 */
std::string join_string(
    const std::list< std::string >& parts,
    const std::string& delimiter
)
{
    std::string result;
    std::list< std::string >::const_iterator it = parts.begin();
    if (it == parts.end() )
    {
        return result;
    }

    // the first part goes in without a leading delimiter
    result = *it;
    for (++it; it != parts.end(); ++it)
    {
        result += delimiter;
        result += *it;
    }
    return result;
} // eo join_string(const std::list< std::string >&,const std::string&)
/**
 * @brief same as join_string for list, except uses a vector
 * @param parts the vector of strings.
 * @param delimiter the delimiter which is inserted between the strings.
 * @return the joined string; empty when @a parts is empty.
 */
std::string join_string(
    const std::vector< std::string >& parts,
    const std::string& delimiter
)
{
    std::string result;
    std::vector< std::string >::const_iterator it = parts.begin();
    if (it == parts.end() )
    {
        return result;
    }

    // the first part goes in without a leading delimiter
    result = *it;
    for (++it; it != parts.end(); ++it)
    {
        result += delimiter;
        result += *it;
    }
    return result;
} // eo join_string(const std::vector< std::string >&,const std::string&)
484 * @brief returns a hex string from a binary string.
485 * @param str the (binary) string
486 * @param upper_case_digits determine whether to use upper case characters for digits A-F.
487 * @return the string in hex notation.
489 std::string convert_binary_to_hex(
490 const std::string& str,
491 bool upper_case_digits
495 std::string hexDigits(upper_case_digits ? hexDigitsUpper : hexDigitsLower);
496 for ( std::string::const_iterator it= str.begin();
500 result.push_back( hexDigits[ ( (*it) >> 4) & 0x0f ] );
501 result.push_back( hexDigits[ (*it) & 0x0f ] );
504 } // eo convert_binary_to_hex(const std::string&,bool)
508 * @brief converts a hex digit string to binary string.
509 * @param str hex digit string
510 * @return the binary string.
512 * The hex digit string may contain whitespace or colons, which are treated
513 * as delimiters between hex digit groups.
515 * @todo rework the handling of half nibbles (consistency)!
// Converts a string of hex digits into the bytes they denote.
// Each input character is looked up in hexDigitsLower first and in hexDigitsUpper
// second; characters from Whitespaces and ':' are accepted as group delimiters;
// any other character raises runtime_error carrying the complete input string.
// NOTE(review): several statement lines of this function (declaration of the
// result and of the nibble accumulator 'c', the state updates of hasNibble and
// lastWasWS) are not visible in this listing - confirm the exact nibble handling
// against the repository before changing anything here.
517 std::string convert_hex_to_binary(
518 const std::string& str
520 throw (std::runtime_error)
// parser state: is a first nibble pending / was the previous character a delimiter
524 bool hasNibble= false;
525 bool lastWasWS= true;
526 for ( std::string::const_iterator it= str.begin();
// digit value = position in the digit table (lower case table tried first)
530 std::string::size_type p = hexDigitsLower.find( *it );
531 if (p== std::string::npos)
533 p= hexDigitsUpper.find( *it );
// not a hex digit at all: only delimiters are acceptable here
535 if (p == std::string::npos)
537 if ( ( Whitespaces.find( *it ) != std::string::npos) // is it a whitespace?
538 or ( *it == ':') // or a colon?
541 // we treat that as a valid delimiter:
544 // a single nibble before a delimiter is treated as the lower part:
// neither a digit nor a delimiter: reject the input
553 if (p == std::string::npos )
555 throw runtime_error("illegal character in hex digit string: " + str);
569 // we already had a nibble, so a char is complete now:
570 result.push_back( c );
575 // this is the first nibble of a new char:
581 // well, there is one nibble left
582 // let's do some heuristics:
585 // if the preceding character was a white space (or a colon)
586 // we treat the nibble as lower part:
587 // (this is consistent with shortened hex notations where leading zeros are not noted)
588 result.push_back( c );
592 // if it was part of a hex digit chain, we treat it as UPPER part (!!)
593 result.push_back( c << 4 );
597 } // eo convert_hex_to_binary(const std::string&)
600 } // eo namespace I2n
/**
 * @brief converts a string from ISO-8859-1 to UTF-8 using iconv.
 * @param isostring the ISO-8859-1 encoded input.
 * @return the UTF-8 encoded string. The result is taken from a NUL terminated
 *         buffer, so it ends at the first NUL byte of the converted data.
 * @throws std::runtime_error if the converter cannot be opened or the
 *         conversion buffer cannot be allocated.
 *
 * The return value of iconv() itself is not evaluated; whatever was converted
 * up to a possible error is returned.
 */
std::string iso_to_utf8(const std::string& isostring)
{
    iconv_t i2utf8 = iconv_open("UTF-8", "ISO-8859-1");

    // check the descriptor returned by iconv_open (not the function itself)
    if (i2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from ISO-8859-1 to UTF-8");

    size_t in_size = isostring.size();
    // the output buffer is sized four bytes per input byte
    const size_t capacity = in_size * 4;
    size_t out_size = capacity;

    char *buf = static_cast<char *>( malloc(capacity + 1) );
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(i2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv takes a non-const input pointer but does not modify the input
    char *in = const_cast<char *>( isostring.c_str() );
    char *out = buf;
    iconv(i2utf8, &in, &in_size, &out, &out_size);

    // out_size now holds the unused rest of the buffer
    buf[capacity - out_size] = 0;

    std::string result(buf);

    free(buf);
    iconv_close(i2utf8);

    return result;
}
/**
 * @brief converts a string from UTF-8 to ISO-8859-1 using iconv.
 * @param utf8string the UTF-8 encoded input.
 * @return the ISO-8859-1 encoded string. The result is taken from a NUL terminated
 *         buffer, so it ends at the first NUL byte of the converted data.
 * @throws std::runtime_error if the converter cannot be opened or the
 *         conversion buffer cannot be allocated.
 *
 * The return value of iconv() itself is not evaluated; whatever was converted
 * up to a possible error is returned.
 */
std::string utf8_to_iso(const std::string& utf8string)
{
    iconv_t utf82iso = iconv_open("ISO-8859-1","UTF-8");

    if (utf82iso == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to ISO-8859-1");

    size_t in_size = utf8string.size();
    // the output buffer has the same size as the input
    const size_t capacity = in_size;
    size_t out_size = capacity;

    char *buf = static_cast<char *>( malloc(capacity + 1) );
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(utf82iso);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv takes a non-const input pointer but does not modify the input
    char *in = const_cast<char *>( utf8string.c_str() );
    char *out = buf;
    iconv(utf82iso, &in, &in_size, &out, &out_size);

    // out_size now holds the unused rest of the buffer
    buf[capacity - out_size] = 0;

    std::string result(buf);

    free(buf);
    iconv_close(utf82iso);

    return result;
}
/**
 * @brief converts a UTF-8 string into a zero terminated UCS-4LE buffer.
 * @param utf8string the UTF-8 encoded input.
 * @return a buffer allocated with malloc() holding the converted characters
 *         followed by a terminating 0; the caller has to release it with free().
 * @throws std::runtime_error if the converter cannot be opened, the buffer
 *         cannot be allocated or the conversion fails.
 *
 * The buffer is sized in units of sizeof(wchar_t) while iconv writes UCS-4LE.
 * NOTE(review): this presumably relies on a 4 byte little endian wchar_t - confirm for the target platforms.
 */
wchar_t* utf8_to_wbuf(const std::string& utf8string)
{
    iconv_t utf82wstr = iconv_open("UCS-4LE","UTF-8");

    if (utf82wstr == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UCS-4");

    size_t in_size = utf8string.size();
    // one output element per input byte is the upper bound, plus the terminator
    const size_t capacity = (in_size + 1) * sizeof(wchar_t);
    size_t out_size = capacity;

    wchar_t *buf = static_cast<wchar_t *>( malloc(capacity) );
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(utf82wstr);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv takes a non-const input pointer but does not modify the input
    char *in = const_cast<char *>( utf8string.c_str() );
    char *out = reinterpret_cast<char *>(buf);
    const size_t rc = iconv(utf82wstr, &in, &in_size, &out, &out_size);

    // the descriptor is not needed any more, whatever the outcome was
    iconv_close(utf82wstr);

    if (rc == (size_t)-1)
    {
        // do not leak the buffer when reporting the failure
        free(buf);
        throw std::runtime_error("error converting char encodings");
    }

    // out_size holds the unused bytes; terminate right behind the converted data
    buf[ (capacity - out_size) / sizeof(wchar_t) ] = 0;

    return buf;
}
/**
 * @brief converts a string from UTF-7-IMAP to UTF-8 using iconv.
 * @param utf7imapstring the UTF-7-IMAP encoded input.
 * @return the UTF-8 encoded string. The result is taken from a NUL terminated
 *         buffer, so it ends at the first NUL byte of the converted data.
 * @throws std::runtime_error if the converter cannot be opened (e.g. the iconv
 *         implementation does not know "UTF-7-IMAP") or the conversion buffer
 *         cannot be allocated.
 *
 * The return value of iconv() itself is not evaluated; whatever was converted
 * up to a possible error is returned.
 */
std::string utf7imap_to_utf8(const std::string& utf7imapstring)
{
    iconv_t utf7imap2utf8 = iconv_open("UTF-8","UTF-7-IMAP");

    if (utf7imap2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-7-IMAP to UTF-8");

    size_t in_size = utf7imapstring.size();
    // the output buffer is sized four bytes per input byte
    const size_t capacity = in_size * 4;
    size_t out_size = capacity;

    char *buf = static_cast<char *>( malloc(capacity + 1) );
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(utf7imap2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv takes a non-const input pointer but does not modify the input
    char *in = const_cast<char *>( utf7imapstring.c_str() );
    char *out = buf;
    iconv(utf7imap2utf8, &in, &in_size, &out, &out_size);

    // out_size now holds the unused rest of the buffer
    buf[capacity - out_size] = 0;

    std::string result(buf);

    free(buf);
    iconv_close(utf7imap2utf8);

    return result;
}
/**
 * @brief converts a string from UTF-8 to UTF-7-IMAP using iconv.
 * @param utf8string the UTF-8 encoded input.
 * @return the UTF-7-IMAP encoded string. The result is taken from a NUL terminated
 *         buffer, so it ends at the first NUL byte of the converted data.
 * @throws std::runtime_error if the converter cannot be opened (e.g. the iconv
 *         implementation does not know "UTF-7-IMAP") or the conversion buffer
 *         cannot be allocated.
 *
 * The return value of iconv() itself is not evaluated; whatever was converted
 * up to a possible error is returned.
 * NOTE(review): no final iconv() call with a NULL input is made to flush the
 * shift state of the encoder - confirm that the converter in use does not need it.
 */
std::string utf8_to_utf7imap(const std::string& utf8string)
{
    iconv_t utf82utf7imap = iconv_open("UTF-7-IMAP", "UTF-8");

    if (utf82utf7imap == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UTF-7-IMAP");

    // UTF-7 is base64 encoded, a buffer 10x as large
    // as the utf-8 buffer should be enough. If not the string will be truncated.
    size_t in_size = utf8string.size();
    const size_t capacity = in_size * 10;
    size_t out_size = capacity;

    char *buf = static_cast<char *>( malloc(capacity + 1) );
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(utf82utf7imap);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv takes a non-const input pointer but does not modify the input
    char *in = const_cast<char *>( utf8string.c_str() );
    char *out = buf;
    iconv(utf82utf7imap, &in, &in_size, &out, &out_size);

    // out_size now holds the unused rest of the buffer
    buf[capacity - out_size] = 0;

    std::string result(buf);

    free(buf);
    iconv_close(utf82utf7imap);

    return result;
}
/**
 * @brief Tokenize string by (html) tags
 * @param[out] tokenized receives the tokens in input order (appended); for each
 *             pair, first is the text and second tells whether the text is a tag.
 * @param input the string to split.
 *
 * A tag starts at '<' and ends at the next '>'. A '<' inside a still open tag
 * closes the text collected so far as a non-tag token and starts a new tag.
 * A '>' outside of a tag is ordinary text. An unterminated tag at the end of
 * the input is stored as a non-tag token.
 */
void tokenize_by_tag(std::vector<std::pair<std::string,bool> > &tokenized, const std::string &input)
{
    bool inside_tag = false;
    std::string current;

    for (std::string::const_iterator it = input.begin(); it != input.end(); ++it)
    {
        const char ch = *it;

        if (ch == '<')
        {
            inside_tag = true;

            // everything collected up to here is plain text
            if (!current.empty() )
            {
                tokenized.push_back( std::make_pair(current, false) );
                current.clear();
            }

            current += ch;
        }
        else if (ch == '>' && inside_tag)
        {
            // the tag is complete; current holds at least the '>' just appended
            current += ch;
            inside_tag = false;
            tokenized.push_back( std::make_pair(current, true) );
            current.clear();
        }
        else
        {
            current += ch;
        }
    }

    // String left over in buffer?
    if (!current.empty() )
        tokenized.push_back( std::make_pair(current, false) );
} // eo tokenize_by_tag
794 std::string strip_html_tags(const std::string &input)
796 // Pair first: string, second: isTag
797 vector<pair<string,bool> > tokenized;
798 tokenize_by_tag (tokenized, input);
801 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
802 for (token = tokenized.begin(); token != tokens_end; ++token)
804 output += token->first;
807 } // eo strip_html_tags
810 // Smart-encode HTML en
811 string smart_html_entities(const std::string &input)
813 // Pair first: string, second: isTag
814 vector<pair<string,bool> > tokenized;
815 tokenize_by_tag (tokenized, input);
818 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
819 for (token = tokenized.begin(); token != tokens_end; ++token)
821 // keep HTML tags as they are
823 output += token->first;
825 output += html_entities(token->first);
/**
 * @brief finds the first character which is not 7 bit ASCII.
 * @param str the string to examine.
 * @return index of the first character with a value above 127,
 *         or std::string::npos if there is none.
 */
std::string::size_type find_8bit(const std::string &str)
{
    const std::string::size_type len = str.size();
    for (std::string::size_type p = 0; p < len; ++p)
    {
        // compare as unsigned char: plain char may be signed
        if (static_cast<unsigned char>(str[p]) > 127)
            return p;
    }

    return std::string::npos;
}
842 // encoded UTF-8 chars into HTML entities
843 string html_entities(std::string str)
846 replace_all (str, "&", "&");
847 replace_all (str, "<", "<");
848 replace_all (str, ">", ">");
849 replace_all (str, "\"", """);
850 replace_all (str, "'", "'");
851 replace_all (str, "/", "/");
854 replace_all (str, "\xC3\xA4", "ä");
855 replace_all (str, "\xC3\xB6", "ö");
856 replace_all (str, "\xC3\xBC", "ü");
857 replace_all (str, "\xC3\x84", "Ä");
858 replace_all (str, "\xC3\x96", "Ö");
859 replace_all (str, "\xC3\x9C", "Ü");
862 replace_all (str, "\xC3\x9F", "ß");
864 // conversion of remaining non-ASCII chars needed?
865 // just do if needed because of performance
866 if (find_8bit(str) != string::npos)
868 // convert to fixed-size encoding UTF-32
869 wchar_t* wbuf=utf8_to_wbuf(str);
870 ostringstream target;
872 // replace all non-ASCII chars with HTML representation
873 for (int p=0; wbuf[p] != 0; p++)
875 unsigned int c=wbuf[p];
878 target << static_cast<unsigned char>(c);
880 target << "&#" << c << ';';
889 } // eo html_entities(std::string)
891 // convert HTML entities to something that can be viewed on a basic text console (restricted to ASCII-7)
892 string html_entities_to_console(std::string str)
895 replace_all (str, "&", "&");
896 replace_all (str, "<", "<");
897 replace_all (str, ">", ">");
898 replace_all (str, """, "\"");
899 replace_all (str, "'", "'");
900 replace_all (str, "/", "/");
903 replace_all (str, "ä", "ae");
904 replace_all (str, "ö", "oe");
905 replace_all (str, "ü", "ue");
906 replace_all (str, "Ä", "Ae");
907 replace_all (str, "Ö", "Oe");
908 replace_all (str, "Ü", "Ue");
911 replace_all (str, "ß", "ss");
916 // find_html_comments + remove_html_comments(str, comments)
917 void remove_html_comments(string &str)
919 vector<CommentZone> comments = find_html_comments(str);
920 remove_html_comments(str, comments);
923 // find all html comments, behaving correctly if they are nested; ignores comment tags ("<!--FOO .... BAR-->")
924 // If there are invalid comments ("-->" before "<!--" or different number of closing and opening tags),
925 // then the unknown index of corresponding start/end tag will be represented by a string::npos
926 // Indices are from start of start tag until first index after closing tag
927 vector<CommentZone> find_html_comments(const std::string &str)
929 static const string START = "<!--";
930 static const string CLOSE = "-->";
931 static const string::size_type START_LEN = START.length();
932 static const string::size_type CLOSE_LEN = CLOSE.length();
934 vector<CommentZone> comments;
936 // in order to find nested comments, need either recursion or a stack
937 vector<string::size_type> starts; // stack of start tags
939 string::size_type pos = 0;
940 string::size_type len = str.length();
941 string::size_type next_start, next_close;
943 while (pos < len) // not really needed but just in case
945 next_start = str.find(START, pos);
946 next_close = str.find(CLOSE, pos);
948 if ( (next_start == string::npos) && (next_close == string::npos) )
949 break; // we are done
951 else if ( (next_start == string::npos) || (next_close < next_start) ) // close one comment (pop)
953 if (starts.empty()) // closing tag without a start
954 comments.push_back(CommentZone(string::npos, next_close+CLOSE_LEN));
957 comments.push_back(CommentZone(starts.back(), next_close+CLOSE_LEN));
960 pos = next_close + CLOSE_LEN;
963 else if ( (next_close == string::npos) || (next_start < next_close) ) // start a new comment (push)
965 starts.push_back(next_start);
966 pos = next_start + START_LEN;
970 // add comments that have no closing tag from back to front (important for remove_html_comments!)
971 while (!starts.empty())
973 comments.push_back(CommentZone(starts.back(), string::npos));
980 // remove all html comments foundby find_html_comments
981 void remove_html_comments(std::string &str, const vector<CommentZone> &comments)
983 // remember position where last removal started
984 string::size_type last_removal_start = str.length();
986 // Go from back to front to not mess up indices.
987 // This requires that bigger comments, that contain smaller comments, come AFTER
988 // the small contained comments in the comments vector (i.e. comments are ordered by
989 // their closing tag, not their opening tag). This is true for results from find_html_comments
990 BOOST_REVERSE_FOREACH(const CommentZone &comment, comments)
992 if (comment.first == string::npos)
994 str = str.replace(0, comment.second, ""); // comment starts "before" str --> delete from start
995 break; // there can be no more
997 else if (comment.first >= last_removal_start)
999 continue; // this comment is inside another comment that we have removed already
1001 else if (comment.second == string::npos) // comment ends "after" str --> delete until end
1003 str = str.replace(comment.first, string::npos, "");
1004 last_removal_start = comment.first;
1008 str = str.replace(comment.first, comment.second-comment.first, "");
1009 last_removal_start = comment.first;
/**
 * @brief replaces all occurrences of a search string within a string.
 * @param[in,out] base the string which is modified in place.
 * @param ist the text to search for; must not be empty.
 * @param soll the replacement text.
 * @return @a true if at least one occurrence was replaced.
 * @throws std::runtime_error if @a ist is empty.
 *
 * The search continues behind the inserted replacement, so a replacement which
 * contains the search string is not replaced again.
 */
bool replace_all(std::string &base, const std::string &ist, const std::string &soll)
{
    if (ist.empty() )
        throw std::runtime_error ("replace_all called with empty search string");

    bool found_ist = false;
    std::string::size_type a = 0;

    while ( (a = base.find(ist, a) ) != std::string::npos)
    {
        base.replace(a, ist.size(), soll);
        // continue behind the replacement
        a += soll.size();
        found_ist = true;
    }

    return found_ist;
}

/** @brief overload of replace_all taking search and replacement text as pointers to strings. */
bool replace_all(std::string &base, const std::string *ist, const std::string *soll)
{
    return replace_all(base, *ist, *soll);
}

/** @brief overload of replace_all taking search and replacement text as C strings. */
bool replace_all(std::string &base, const char *ist, const char *soll)
{
    const std::string i = ist;
    const std::string s = soll;
    return replace_all(base, &i, &s);
}

/** @brief overload of replace_all taking the replacement text as C string. */
bool replace_all(std::string &base, const std::string &ist, const char *soll)
{
    const std::string s = soll;
    return replace_all(base, &ist, &s);
}

/** @brief overload of replace_all taking a C string to search for and a pointer to the replacement string. */
bool replace_all(std::string &base, const char *ist, const std::string *soll)
{
    const std::string i = ist;
    return replace_all(base, &i, soll);
}
/**
 * @brief replaces all characters that could be problematic or impose a security risk when being logged
 * @param str the original string
 * @param replace_with the character to replace the unsafe chars with
 * @return a string that is safe to send to syslog or other logfiles
 *
 * All chars between 0x20 (space) and 0x7E (~) (including) are considered safe for logging.
 * See e.g. RFC 5424, section 8.2 or the posix character class "printable".
 * This eliminates all possible problems with NUL, control characters, 8 bit chars, UTF8.
 */
std::string sanitize_for_logging(const std::string &str, const char replace_with)
{
    std::string output(str);

    for (std::string::iterator it = output.begin(); it != output.end(); ++it)
    {
        // compare as unsigned char so that 8 bit characters are caught
        // independent of the signedness of plain char
        const unsigned char uc = static_cast<unsigned char>(*it);
        if (uc < 0x20 || uc > 0x7E)
            *it = replace_with;
    }

    return output;
}
/**
 * @brief returns a lower case version of a given string.
 * @param src the string (not modified).
 * @return the lower case version of the string
 */
std::string to_lower(const std::string &src)
{
    std::string dst = src;

    const std::string::size_type end = dst.size();
    for (std::string::size_type pos = 0; pos < end; pos++)
    {
        // tolower is only defined for values representable as unsigned char
        // (or EOF); a negative plain char must be converted first.
        dst[pos] = static_cast<char>( tolower( static_cast<unsigned char>(dst[pos]) ) );
    }

    return dst;
}
/**
 * @brief returns an upper case version of a given string.
 * @param src the string (not modified).
 * @return the upper case version of the string
 */
std::string to_upper(const std::string &src)
{
    std::string dst = src;

    const std::string::size_type end = dst.size();
    for (std::string::size_type pos = 0; pos < end; pos++)
    {
        // toupper is only defined for values representable as unsigned char
        // (or EOF); a negative plain char must be converted first.
        dst[pos] = static_cast<char>( toupper( static_cast<unsigned char>(dst[pos]) ) );
    }

    return dst;
}
// Number of unit symbols per table; nice_unit_format() stops scaling once its
// symbol index would reach this count.
1103 const int MAX_UNIT_FORMAT_SYMBOLS = 6;
// Unit suffixes for the short output format, indexed by scaling step.
// NOTE(review): the entries are not visible in this listing - presumably the
// short counterparts of the long table below; confirm against the repository.
1105 const string shortUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
// Unit suffixes for the long output format, indexed by scaling step.
// They are marked with i18n_noop() here and passed through i18n() when used
// in nice_unit_format().
1114 const string longUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
1115 i18n_noop(" Bytes"),
1116 i18n_noop(" KBytes"),
1117 i18n_noop(" MBytes"),
1118 i18n_noop(" GBytes"),
1119 i18n_noop(" TBytes"),
1120 i18n_noop(" PBytes")
/**
 * @brief rounds a number "half up" to a multiple of 1/rounding_multiplier.
 * @param number the number to round.
 * @param rounding_multiplier the reciprocal of the rounding step (e.g. 10 rounds to one decimal place).
 * @return the rounded number.
 *
 * The scaled value is truncated via int64_t after adding 0.5.
 * NOTE(review): this presumably expects non-negative values whose scaled
 * magnitude fits into int64_t - confirm against the callers.
 */
long double rounding_upwards(
    const long double number,
    const int rounding_multiplier
)
{
    // scale, add one half and cut off the fraction
    const long double scaled = number * rounding_multiplier + 0.5;
    const long double truncated = (int64_t) (scaled);

    // scale back
    return truncated / (long double) (rounding_multiplier);
}
// Formats a byte count as a human readable string: the value is scaled down by
// the unit base until it fits, rounded, and suffixed with a unit symbol taken
// from shortUnitFormatSymbols or longUnitFormatSymbols (passed through i18n()).
// NOTE(review): the parameter 'base', the declarations of 'multiple', 'sizecount'
// and 'out' as well as the precision settings are not visible in this listing.
1139 string nice_unit_format(
1140 const int64_t input,
1141 const UnitFormat format,
1145 // select the system of units (decimal or binary)
1147 if (base == UnitBase1000)
1156 long double size = input;
1158 // check the size of the input number to fit in the appropriate symbol
// note: strictly greater - a value equal to 'multiple' is not scaled down
1160 while (size > multiple)
1162 size = size / multiple;
1165 // rollback to the previous values and stop the loop when cannot
1166 // represent the number length.
1167 if (sizecount >= MAX_UNIT_FORMAT_SYMBOLS)
1169 size = size * multiple;
1175 // round the scaled number "half up" to one decimal place
// (rounding_upwards with a multiplier of 10 works in steps of 1/10)
1176 const int rounding_multiplier = 10;
1177 size = rounding_upwards(size, rounding_multiplier);
1179 // format the input number, placing the appropriate symbol
1181 out.setf (ios::fixed);
1182 if (format == ShortUnitFormat)
1185 out << size << i18n( shortUnitFormatSymbols[sizecount].c_str() );
1190 out << size << i18n( longUnitFormatSymbols[sizecount].c_str() );
1194 } // eo nice_unit_format(int input)
// Overload for floating point input: rounds the value to the nearest integer,
// converts it with boost::numeric_cast<int64_t> and delegates to the int64_t
// variant of nice_unit_format().
// NOTE(review): the parameter lines for 'input' and 'base' are not visible in this listing.
1197 string nice_unit_format(
1199 const UnitFormat format,
1203 // round as double and cast to int64_t
1204 // cast raised overflow error near max val of int64_t (~9.2e18, see unittest)
1205 int64_t input_casted_and_rounded =
1206 boost::numeric_cast<int64_t>( round(input) );
1209 return nice_unit_format( input_casted_and_rounded, format, base );
1210 } // eo nice_unit_format(double input)
/**
 * @brief escapes a string and encloses it in double quotes.
 * @param s the string to escape.
 * @return the escaped string: double quotes and backslashes are prefixed with a
 *         backslash, carriage return and newline are written as "\r" and "\n"
 *         (backslash plus letter), and the whole result is enclosed in double quotes.
 *
 * descape() is the reverse operation.
 */
std::string escape(const std::string &s)
{
    std::string out;
    // room for the two quotes; escaped characters grow the string further
    out.reserve( s.size() + 2 );

    out += '"';
    for (std::string::const_iterator it = s.begin(); it != s.end(); ++it)
    {
        switch (*it)
        {
            case '"':
            case '\\':
                out += '\\';
                out += *it;
                break;
            case '\r':
                out += "\\r";
                break;
            case '\n':
                out += "\\n";
                break;
            default:
                out += *it;
        }
    }
    out += '"';

    return out;
} // eo escape(const std::string&)
/**
 * @brief extracts and unescapes a double quoted, backslash escaped string.
 * @param s the string which contains the escaped string.
 * @param startpos index of the opening double quote within @a s.
 * @param[out] endpos index of the closing double quote within @a s.
 * @return the unescaped content between the quotes.
 * @throws std::out_of_range if there is no double quote at @a startpos, if
 *         @a startpos is outside of @a s, or if the content ends with a
 *         single backslash.
 *
 * "\r" and "\n" (backslash plus letter) become carriage return and newline;
 * for every other escaped character the backslash is removed.
 * NOTE(review): when no closing quote exists, the rest of @a s is used and
 * @a endpos is derived from std::string::npos - confirm what callers expect.
 */
std::string descape(const std::string &s, int startpos, int &endpos)
{
    if (s.at(startpos) != '"')
        throw std::out_of_range("value not type escaped string");

    std::string out = s.substr(startpos + 1);
    std::string::size_type p = 0;

    // search for the end of the string
    while ( (p = out.find('"', p) ) != std::string::npos)
    {
        // the " might be escaped with a backslash:
        // an odd number of directly preceding backslashes escapes the quote
        bool escaped = false;
        for (std::string::size_type e = p; e > 0 && out[e - 1] == '\\'; --e)
        {
            escaped = !escaped;
        }

        if (!escaped)
            break;

        p++;
    }

    // we now have the end of the string
    out = out.substr(0, p);

    // tell calling prog about the endposition
    endpos = static_cast<int>(startpos + p + 1);

    // descape all \ stuff inside the string now
    p = 0;
    while ( (p = out.find('\\', p) ) != std::string::npos)
    {
        switch (out.at(p + 1) )
        {
            case 'r':
                out.replace(p, 2, "\r");
                break;
            case 'n':
                out.replace(p, 2, "\n");
                break;
            default:
                // keep the escaped character, drop the backslash
                out.erase(p, 1);
        }
        // continue behind the character which was just produced
        p++;
    }

    return out;
} // eo descape(const std::string&,int,int&)
1306 string escape_shellarg(const string &input)
1308 string output = "'";
1309 string::const_iterator it, it_end = input.end();
1310 for (it = input.begin(); it != it_end; ++it)