/*
The software in this package is distributed under the GNU General
Public License version 2 (with a special exception described below).

A copy of GNU General Public License (GPL) is included in this distribution,
in the file COPYING.GPL.

As a special exception, if other files instantiate templates or use macros
or inline functions from this file, or you compile this file and link it
with other works to produce a work based on this file, this file
does not by itself cause the resulting work to be covered
by the GNU General Public License.

However the source code for this file must still be made available
in accordance with section (3) of the GNU General Public License.

This exception does not invalidate any other reasons why a work based
on this file might be covered by the GNU General Public License.
*/
/*
 * (c) Copyright 2007-2008 by Intra2net AG
 */
30 #include <cmath> // for round()
38 #include <boost/numeric/conversion/cast.hpp>
39 #include <boost/foreach.hpp>
41 #include <boost/assert.hpp>
42 #include <boost/shared_ptr.hpp>
43 #include <openssl/bio.h>
44 #include <openssl/evp.h>
46 #include <stringfunc.hxx>
57 const std::string hexDigitsLower("0123456789abcdef");
58 const std::string hexDigitsUpper("0123456789ABCDEF");
/**
 * @brief functor which converts a single character to upper case.
 *
 * The character is handed to std::toupper() as unsigned char: passing a
 * negative plain char (any byte above 0x7F where char is signed) is undefined.
 */
struct UpperFunc
{
    char operator() (char c) const
    {
        return static_cast<char>( std::toupper( static_cast<unsigned char>(c) ) );
    }
}; // eo struct UpperFunc
/**
 * @brief functor which converts a single character to lower case.
 *
 * The character is handed to std::tolower() as unsigned char: passing a
 * negative plain char (any byte above 0x7F where char is signed) is undefined.
 */
struct LowerFunc
{
    char operator() (char c) const
    {
        return static_cast<char>( std::tolower( static_cast<unsigned char>(c) ) );
    }
}; // eo struct LowerFunc
79 } // eo namespace <anonymous>
84 * default list of Whitespaces (" \t\r\n");
86 const std::string Whitespaces = " \t\r\n";
89 * default list of lineendings ("\r\n");
91 const std::string LineEndings= "\r\n";
/**
 * @brief checks if a string begins with a given prefix.
 * @param str the string which is tested.
 * @param prefix the prefix which should be tested for.
 * @return @a true iff the prefix is not empty and the string begins with that prefix.
 */
bool has_prefix(const std::string& str, const std::string& prefix)
{
    // an empty prefix never matches; a prefix longer than the string cannot match
    // (this also covers the empty string)
    if (prefix.empty() || str.size() < prefix.size() )
    {
        return false;
    }
    return str.compare(0, prefix.size(), prefix) == 0;
} // eo has_prefix(const std::string&,const std::string&)
/**
 * @brief checks if a string ends with a given suffix.
 * @param str the string which is tested.
 * @param suffix the suffix which should be tested for.
 * @return @a true iff the suffix is not empty and the string ends with that suffix.
 */
bool has_suffix(const std::string& str, const std::string& suffix)
{
    // an empty suffix never matches; a suffix longer than the string cannot match
    // (this also covers the empty string)
    if (suffix.empty() || str.size() < suffix.size() )
    {
        return false;
    }
    return str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
} // eo has_suffix(const std::string&,const std::string&)
/**
 * cut off characters from a given list from front and end of a string.
 * @param[in,out] str the string which should be trimmed.
 * @param charlist the list of characters to remove from beginning and end of string
 * @return the result string (a copy of the modified @a str).
 */
std::string trim_mod(std::string& str, const std::string& charlist)
{
    // first: trim the beginning:
    std::string::size_type pos= str.find_first_not_of (charlist);
    if (pos == std::string::npos)
    {
        // whole string consists of charlist (or is already empty)
        str.clear();
        return str;
    }
    else if (pos > 0)
    {
        // str starts with charlist
        str.erase(0, pos);
    }
    // now let's look at the tail:
    pos= str.find_last_not_of(charlist) +1; // note: we already know there is at least one other char!
    if ( pos < str.size() )
    {
        str.erase(pos, str.size()-pos);
    }
    return str;
} // eo trim_mod(std::string&,const std::string&)
/**
 * removes last character from a string when it is in a list of chars to be removed.
 * Only a single trailing character is removed, even if more of them match.
 * @param[in,out] str the string.
 * @param what the list of chars which will be tested for.
 * @return the resulting string with last char removed (if applicable)
 */
std::string chomp_mod(std::string& str, const std::string& what)
{
    if (str.empty() || what.empty() )
    {
        return str;
    }
    if (what.find(str.at (str.size()-1) ) != std::string::npos)
    {
        str.erase(str.size() - 1);
    }
    return str;
} // eo chomp_mod(std::string&,const std::string&)
180 * @brief converts a string to lower case.
181 * @param[in,out] str the string to modify.
184 std::string to_lower_mod(std::string& str)
186 std::transform(str.begin(), str.end(), str.begin(), LowerFunc() );
188 } // eo to_lower_mod(std::string&)
192 * @brief converts a string to upper case.
193 * @param[in,out] str the string to modify.
196 std::string to_upper_mod(std::string& str)
198 std::transform( str.begin(), str.end(), str.begin(), UpperFunc() );
200 } // eo to_upper_mod(std::string&)
/**
 * cut off characters from a given list from front and end of a string.
 * @param str the string which should be trimmed.
 * @param charlist the list of characters to remove from beginning and end of string
 * @return the result string.
 */
std::string trim (const std::string& str, const std::string& charlist)
{
    // first: trim the beginning:
    std::string::size_type pos0= str.find_first_not_of(charlist);
    if (pos0 == std::string::npos)
    {
        // whole string consists of charlist (or is already empty)
        return std::string();
    }
    // now let's look at the end (cannot fail: pos0 proves there is a character to keep):
    std::string::size_type pos1= str.find_last_not_of(charlist);
    return str.substr(pos0, pos1 - pos0 + 1);
} // eo trim(const std::string&,const std::string&)
/**
 * removes last character from a string when it is in a list of chars to be removed.
 * Only a single trailing character is removed, even if more of them match.
 * @param str the string.
 * @param what the list of chars which will be tested for.
 * @return the resulting string with last char removed (if applicable)
 */
std::string chomp (const std::string& str, const std::string& what)
{
    if (str.empty() || what.empty() )
    {
        return str;
    }
    if (what.find(str.at (str.size()-1) ) != std::string::npos)
    {
        return str.substr(0, str.size()-1);
    }
    return str;
} // eo chomp(const std::string&,const std::string&)
246 * @brief returns a lower case version of a given string.
247 * @param str the string
248 * @return the lower case version of the string
250 std::string to_lower (const std::string& str)
252 std::string result(str);
253 return to_lower_mod(result);
254 } // eo to_lower(const std::string&)
258 * @brief returns a upper case version of a given string.
259 * @param str the string
260 * @return the upper case version of the string
262 std::string to_upper(const std::string& str)
264 std::string result(str);
265 return to_upper_mod(result);
266 } // eo to_upper(const std::string&)
271 * @brief removes a given suffix from a string.
272 * @param str the string.
273 * @param suffix the suffix which should be removed if the string ends with it.
274 * @return the string without the suffix.
276 * If the string ends with the suffix, it is removed. If the the string doesn't end
277 * with the suffix the original string is returned.
279 std::string remove_suffix(const std::string& str, const std::string& suffix)
281 if (has_suffix(str,suffix) )
283 return str.substr(0, str.size()-suffix.size() );
286 } // eo remove_suffix(const std::string&,const std::string&)
291 * @brief removes a given prefix from a string.
292 * @param str the string.
293 * @param prefix the prefix which should be removed if the string begins with it.
294 * @return the string without the prefix.
296 * If the string begins with the prefix, it is removed. If the the string doesn't begin
297 * with the prefix the original string is returned.
299 std::string remove_prefix(const std::string& str, const std::string& prefix)
301 if (has_prefix(str,prefix) )
303 return str.substr( prefix.size() );
306 } // eo remove_prefix(const std::string&,const std::string&)
310 * split a string to key and value delimited by a given delimiter.
311 * The resulting key and value strings are trimmed (Whitespaces removed at beginning and end).
312 * @param str the string which should be splitted.
313 * @param[out] key the resulting key
314 * @param[out] value the resulting value
315 * @param delimiter the delimiter between key and value; default is '='.
316 * @return @a true if the split was successful.
319 const std::string& str,
324 std::string::size_type pos = str.find (delimiter);
325 if (pos == std::string::npos) return false;
326 key= str.substr(0,pos);
327 value= str.substr(pos+1);
331 } // eo pair_split(const std::string&,std::string&,std::string&,char)
335 * splits a string by given delimiter
337 * @param[in] str the string which should be splitted.
338 * @param[out] result the list resulting from splitting @a str.
339 * @param[in] delimiter the delimiter (word/phrase) at which @a str should be splitted.
340 * @param[in] omit_empty should empty parts not be stored?
341 * @param[in] trim_list list of characters the parts should be trimmed by.
342 * (empty string results in no trim)
345 const std::string& str,
346 std::list<std::string>& result,
347 const std::string& delimiter,
349 const std::string& trim_list
352 std::string::size_type pos, last_pos=0;
353 bool delimiter_found= false;
354 while ( last_pos < str.size() && last_pos != std::string::npos)
356 pos= str.find(delimiter, last_pos);
358 if (pos == std::string::npos)
360 part= str.substr(last_pos);
361 delimiter_found= false;
365 part= str.substr(last_pos, pos-last_pos);
366 delimiter_found=true;
368 if (pos != std::string::npos)
370 last_pos= pos+ delimiter.size();
374 last_pos= std::string::npos;
376 if (!trim_list.empty() ) trim_mod (part, trim_list);
377 if (omit_empty && part.empty() ) continue;
378 result.push_back( part );
380 // if the string ends with a delimiter we need to append an empty string if no omit_empty
382 // (this way we keep the split result consistent to a join operation)
383 if (delimiter_found && !omit_empty)
385 result.push_back("");
387 } // eo split_string(const std::string&,std::list< std::string >&,const std::string&,bool,const std::string&)
390 /** call split_string with list<string>, converts result to vector; vector is clear()-ed first
392 * Note: Uses 3 O(n)-operations: list.size, vector.resize and std::swap_ranges;
393 * not sure whether there is a better way to do this
396 const std::string& str,
397 std::vector<std::string>& result,
398 const std::string& delimiter,
400 const std::string& trim_list
403 std::list<std::string> tmp;
404 split_string(str, tmp, delimiter, omit_empty, trim_list);
405 std::size_t size = tmp.size(); // this is O(n)
407 result.resize(size); // also O(n)
408 std::swap_ranges(tmp.begin(), tmp.end(), result.begin()); // also O(n)
412 * splits a string by a given delimiter
413 * @param str the string which should be splitted.
414 * @param delimiter delimiter the delimiter (word/phrase) at which @a str should be splitted.
415 * @param[in] omit_empty should empty parts not be stored?
416 * @param[in] trim_list list of characters the parts should be trimmed by.
417 * (empty string results in no trim)
418 * @return the list resulting from splitting @a str.
420 std::list<std::string> split_string(
421 const std::string& str,
422 const std::string& delimiter,
424 const std::string& trim_list
427 std::list<std::string> result;
428 split_string(str, result, delimiter, omit_empty, trim_list);
430 } // eo split_string(const std::string&,const std::string&,bool,const std::string&)
/**
 * @brief joins a list of strings into a single string.
 *
 * This function is (basically) the reverse operation of @a split_string.
 *
 * @param parts the list of strings.
 * @param delimiter the delimiter which is inserted between the strings.
 * @return the joined string (empty if @a parts is empty).
 */
std::string join_string(
    const std::list< std::string >& parts,
    const std::string& delimiter
)
{
    std::string result;
    if (! parts.empty() )
    {
        std::list< std::string >::const_iterator it= parts.begin();
        result= *it;
        // every further part is preceded by the delimiter
        while ( ++it != parts.end() )
        {
            result+= delimiter;
            result+= *it;
        }
    }
    return result;
} // eo join_string(const std::list< std::string >&,const std::string&)
/**
 * @brief same as join_string for list, except uses a vector
 * @param parts the vector of strings.
 * @param delimiter the delimiter which is inserted between the strings.
 * @return the joined string (empty if @a parts is empty).
 */
std::string join_string(
    const std::vector< std::string >& parts,
    const std::string& delimiter
)
{
    std::string result;
    if (! parts.empty() )
    {
        std::vector< std::string >::const_iterator it= parts.begin();
        result= *it;
        // every further part is preceded by the delimiter
        while ( ++it != parts.end() )
        {
            result+= delimiter;
            result+= *it;
        }
    }
    return result;
} // eo join_string(const std::vector< std::string >&,const std::string&)
/**
 * @brief same as join_string for list, except uses a set
 *
 * The parts are joined in the iteration order of the set.
 *
 * @param parts the set of strings.
 * @param delimiter the delimiter which is inserted between the strings.
 * @return the joined string (empty if @a parts is empty).
 */
std::string join_string(
    const std::set< std::string >& parts,
    const std::string& delimiter
)
{
    std::string result;
    if (! parts.empty() )
    {
        std::set< std::string >::const_iterator it= parts.begin();
        result= *it;
        // Decide by position, not by "result is still empty": an empty first
        // element must be followed by a delimiter too, exactly as in the
        // list and vector overloads.
        while ( ++it != parts.end() )
        {
            result+= delimiter;
            result+= *it;
        }
    }
    return result;
} // eo join_string(const std::set< std::string >&,const std::string&)
/**
 * @brief same as join_string for list, except uses a NULL-terminated array of C strings
 * @param parts array of C strings; the end is marked by a NULL entry.
 *        A NULL @a parts pointer is treated like an empty array.
 * @param delimiter the delimiter which is inserted between the strings.
 * @return the joined string (empty if there are no parts).
 */
std::string join_string (
    const char *const parts[], /* assumed NULL-terminated */
    const std::string& delimiter
)
{
    std::string result;

    if (parts != NULL)
    {
        const char *const *cur = parts;

        if (*cur != NULL) {
            result = std::string (*cur);

            // every further part is preceded by the delimiter
            while (*++cur != NULL) {
                result += delimiter;
                result += std::string (*cur);
            }
        }
    }

    return result;
}
537 * @brief returns a hex string from a binary string.
538 * @param str the (binary) string
539 * @param upper_case_digits determine whether to use upper case characters for digits A-F.
540 * @return the string in hex notation.
542 std::string convert_binary_to_hex(
543 const std::string& str,
544 bool upper_case_digits
548 std::string hexDigits(upper_case_digits ? hexDigitsUpper : hexDigitsLower);
549 for ( std::string::const_iterator it= str.begin();
553 result.push_back( hexDigits[ ( (*it) >> 4) & 0x0f ] );
554 result.push_back( hexDigits[ (*it) & 0x0f ] );
557 } // eo convert_binary_to_hex(const std::string&,bool)
561 * @brief converts a hex digit string to binary string.
562 * @param str hex digit string
563 * @return the binary string.
565 * The hex digit string may contains white spaces or colons which are treated
566 * as delimiters between hex digit groups.
568 * @todo rework the handling of half nibbles (consistency)!
570 std::string convert_hex_to_binary(
571 const std::string& str
573 throw (std::runtime_error)
577 bool hasNibble= false;
578 bool lastWasWS= true;
579 for ( std::string::const_iterator it= str.begin();
583 std::string::size_type p = hexDigitsLower.find( *it );
584 if (p== std::string::npos)
586 p= hexDigitsUpper.find( *it );
588 if (p == std::string::npos)
590 if ( ( Whitespaces.find( *it ) != std::string::npos) // is it a whitespace?
591 or ( *it == ':') // or a colon?
594 // we treat that as a valid delimiter:
597 // 1 nibble before WS is treate as lower part:
606 if (p == std::string::npos )
608 throw runtime_error("illegal character in hex digit string: " + str);
622 //we already had a nibble, so a char is complete now:
623 result.push_back( c );
628 // this is the first nibble of a new char:
634 //well, there is one nibble left
635 // let's do some heuristics:
638 // if the preceeding character was a white space (or a colon)
639 // we treat the nibble as lower part:
640 //( this is consistent with shortened hex notations where leading zeros are not noted)
641 result.push_back( c );
645 // if it was part of a hex digit chain, we treat it as UPPER part (!!)
646 result.push_back( c << 4 );
650 } // eo convert_hex_to_binary(const std::string&)
653 static list<string>& alloc_template_starts()
655 static list<string> result;
658 result.push_back("std::list");
659 result.push_back("std::vector");
664 string shorten_stl_types(const string &input)
666 string output = input;
668 // first: replace fixed string for std::string
669 replace_all(output, "std::basic_string<char, std::char_traits<char>, std::allocator<char> >",
672 // loop over list/vector/... that have an allocator, e.g.
673 // std::list< some_type_here, std::allocator<some_type_here> >
674 string::size_type start, comma, end, len, start_text_len;
676 string allocator_text;
677 BOOST_FOREACH(const string &start_text, alloc_template_starts())
682 start_text_len = start_text.length();
683 while( (start=output.find(start_text+"<", start)) != string::npos )
685 len = output.length();
686 start += start_text_len+1; // start next iter and tests here after opening bracket
688 // now comes the tricky part: find matching ',' and the closing '>' even if "subtype" is template again
690 n_open_brackets = 1; // the bracket right after start_text counts as first
691 while (comma < len && n_open_brackets > 0)
693 if (output[comma] == ',' && n_open_brackets == 1)
695 else if (output[comma] == '<')
697 else if (output[comma] == '>')
702 while (end < len && n_open_brackets > 0)
704 if (output[end] == '<')
706 else if (output[end] == '>')
709 if (n_open_brackets == 0)
710 break; // do not increment end
715 // check that start < comma < end < len && n_open_brackets == 0
716 if (start >= comma || comma >= end || end >= len || n_open_brackets != 0)
717 continue; // input seems to be of unexpected form
719 // check that type in allocator is same as until comma
720 string type = output.substr(start, comma-start);
721 if (type[type.length()-1] == '>')
722 allocator_text = string("std::allocator<") + type + " > ";
724 allocator_text = string("std::allocator<") + type + "> ";
725 if (output.substr(comma+2, end-comma-2) == allocator_text)
726 output.replace(comma+2, end-comma-2, "_alloc_");
733 typedef boost::shared_ptr<BIO> BIO_Ptr;
736 * @brief Converts openssl generic input/output to std::string
738 * Code adapted from keymakerd.
740 * @param bio Openssl's generic input/output
741 * @return :string STL string
743 static std::string _convert_BIO_to_string(BIO *input)
748 long written = BIO_get_mem_data(input, &output);
749 if (written <= 0 || output == NULL)
752 rtn.assign(output, written); //lint !e534 !e732
757 * @brief base64 encode a string using OpenSSL base64 functions
759 * Data size limit is 2GB on 32 bit (LONG_MAX)
761 * @param input String to encode
762 * @param one_line Encode all data as one line, no wrapping with line feeds
763 * @return base64 encoded string
765 std::string base64_encode(const std::string &input, bool one_line)
767 // check for empty buffer
771 // safety check to ensure our check afer BIO_write() works
772 if (input.size() >= LONG_MAX)
773 throw runtime_error("base64 encode: Too much data");
775 // setup encoder. Note: BIO_free_all frees both BIOs.
776 BIO_Ptr base64_encoder(BIO_new(BIO_f_base64()), BIO_free_all);
777 BIO *encoder_bio = base64_encoder.get();
779 BIO_set_flags(encoder_bio, BIO_FLAGS_BASE64_NO_NL);
781 // chain output buffer and encoder together
782 BIO *encoded_result = BIO_new(BIO_s_mem());
783 BIO_push(encoder_bio, encoded_result);
786 long written = BIO_write(encoder_bio, input.c_str(), input.size());
787 if ((unsigned)written != input.size())
790 out << "base64 encoding failed: input size: "
791 << input.size() << " vs. output size: " << written;
792 throw runtime_error(out.str());
794 if (BIO_flush(encoder_bio) != 1)
795 throw runtime_error("base64 encode: BIO_flush() failed");
797 return _convert_BIO_to_string(encoded_result);
801 * @brief base64 decode a string using OpenSSL base64 functions
803 * @param input String to decode
804 * @param one_line Expect all base64 data in one line. Input with line feeds will fail.
805 * @return base64 decoded string
807 std::string base64_decode(const std::string &input, bool one_line)
809 // check for empty buffer
813 // safety check for BIO_new_mem_buf()
814 if (input.size() >= INT_MAX)
815 throw runtime_error("base64 decode: Too much data");
817 // setup encoder. Note: BIO_free_all frees both BIOs.
818 BIO_Ptr base64_decoder(BIO_new(BIO_f_base64()), BIO_free_all);
819 BIO *bio_base64 = base64_decoder.get();
821 BIO_set_flags(bio_base64, BIO_FLAGS_BASE64_NO_NL);
823 // chain input buffer and decoder together
824 BIO *bio_input = BIO_new_mem_buf((void*)input.c_str(), input.size());
825 bio_input = BIO_push(bio_base64, bio_input);
827 BIO_Ptr decoded_result(BIO_new(BIO_s_mem()), BIO_free_all);
828 BIO *bio_decoded = decoded_result.get();
829 const int convbuf_size = 512;
830 char convbuf[convbuf_size];
833 while((read_bytes = BIO_read(bio_input, convbuf, convbuf_size)) > 0)
835 BOOST_ASSERT(read_bytes <= convbuf_size);
836 long written_bytes = BIO_write(bio_decoded, convbuf, read_bytes);
837 if (written_bytes != read_bytes)
840 out << "base64 decoding failed: read_bytes: "
841 << read_bytes << " vs. written_bytes: " << written_bytes;
842 throw runtime_error(out.str());
845 if (read_bytes == -2 || read_bytes == -1)
846 throw runtime_error("base64 decode: Error during decoding");
848 return _convert_BIO_to_string(bio_decoded);
851 } // eo namespace I2n
/**
 * @brief converts an ISO-8859-1 encoded string to UTF-8 using iconv.
 * @param isostring the ISO-8859-1 encoded input.
 * @return the UTF-8 encoded string. The result ends at the first NUL byte of the
 *         converted data. Errors reported by iconv() itself are ignored; whatever
 *         was converted up to that point is returned.
 * @throws std::runtime_error if the converter cannot be opened or the buffer
 *         cannot be allocated.
 */
std::string iso_to_utf8(const std::string& isostring)
{
    std::string result;

    iconv_t i2utf8 = iconv_open("UTF-8", "ISO-8859-1");

    // check the conversion handle (the original compared the function name here,
    // so a failed iconv_open() was never detected)
    if (i2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from ISO-8859-1 to UTF-8");

    size_t in_size=isostring.size();
    const size_t buf_size=in_size*4;
    size_t out_size=buf_size;

    char *buf = static_cast<char *>(malloc(buf_size+1));
    if (buf == NULL)
    {
        iconv_close(i2utf8);    // do not leak the handle on the error path
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = const_cast<char *>(isostring.c_str());
    char *out = buf;
    iconv(i2utf8, &in, &in_size, &out, &out_size);

    // iconv decrements out_size by the number of bytes written: terminate there
    buf[buf_size-out_size]=0;

    result=buf;

    free(buf);
    iconv_close(i2utf8);

    return result;
}
/**
 * @brief converts a UTF-8 encoded string to ISO-8859-1 using iconv.
 * @param utf8string the UTF-8 encoded input.
 * @return the ISO-8859-1 encoded string. The result ends at the first NUL byte of
 *         the converted data. Errors reported by iconv() itself are ignored;
 *         whatever was converted up to that point is returned.
 * @throws std::runtime_error if the converter cannot be opened or the buffer
 *         cannot be allocated.
 */
std::string utf8_to_iso(const std::string& utf8string)
{
    std::string result;

    iconv_t utf82iso = iconv_open("ISO-8859-1","UTF-8");

    if (utf82iso == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to ISO-8859-1");

    size_t in_size=utf8string.size();
    // the output buffer is sized to the input length
    const size_t buf_size=in_size;
    size_t out_size=buf_size;

    char *buf = static_cast<char *>(malloc(buf_size+1));
    if (buf == NULL)
    {
        iconv_close(utf82iso);  // do not leak the handle on the error path
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = const_cast<char *>(utf8string.c_str());
    char *out = buf;
    iconv(utf82iso, &in, &in_size, &out, &out_size);

    // iconv decrements out_size by the number of bytes written: terminate there
    buf[buf_size-out_size]=0;

    result=buf;

    free(buf);
    iconv_close(utf82iso);

    return result;
}
/**
 * @brief converts a UTF-8 string into a zero-terminated buffer of UCS-4 code points.
 * @param utf8string the UTF-8 encoded input.
 * @return a buffer allocated with malloc(); the caller has to release it with free().
 * @throws std::runtime_error if the converter cannot be opened, the buffer cannot be
 *         allocated or the conversion fails. Nothing is leaked in these cases.
 *
 * NOTE(review): the data is written as "UCS-4LE" and read back as wchar_t, which
 * presumably assumes a little-endian platform with a 4 byte wchar_t -- confirm.
 */
wchar_t* utf8_to_wbuf(const std::string& utf8string)
{
    iconv_t utf82wstr = iconv_open("UCS-4LE","UTF-8");

    if (utf82wstr == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UCS-4");

    size_t in_size=utf8string.size();
    // one wide character per input byte is the upper bound, plus the terminator
    const size_t buf_size= (in_size+1)*sizeof(wchar_t);
    size_t out_size= buf_size;

    wchar_t *buf = static_cast<wchar_t *>(malloc(buf_size));
    if (buf == NULL)
    {
        iconv_close(utf82wstr);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = const_cast<char *>(utf8string.c_str());
    char *out = reinterpret_cast<char *>(buf);
    if (iconv(utf82wstr, &in, &in_size, &out, &out_size) == (size_t)-1)
    {
        // release everything before reporting the error
        free(buf);
        iconv_close(utf82wstr);
        throw std::runtime_error("error converting char encodings");
    }

    // terminate right behind the last converted character
    buf[ (buf_size-out_size) /sizeof(wchar_t) ]=0;

    iconv_close(utf82wstr);

    return buf;
}
/**
 * @brief converts a string from the "UTF-7-IMAP" encoding to UTF-8 using iconv.
 * @param utf7imapstring the input in UTF-7-IMAP encoding.
 * @return the UTF-8 encoded string. The result ends at the first NUL byte of the
 *         converted data. Errors reported by iconv() itself are ignored; whatever
 *         was converted up to that point is returned.
 * @throws std::runtime_error if the converter cannot be opened (e.g. the iconv
 *         implementation does not provide "UTF-7-IMAP") or the buffer cannot be allocated.
 */
std::string utf7imap_to_utf8(const std::string& utf7imapstring)
{
    std::string result;

    iconv_t utf7imap2utf8 = iconv_open("UTF-8","UTF-7-IMAP");

    if (utf7imap2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-7-IMAP to UTF-8");

    size_t in_size=utf7imapstring.size();
    const size_t buf_size=in_size*4;
    size_t out_size=buf_size;

    char *buf = static_cast<char *>(malloc(buf_size+1));
    if (buf == NULL)
    {
        iconv_close(utf7imap2utf8); // do not leak the handle on the error path
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = const_cast<char *>(utf7imapstring.c_str());
    char *out = buf;
    iconv(utf7imap2utf8, &in, &in_size, &out, &out_size);

    // iconv decrements out_size by the number of bytes written: terminate there
    buf[buf_size-out_size]=0;

    result=buf;

    free(buf);
    iconv_close(utf7imap2utf8);

    return result;
}
/**
 * @brief converts a UTF-8 string to the "UTF-7-IMAP" encoding using iconv.
 * @param utf8string the UTF-8 encoded input.
 * @return the string in UTF-7-IMAP encoding. The result ends at the first NUL byte
 *         of the converted data. Errors reported by iconv() itself are ignored;
 *         whatever was converted up to that point is returned.
 * @throws std::runtime_error if the converter cannot be opened (e.g. the iconv
 *         implementation does not provide "UTF-7-IMAP") or the buffer cannot be allocated.
 */
std::string utf8_to_utf7imap(const std::string& utf8string)
{
    std::string result;

    iconv_t utf82utf7imap = iconv_open("UTF-7-IMAP", "UTF-8");

    // the message names the direction this function converts in
    // (the original carried the text of the reverse function)
    if (utf82utf7imap == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UTF-7-IMAP");

    // UTF-7 is base64 encoded, a buffer 10x as large
    // as the utf-8 buffer should be enough. If not the string will be truncated.
    size_t in_size=utf8string.size();
    const size_t buf_size=in_size*10;
    size_t out_size=buf_size;

    char *buf = static_cast<char *>(malloc(buf_size+1));
    if (buf == NULL)
    {
        iconv_close(utf82utf7imap); // do not leak the handle on the error path
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = const_cast<char *>(utf8string.c_str());
    char *out = buf;
    iconv(utf82utf7imap, &in, &in_size, &out, &out_size);

    // iconv decrements out_size by the number of bytes written: terminate there
    buf[buf_size-out_size]= 0;

    result=buf;

    free(buf);
    iconv_close(utf82utf7imap);

    return result;
}
/**
 * @brief Tokenize string by (html) tags.
 *
 * Cuts the input into alternating text and tag tokens. A tag token runs from a
 * '<' up to and including the next '>'. Every '<' starts a new token, so an
 * unterminated "<..." sequence is stored as text.
 *
 * @param[out] tokenized receives the tokens (appended); first: token text, second: isTag
 * @param input the string to tokenize
 */
void tokenize_by_tag(std::vector<std::pair<std::string,bool> > &tokenized, const std::string &input)
{
    std::string::size_type pos, len = input.size();
    bool inside_tag = false;
    std::string current;

    for (pos = 0; pos < len; pos++)
    {
        if (input[pos] == '<')
        {
            inside_tag = true;

            // flush whatever was collected before this tag start
            if (!current.empty() )
            {
                tokenized.push_back( std::make_pair(current, false) );
                current.clear();
            }

            current += input[pos];
        }
        else if (input[pos] == '>' && inside_tag)
        {
            current += input[pos];
            inside_tag = false;
            if (!current.empty() )
            {
                tokenized.push_back( std::make_pair(current, true) );
                current.clear();
            }
        }
        else
            current += input[pos];
    }

    // String left over in buffer?
    if (!current.empty() )
        tokenized.push_back( std::make_pair(current, false) );
} // eo tokenize_by_tag
1045 std::string strip_html_tags(const std::string &input)
1047 // Pair first: string, second: isTag
1048 vector<pair<string,bool> > tokenized;
1049 tokenize_by_tag (tokenized, input);
1052 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
1053 for (token = tokenized.begin(); token != tokens_end; ++token)
1055 output += token->first;
1058 } // eo strip_html_tags
1061 // Smart-encode HTML en
1062 string smart_html_entities(const std::string &input)
1064 // Pair first: string, second: isTag
1065 vector<pair<string,bool> > tokenized;
1066 tokenize_by_tag (tokenized, input);
1069 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
1070 for (token = tokenized.begin(); token != tokens_end; ++token)
1072 // keep HTML tags as they are
1074 output += token->first;
1076 output += html_entities(token->first);
/**
 * @brief searches for the first character outside the 7 bit ASCII range.
 * @param str the string to search in.
 * @return index of the first byte with a value above 127;
 *         std::string::npos if there is none.
 */
std::string::size_type find_8bit(const std::string &str)
{
    std::string::size_type l=str.size();
    for (std::string::size_type p=0; p < l; p++)
        if (static_cast<unsigned char>(str[p]) > 127)
            return p;

    return std::string::npos;
}
1093 // encoded UTF-8 chars into HTML entities
1094 string html_entities(std::string str)
1097 replace_all (str, "&", "&");
1098 replace_all (str, "<", "<");
1099 replace_all (str, ">", ">");
1100 replace_all (str, "\"", """);
1101 replace_all (str, "'", "'");
1102 replace_all (str, "/", "/");
1105 replace_all (str, "\xC3\xA4", "ä");
1106 replace_all (str, "\xC3\xB6", "ö");
1107 replace_all (str, "\xC3\xBC", "ü");
1108 replace_all (str, "\xC3\x84", "Ä");
1109 replace_all (str, "\xC3\x96", "Ö");
1110 replace_all (str, "\xC3\x9C", "Ü");
1113 replace_all (str, "\xC3\x9F", "ß");
1115 // conversion of remaining non-ASCII chars needed?
1116 // just do if needed because of performance
1117 if (find_8bit(str) != string::npos)
1119 // convert to fixed-size encoding UTF-32
1120 wchar_t* wbuf=utf8_to_wbuf(str);
1121 ostringstream target;
1123 // replace all non-ASCII chars with HTML representation
1124 for (int p=0; wbuf[p] != 0; p++)
1126 unsigned int c=wbuf[p];
1129 target << static_cast<unsigned char>(c);
1131 target << "&#" << c << ';';
1140 } // eo html_entities(std::string)
1142 // convert HTML entities to something that can be viewed on a basic text console (restricted to ASCII-7)
1143 string html_entities_to_console(std::string str)
1146 replace_all (str, "&", "&");
1147 replace_all (str, "<", "<");
1148 replace_all (str, ">", ">");
1149 replace_all (str, """, "\"");
1150 replace_all (str, "'", "'");
1151 replace_all (str, "/", "/");
1154 replace_all (str, "ä", "ae");
1155 replace_all (str, "ö", "oe");
1156 replace_all (str, "ü", "ue");
1157 replace_all (str, "Ä", "Ae");
1158 replace_all (str, "Ö", "Oe");
1159 replace_all (str, "Ü", "Ue");
1162 replace_all (str, "ß", "ss");
1167 // find_html_comments + remove_html_comments(str, comments)
1168 void remove_html_comments(string &str)
1170 vector<CommentZone> comments = find_html_comments(str);
1171 remove_html_comments(str, comments);
1174 // find all html comments, behaving correctly if they are nested; ignores comment tags ("<!--FOO .... BAR-->")
1175 // If there are invalid comments ("-->" before "<!--" or different number of closing and opening tags),
1176 // then the unknown index of corresponding start/end tag will be represented by a string::npos
1177 // Indices are from start of start tag until first index after closing tag
1178 vector<CommentZone> find_html_comments(const std::string &str)
1180 static const string START = "<!--";
1181 static const string CLOSE = "-->";
1182 static const string::size_type START_LEN = START.length();
1183 static const string::size_type CLOSE_LEN = CLOSE.length();
1185 vector<CommentZone> comments;
1187 // in order to find nested comments, need either recursion or a stack
1188 vector<string::size_type> starts; // stack of start tags
1190 string::size_type pos = 0;
1191 string::size_type len = str.length();
1192 string::size_type next_start, next_close;
1194 while (pos < len) // not really needed but just in case
1196 next_start = str.find(START, pos);
1197 next_close = str.find(CLOSE, pos);
1199 if ( (next_start == string::npos) && (next_close == string::npos) )
1200 break; // we are done
1202 else if ( (next_start == string::npos) || (next_close < next_start) ) // close one comment (pop)
1204 if (starts.empty()) // closing tag without a start
1205 comments.push_back(CommentZone(string::npos, next_close+CLOSE_LEN));
1208 comments.push_back(CommentZone(starts.back(), next_close+CLOSE_LEN));
1211 pos = next_close + CLOSE_LEN;
1214 else if ( (next_close == string::npos) || (next_start < next_close) ) // start a new comment (push)
1216 starts.push_back(next_start);
1217 pos = next_start + START_LEN;
1221 // add comments that have no closing tag from back to front (important for remove_html_comments!)
1222 while (!starts.empty())
1224 comments.push_back(CommentZone(starts.back(), string::npos));
1231 // remove all html comments foundby find_html_comments
1232 void remove_html_comments(std::string &str, const vector<CommentZone> &comments)
1234 // remember position where last removal started
1235 string::size_type last_removal_start = str.length();
1237 // Go from back to front to not mess up indices.
1238 // This requires that bigger comments, that contain smaller comments, come AFTER
1239 // the small contained comments in the comments vector (i.e. comments are ordered by
1240 // their closing tag, not their opening tag). This is true for results from find_html_comments
1241 BOOST_REVERSE_FOREACH(const CommentZone &comment, comments)
1243 if (comment.first == string::npos)
1245 str = str.replace(0, comment.second, ""); // comment starts "before" str --> delete from start
1246 break; // there can be no more
1248 else if (comment.first >= last_removal_start)
1250 continue; // this comment is inside another comment that we have removed already
1252 else if (comment.second == string::npos) // comment ends "after" str --> delete until end
1254 str = str.replace(comment.first, string::npos, "");
1255 last_removal_start = comment.first;
1259 str = str.replace(comment.first, comment.second-comment.first, "");
1260 last_removal_start = comment.first;
1265 bool replace_all(string &base, const char *ist, const char *soll)
1269 return replace_all(base,&i,&s);
1272 bool replace_all(string &base, const string &ist, const char *soll)
1275 return replace_all(base,&ist,&s);
1278 bool replace_all(string &base, const string *ist, const string *soll)
1280 return replace_all(base,*ist,*soll);
1283 bool replace_all(string &base, const char *ist, const string *soll)
1286 return replace_all(base,&i,soll);
/**
 * @brief replaces all occurrences of @a ist in @a base with @a soll.
 * @param[in,out] base the string to modify.
 * @param ist the text to search for; must not be empty.
 * @param soll the replacement text.
 * @return @a true if at least one occurrence was replaced.
 * @throws std::runtime_error if @a ist is empty.
 */
bool replace_all(std::string &base, const std::string &ist, const std::string &soll)
{
    bool found_ist = false;
    std::string::size_type a=0;

    if (ist.empty() )
        throw std::runtime_error ("replace_all called with empty search string");

    while ( (a=base.find(ist,a) ) != std::string::npos)
    {
        base.replace(a,ist.size(),soll);
        // continue behind the inserted text, so a replacement which contains
        // the search string is not replaced again
        a=a+soll.size();
        found_ist = true;
    }

    return found_ist;
}
/**
 * @brief replaces all characters that could be problematic or impose a security risk when being logged
 * @param str the original string
 * @param replace_with the character to replace the unsafe chars with
 * @return a string that is safe to send to syslog or other logfiles
 *
 * All chars between 0x20 (space) and 0x7E (~) (including) are considered safe for logging.
 * See e.g. RFC 5424, section 8.2 or the posix character class "printable".
 * This eliminates all possible problems with NUL, control characters, 8 bit chars, UTF8.
 */
std::string sanitize_for_logging(const std::string &str, const char replace_with)
{
    std::string output=str;

    const std::string::size_type len = output.size();
    for (std::string::size_type p=0; p < len; p++)
    {
        // compare as unsigned char: the range test then reads the same no
        // matter whether plain char is signed on the platform
        const unsigned char c= static_cast<unsigned char>(output[p]);
        if (c < 0x20 || c > 0x7E)
            output[p]=replace_with;
    }

    return output;
}
/**
 * @brief returns a lower case version of a given string.
 * @param src the string
 * @return copy of @a src with every character passed through tolower().
 */
std::string to_lower(const std::string &src)
{
    std::string dst = src;

    std::string::size_type pos, end = dst.size();
    for (pos = 0; pos < end; pos++)
    {
        // tolower() must not be called with a negative value: go through unsigned char
        dst[pos] = static_cast<char>( tolower( static_cast<unsigned char>(dst[pos]) ) );
    }

    return dst;
}
/**
 * @brief returns an upper case version of a given string.
 * @param src the string
 * @return copy of @a src with every character passed through toupper().
 */
std::string to_upper(const std::string &src)
{
    std::string dst = src;

    std::string::size_type pos, end = dst.size();
    for (pos = 0; pos < end; pos++)
    {
        // toupper() must not be called with a negative value: go through unsigned char
        dst[pos] = static_cast<char>( toupper( static_cast<unsigned char>(dst[pos]) ) );
    }

    return dst;
}
1354 const int MAX_UNIT_FORMAT_SYMBOLS = 6;
1356 const string shortUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
1365 const string longUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
1366 i18n_noop(" Bytes"),
1367 i18n_noop(" KBytes"),
1368 i18n_noop(" MBytes"),
1369 i18n_noop(" GBytes"),
1370 i18n_noop(" TBytes"),
1371 i18n_noop(" PBytes")
/**
 * @brief rounds a number "half up" to a multiple of 1/rounding_multiplier.
 * @param number the number to round.
 * @param rounding_multiplier the reciprocal of the rounding step,
 *        e.g. 10 rounds to one decimal place.
 * @return the rounded number.
 */
static long double rounding_upwards(
    const long double number,
    const int rounding_multiplier
)
{
    long double rounded_number;
    rounded_number = number * rounding_multiplier;
    rounded_number += 0.5;
    // floor() instead of a cast to an integer type: the cast truncates toward
    // zero (wrong for negative numbers, e.g. -1500 became -1499.9) and is
    // undefined once the value does not fit into the integer type
    rounded_number = std::floor(rounded_number);
    rounded_number = rounded_number / static_cast<long double>(rounding_multiplier);

    return rounded_number;
}
1390 string nice_unit_format(
1391 const int64_t input,
1392 const UnitFormat format,
1396 // select the system of units (decimal or binary)
1398 if (base == UnitBase1000)
1407 long double size = input;
1409 // check the size of the input number to fit in the appropriate symbol
1411 while (size > multiple)
1413 size = size / multiple;
1416 // rollback to the previous values and stop the loop when cannot
1417 // represent the number length.
1418 if (sizecount >= MAX_UNIT_FORMAT_SYMBOLS)
1420 size = size * multiple;
1426 // round the input number "half up" to multiples of 10
1427 const int rounding_multiplier = 10;
1428 size = rounding_upwards(size, rounding_multiplier);
1430 // format the input number, placing the appropriate symbol
1432 out.setf (ios::fixed);
1433 if (format == ShortUnitFormat)
1436 out << size << i18n( shortUnitFormatSymbols[sizecount].c_str() );
1441 out << size << i18n( longUnitFormatSymbols[sizecount].c_str() );
1445 } // eo nice_unit_format(int input)
1448 string nice_unit_format(
1450 const UnitFormat format,
1454 // round as double and cast to int64_t
1455 // cast raised overflow error near max val of int64_t (~9.2e18, see unittest)
1456 int64_t input_casted_and_rounded =
1457 boost::numeric_cast<int64_t>( round(input) );
1460 return nice_unit_format( input_casted_and_rounded, format, base );
1461 } // eo nice_unit_format(double input)
/**
 * @brief escapes a string and encloses it in double quotes.
 *
 * Double quotes and backslashes get a backslash prepended, carriage return and
 * line feed are written as the two character sequences \\r and \\n.
 *
 * @param s the string to escape.
 * @return the escaped string, enclosed in double quotes.
 */
std::string escape(const std::string &s)
{
    std::string out;
    out.reserve( s.size() + 2 );
    out += '"';

    // single pass; gives the same result as escaping quotes/backslashes first
    // and the line break characters afterwards
    for (std::string::const_iterator it= s.begin(); it != s.end(); ++it)
    {
        switch (*it)
        {
            case '"':
            case '\\':
                out += '\\';
                out += *it;
                break;
            case '\r':
                out += "\\r";
                break;
            case '\n':
                out += "\\n";
                break;
            default:
                out += *it;
        }
    }

    out += '"';

    return out;
} // eo escape(const std::string&)
1496 string descape(const string &s, int startpos, int &endpos)
1500 if (s.at(startpos) != '"')
1501 throw out_of_range("value not type escaped string");
1503 out=s.substr(startpos+1);
1504 string::size_type p=0;
1506 // search for the end of the string
1507 while ( (p=out.find("\"",p) ) !=out.npos)
1512 // the " might be escaped with a backslash
1513 while (e>=0 && out.at (e) =='\\')
1515 if (escaped == false)
1529 // we now have the end of the string
1530 out=out.substr(0,p);
1532 // tell calling prog about the endposition
1533 endpos=startpos+p+1;
1535 // descape all \ stuff inside the string now
1537 while ( (p=out.find_first_of("\\",p) ) !=out.npos)
1539 switch (out.at(p+1) )
1542 out.replace(p,2,"\r");
1545 out.replace(p,2,"\n");
1554 } // eo descape(const std::string&,int,int&)
1557 string escape_shellarg(const string &input)
1559 string output = "'";
1560 string::const_iterator it, it_end = input.end();
1561 for (it = input.begin(); it != it_end; ++it)