2 The software in this package is distributed under the GNU General
3 Public License version 2 (with a special exception described below).
5 A copy of GNU General Public License (GPL) is included in this distribution,
6 in the file COPYING.GPL.
8 As a special exception, if other files instantiate templates or use macros
9 or inline functions from this file, or you compile this file and link it
10 with other works to produce a work based on this file, this file
11 does not by itself cause the resulting work to be covered
12 by the GNU General Public License.
14 However the source code for this file must still be made available
15 in accordance with section (3) of the GNU General Public License.
17 This exception does not invalidate any other reasons why a work based
18 on this file might be covered by the GNU General Public License.
22 * (c) Copyright 2007-2008 by Intra2net AG
30 #include <cmath> // for round()
38 #include <boost/numeric/conversion/cast.hpp>
39 #include <boost/foreach.hpp>
41 #include <boost/assert.hpp>
42 #include <boost/shared_ptr.hpp>
43 #include <openssl/bio.h>
44 #include <openssl/evp.h>
46 #include <stringfunc.hxx>
namespace
{

/// hex digits indexed by nibble value (0..15), lower and upper case variant.
const std::string hexDigitsLower("0123456789abcdef");
const std::string hexDigitsUpper("0123456789ABCDEF");


/**
 * @brief functor which converts a single character to upper case.
 *
 * The character is handed to std::toupper() as unsigned char: passing a
 * negative value (other than EOF) to the <cctype> functions is undefined.
 */
struct UpperFunc
{
    char operator() (char c) const
    {
        return static_cast<char>( std::toupper( static_cast<unsigned char>(c) ) );
    }
}; // eo struct UpperFunc


/**
 * @brief functor which converts a single character to lower case.
 *
 * See UpperFunc for the reason of the unsigned char cast.
 */
struct LowerFunc
{
    char operator() (char c) const
    {
        return static_cast<char>( std::tolower( static_cast<unsigned char>(c) ) );
    }
}; // eo struct LowerFunc


} // eo namespace <anonymous>
/**
 * default list of Whitespaces (" \t\r\n");
 */
const std::string Whitespaces = " \t\r\n";

/**
 * default list of lineendings ("\r\n");
 */
const std::string LineEndings= "\r\n";
/**
 * @brief checks if a string begins with a given prefix.
 * @param[in] str the string which is tested
 * @param[in] prefix the prefix which should be tested for.
 * @return @a true iff the prefix is not empty and the string begins with that prefix.
 */
bool has_prefix(const std::string& str, const std::string& prefix)
{
    // an empty prefix never matches; a prefix longer than the string cannot match
    // (this also covers the empty string, since the prefix is not empty here)
    if (prefix.empty() || str.size() < prefix.size() )
    {
        return false;
    }
    return str.compare(0, prefix.size(), prefix) == 0;
} // eo has_prefix(const std::string&,const std::string&)
/**
 * @brief checks if a string ends with a given suffix.
 * @param[in] str the string which is tested
 * @param[in] suffix the suffix which should be tested for.
 * @return @a true iff the suffix is not empty and the string ends with that suffix.
 */
bool has_suffix(const std::string& str, const std::string& suffix)
{
    // an empty suffix never matches; a suffix longer than the string cannot match
    // (this also covers the empty string, since the suffix is not empty here)
    if (suffix.empty() || str.size() < suffix.size() )
    {
        return false;
    }
    return str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
} // eo has_suffix(const std::string&,const std::string&)
/**
 * cut off characters from a given list from front and end of a string.
 * @param[in,out] str the string which should be trimmed.
 * @param charlist the list of characters to remove from beginning and end of string
 * @return the result string.
 */
std::string trim_mod(std::string& str, const std::string& charlist)
{
    // first: trim the beginning:
    std::string::size_type pos= str.find_first_not_of (charlist);
    if (pos == std::string::npos)
    {
        // whole string consists of charlist (or is already empty)
        str.clear();
        return str;
    }
    else if (pos>0)
    {
        // str starts with charlist
        str.erase(0,pos);
    }
    // now let's look at the tail:
    pos= str.find_last_not_of(charlist) +1;  // note: we already know there is at least one other char!
    if ( pos < str.size() )
    {
        str.erase(pos, str.size()-pos);
    }
    return str;
} // eo trim_mod(std::string&,const std::string&)
/**
 * removes last character from a string when it is in a list of chars to be removed.
 * @param[in,out] str the string.
 * @param what the list of chars which will be tested for.
 * @return the resulting string with last char removed (if applicable)
 */
std::string chomp_mod(std::string& str, const std::string& what)
{
    if (str.empty() || what.empty() )
    {
        return str;
    }
    // only the very last character is looked at; at most one character is removed
    if (what.find(str.at (str.size()-1) ) != std::string::npos)
    {
        str.erase(str.size() - 1);
    }
    return str;
} // eo chomp_mod(std::string&,const std::string&)
180 * @brief converts a string to lower case.
181 * @param[in,out] str the string to modify.
184 std::string to_lower_mod(std::string& str)
186 std::transform(str.begin(), str.end(), str.begin(), LowerFunc() );
188 } // eo to_lower_mod(std::string&)
192 * @brief converts a string to upper case.
193 * @param[in,out] str the string to modify.
196 std::string to_upper_mod(std::string& str)
198 std::transform( str.begin(), str.end(), str.begin(), UpperFunc() );
200 } // eo to_upper_mod(std::string&)
/**
 * cut off characters from a given list from front and end of a string.
 * @param str the string which should be trimmed.
 * @param charlist the list of characters to remove from beginning and end of string
 * @return the result string.
 */
std::string trim (const std::string& str, const std::string& charlist)
{
    // first: trim the beginning:
    std::string::size_type pos0= str.find_first_not_of(charlist);
    if (pos0 == std::string::npos)
    {
        // whole string consists of charlist (or is already empty)
        return std::string();
    }
    // now let's look at the end:
    // (cannot be npos here: at least the character at pos0 is not in charlist)
    std::string::size_type pos1= str.find_last_not_of(charlist);
    return str.substr(pos0, pos1 - pos0 + 1);
} // eo trim(const std::string&,const std::string&)
/**
 * removes last character from a string when it is in a list of chars to be removed.
 * @param str the string.
 * @param what the list of chars which will be tested for.
 * @return the resulting string with last char removed (if applicable)
 */
std::string chomp (const std::string& str, const std::string& what)
{
    if (str.empty() || what.empty() )
    {
        return str;
    }
    // only the very last character is looked at; at most one character is removed
    if (what.find(str.at (str.size()-1) ) != std::string::npos)
    {
        return str.substr(0, str.size()-1);
    }
    return str;
} // eo chomp(const std::string&,const std::string&)
246 * @brief returns a lower case version of a given string.
247 * @param str the string
248 * @return the lower case version of the string
250 std::string to_lower (const std::string& str)
252 std::string result(str);
253 return to_lower_mod(result);
254 } // eo to_lower(const std::string&)
258 * @brief returns a upper case version of a given string.
259 * @param str the string
260 * @return the upper case version of the string
262 std::string to_upper(const std::string& str)
264 std::string result(str);
265 return to_upper_mod(result);
266 } // eo to_upper(const std::string&)
271 * @brief removes a given suffix from a string.
272 * @param str the string.
273 * @param suffix the suffix which should be removed if the string ends with it.
274 * @return the string without the suffix.
276 * If the string ends with the suffix, it is removed. If the the string doesn't end
277 * with the suffix the original string is returned.
279 std::string remove_suffix(const std::string& str, const std::string& suffix)
281 if (has_suffix(str,suffix) )
283 return str.substr(0, str.size()-suffix.size() );
286 } // eo remove_suffix(const std::string&,const std::string&)
291 * @brief removes a given prefix from a string.
292 * @param str the string.
293 * @param prefix the prefix which should be removed if the string begins with it.
294 * @return the string without the prefix.
296 * If the string begins with the prefix, it is removed. If the the string doesn't begin
297 * with the prefix the original string is returned.
299 std::string remove_prefix(const std::string& str, const std::string& prefix)
301 if (has_prefix(str,prefix) )
303 return str.substr( prefix.size() );
306 } // eo remove_prefix(const std::string&,const std::string&)
310 * split a string to key and value delimited by a given delimiter.
311 * The resulting key and value strings are trimmed (Whitespaces removed at beginning and end).
312 * @param str the string which should be splitted.
313 * @param[out] key the resulting key
314 * @param[out] value the resulting value
315 * @param delimiter the delimiter between key and value; default is '='.
316 * @return @a true if the split was successful.
319 const std::string& str,
324 std::string::size_type pos = str.find (delimiter);
325 if (pos == std::string::npos) return false;
326 key= str.substr(0,pos);
327 value= str.substr(pos+1);
331 } // eo pair_split(const std::string&,std::string&,std::string&,char)
335 * splits a string by given delimiter
337 * @param[in] str the string which should be splitted.
338 * @param[out] result the list resulting from splitting @a str.
339 * @param[in] delimiter the delimiter (word/phrase) at which @a str should be splitted.
340 * @param[in] omit_empty should empty parts not be stored?
341 * @param[in] trim_list list of characters the parts should be trimmed by.
342 * (empty string results in no trim)
345 const std::string& str,
346 std::list<std::string>& result,
347 const std::string& delimiter,
349 const std::string& trim_list
352 std::string::size_type pos, last_pos=0;
353 bool delimiter_found= false;
354 while ( last_pos < str.size() && last_pos != std::string::npos)
356 pos= str.find(delimiter, last_pos);
358 if (pos == std::string::npos)
360 part= str.substr(last_pos);
361 delimiter_found= false;
365 part= str.substr(last_pos, pos-last_pos);
366 delimiter_found=true;
368 if (pos != std::string::npos)
370 last_pos= pos+ delimiter.size();
374 last_pos= std::string::npos;
376 if (!trim_list.empty() ) trim_mod (part, trim_list);
377 if (omit_empty && part.empty() ) continue;
378 result.push_back( part );
380 // if the string ends with a delimiter we need to append an empty string if no omit_empty
382 // (this way we keep the split result consistent to a join operation)
383 if (delimiter_found && !omit_empty)
385 result.push_back("");
387 } // eo split_string(const std::string&,std::list< std::string >&,const std::string&,bool,const std::string&)
390 /** call split_string with list<string>, converts result to vector; vector is clear()-ed first
392 * Note: Uses 3 O(n)-operations: list.size, vector.resize and std::swap_ranges;
393 * not sure whether there is a better way to do this
396 const std::string& str,
397 std::vector<std::string>& result,
398 const std::string& delimiter,
400 const std::string& trim_list
403 std::list<std::string> tmp;
404 split_string(str, tmp, delimiter, omit_empty, trim_list);
405 std::size_t size = tmp.size(); // this is O(n)
407 result.resize(size); // also O(n)
408 std::swap_ranges(tmp.begin(), tmp.end(), result.begin()); // also O(n)
412 * splits a string by a given delimiter
413 * @param str the string which should be splitted.
414 * @param delimiter delimiter the delimiter (word/phrase) at which @a str should be splitted.
415 * @param[in] omit_empty should empty parts not be stored?
416 * @param[in] trim_list list of characters the parts should be trimmed by.
417 * (empty string results in no trim)
418 * @return the list resulting from splitting @a str.
420 std::list<std::string> split_string(
421 const std::string& str,
422 const std::string& delimiter,
424 const std::string& trim_list
427 std::list<std::string> result;
428 split_string(str, result, delimiter, omit_empty, trim_list);
430 } // eo split_string(const std::string&,const std::string&,bool,const std::string&)
/**
 * @brief joins a NULL-terminated array of C strings.
 * @param parts the array of C strings; the last element must be NULL.
 *              A NULL array yields an empty string.
 * @param delimiter the text which is put between two consecutive parts.
 * @return the joined string.
 */
std::string join_string (
    const char *const parts[], /* assumed NULL-terminated */
    const std::string& delimiter
)
{
    std::string result;

    if (parts != NULL)
    {
        const char *const *cur = parts;

        if (*cur != NULL) {
            // first part goes in without a leading delimiter
            result = std::string (*cur);

            while (*++cur != NULL) {
                result += delimiter;
                result += std::string (*cur);
            }
        }
    }

    return result;
}
458 join_string (const std::list<std::string> &l, const std::string &d)
459 { return join_string<std::list<std::string> >(l, d); }
462 join_string (const std::vector<std::string> &l, const std::string &d)
463 { return join_string<std::vector<std::string> >(l, d); }
471 * @brief returns a hex string from a binary string.
472 * @param str the (binary) string
473 * @param upper_case_digits determine whether to use upper case characters for digits A-F.
474 * @return the string in hex notation.
476 std::string convert_binary_to_hex(
477 const std::string& str,
478 bool upper_case_digits
482 std::string hexDigits(upper_case_digits ? hexDigitsUpper : hexDigitsLower);
483 for ( std::string::const_iterator it= str.begin();
487 result.push_back( hexDigits[ ( (*it) >> 4) & 0x0f ] );
488 result.push_back( hexDigits[ (*it) & 0x0f ] );
491 } // eo convert_binary_to_hex(const std::string&,bool)
// NOTE(review): this listing of convert_hex_to_binary() is incomplete: brace-only lines and
// several statements (e.g. the declarations of 'result' and 'c') are not visible here, so the
// notes below only describe what the visible lines show.
495 * @brief converts a hex digit string to binary string.
496 * @param str hex digit string
497 * @return the binary string.
499 * The hex digit string may contains white spaces or colons which are treated
500 * as delimiters between hex digit groups.
502 * @todo rework the handling of half nibbles (consistency)!
504 std::string convert_hex_to_binary(
505 const std::string& str
// NOTE(review): dynamic exception specifications were removed from the language in C++17.
507 throw (std::runtime_error)
511 bool hasNibble= false;
512 bool lastWasWS= true;
513 for ( std::string::const_iterator it= str.begin();
// look the character up in the lower case digit table first, then in the upper case one
517 std::string::size_type p = hexDigitsLower.find( *it );
518 if (p== std::string::npos)
520 p= hexDigitsUpper.find( *it );
// not a hex digit: only characters from Whitespaces and ':' are accepted
522 if (p == std::string::npos)
524 if ( ( Whitespaces.find( *it ) != std::string::npos) // is it a whitespace?
525 or ( *it == ':') // or a colon?
528 // we treat that as a valid delimiter:
531 // 1 nibble before WS is treate as lower part:
// anything which is neither a hex digit nor an accepted delimiter is an error
540 if (p == std::string::npos )
542 throw runtime_error("illegal character in hex digit string: " + str);
556 //we already had a nibble, so a char is complete now:
557 result.push_back( c );
562 // this is the first nibble of a new char:
568 //well, there is one nibble left
569 // let's do some heuristics:
572 // if the preceeding character was a white space (or a colon)
573 // we treat the nibble as lower part:
574 //( this is consistent with shortened hex notations where leading zeros are not noted)
575 result.push_back( c );
579 // if it was part of a hex digit chain, we treat it as UPPER part (!!)
580 result.push_back( c << 4 );
584 } // eo convert_hex_to_binary(const std::string&)
/**
 * @brief returns the names of the class templates which carry an allocator parameter.
 * @return reference to a function-local list holding "std::list" and "std::vector".
 *
 * The list is filled on the first call only.
 */
static std::list<std::string>& alloc_template_starts()
{
    static std::list<std::string> result;
    if (result.empty())
    {
        result.push_back("std::list");
        result.push_back("std::vector");
    }
    return result;
}
// Shortens type names in a string: the long spelling of std::string is replaced and, for the
// templates named by alloc_template_starts(), a matching "std::allocator<...>" argument is
// replaced by "_alloc_".
// NOTE(review): brace-only lines and several statements (e.g. the replacement text of the first
// replace_all call, the initialisation of 'comma'/'end' and the bracket counting) are not
// visible in this listing.
598 string shorten_stl_types(const string &input)
600 string output = input;
602 // first: replace fixed string for std::string
603 replace_all(output, "std::basic_string<char, std::char_traits<char>, std::allocator<char> >",
606 // loop over list/vector/... that have an allocator, e.g.
607 // std::list< some_type_here, std::allocator<some_type_here> >
608 string::size_type start, comma, end, len, start_text_len;
610 string allocator_text;
611 BOOST_FOREACH(const string &start_text, alloc_template_starts())
616 start_text_len = start_text.length();
617 while( (start=output.find(start_text+"<", start)) != string::npos )
619 len = output.length();
620 start += start_text_len+1; // start next iter and tests here after opening bracket
622 // now comes the tricky part: find matching ',' and the closing '>' even if "subtype" is template again
624 n_open_brackets = 1; // the bracket right after start_text counts as first
625 while (comma < len && n_open_brackets > 0)
627 if (output[comma] == ',' && n_open_brackets == 1)
629 else if (output[comma] == '<')
631 else if (output[comma] == '>')
636 while (end < len && n_open_brackets > 0)
638 if (output[end] == '<')
640 else if (output[end] == '>')
643 if (n_open_brackets == 0)
644 break; // do not increment end
649 // check that start < comma < end < len && n_open_brackets == 0
650 if (start >= comma || comma >= end || end >= len || n_open_brackets != 0)
651 continue; // input seems to be of unexpected form
653 // check that type in allocator is same as until comma
654 string type = output.substr(start, comma-start);
// a type which itself ends in '>' is followed by a blank before the closing bracket
655 if (type[type.length()-1] == '>')
656 allocator_text = string("std::allocator<") + type + " > ";
658 allocator_text = string("std::allocator<") + type + "> ";
// comma+2 skips the ',' and the character after it (presumably a blank; verify)
659 if (output.substr(comma+2, end-comma-2) == allocator_text)
660 output.replace(comma+2, end-comma-2, "_alloc_");
// shared pointer to an OpenSSL BIO; the base64 functions in this file construct it with
// BIO_free_all as deleter
667 typedef boost::shared_ptr<BIO> BIO_Ptr;
670 * @brief Converts openssl generic input/output to std::string
672 * Code adapted from keymakerd.
674 * @param bio Openssl's generic input/output
675 * @return :string STL string
677 static std::string _convert_BIO_to_string(BIO *input)
682 long written = BIO_get_mem_data(input, &output);
683 if (written <= 0 || output == NULL)
686 rtn.assign(output, written); //lint !e534 !e732
691 * @brief base64 encode a string using OpenSSL base64 functions
693 * Data size limit is 2GB on 32 bit (LONG_MAX)
695 * @param input String to encode
696 * @param one_line Encode all data as one line, no wrapping with line feeds
697 * @return base64 encoded string
699 std::string base64_encode(const std::string &input, bool one_line)
701 // check for empty buffer
705 // safety check to ensure our check afer BIO_write() works
706 if (input.size() >= LONG_MAX)
707 throw runtime_error("base64 encode: Too much data");
709 // setup encoder. Note: BIO_free_all frees both BIOs.
710 BIO_Ptr base64_encoder(BIO_new(BIO_f_base64()), BIO_free_all);
711 BIO *encoder_bio = base64_encoder.get();
713 BIO_set_flags(encoder_bio, BIO_FLAGS_BASE64_NO_NL);
715 // chain output buffer and encoder together
716 BIO *encoded_result = BIO_new(BIO_s_mem());
717 BIO_push(encoder_bio, encoded_result);
720 long written = BIO_write(encoder_bio, input.c_str(), input.size());
721 if ((unsigned)written != input.size())
724 out << "base64 encoding failed: input size: "
725 << input.size() << " vs. output size: " << written;
726 throw runtime_error(out.str());
728 if (BIO_flush(encoder_bio) != 1)
729 throw runtime_error("base64 encode: BIO_flush() failed");
731 return _convert_BIO_to_string(encoded_result);
// NOTE(review): brace-only lines and a few statements (the handling of the empty input, the
// condition guarding BIO_set_flags, the declarations of 'read_bytes' and 'out') are not visible
// in this listing.
735 * @brief base64 decode a string using OpenSSL base64 functions
737 * @param input String to decode
738 * @param one_line Expect all base64 data in one line. Input with line feeds will fail.
739 * @return base64 decoded string
741 std::string base64_decode(const std::string &input, bool one_line)
743 // check for empty buffer
747 // safety check for BIO_new_mem_buf()
748 if (input.size() >= INT_MAX)
749 throw runtime_error("base64 decode: Too much data");
751 // setup encoder. Note: BIO_free_all frees both BIOs.
752 BIO_Ptr base64_decoder(BIO_new(BIO_f_base64()), BIO_free_all);
753 BIO *bio_base64 = base64_decoder.get();
755 BIO_set_flags(bio_base64, BIO_FLAGS_BASE64_NO_NL);
757 // chain input buffer and decoder together
758 BIO *bio_input = BIO_new_mem_buf((void*)input.c_str(), input.size());
759 bio_input = BIO_push(bio_base64, bio_input);
// separate memory BIO which collects the decoded data
761 BIO_Ptr decoded_result(BIO_new(BIO_s_mem()), BIO_free_all);
762 BIO *bio_decoded = decoded_result.get();
763 const int convbuf_size = 512;
764 char convbuf[convbuf_size];
// pump the data through the decoder in chunks of at most convbuf_size bytes
767 while((read_bytes = BIO_read(bio_input, convbuf, convbuf_size)) > 0)
769 BOOST_ASSERT(read_bytes <= convbuf_size);
770 long written_bytes = BIO_write(bio_decoded, convbuf, read_bytes);
771 if (written_bytes != read_bytes)
774 out << "base64 decoding failed: read_bytes: "
775 << read_bytes << " vs. written_bytes: " << written_bytes;
776 throw runtime_error(out.str());
// a negative result of the last BIO_read() is reported as decoding error
779 if (read_bytes == -2 || read_bytes == -1)
780 throw runtime_error("base64 decode: Error during decoding");
782 return _convert_BIO_to_string(bio_decoded);
785 } // eo namespace I2n
/**
 * @brief converts a string from ISO-8859-1 to UTF-8 using iconv.
 * @param isostring the ISO-8859-1 encoded string.
 * @return the UTF-8 encoded string (up to the first NUL character of the converted data).
 * @throw std::runtime_error if the converter cannot be opened or memory runs out.
 *
 * The return value of iconv() is not checked; on a conversion error the output
 * converted so far is returned.
 */
std::string iso_to_utf8(const std::string& isostring)
{
    std::string result;

    iconv_t i2utf8 = iconv_open("UTF-8", "ISO-8859-1");

    // test the descriptor returned by iconv_open(), not this function's own name
    if (i2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from ISO-8859-1 to UTF-8");

    size_t in_size=isostring.size();
    size_t out_size=in_size*4;

    char *buf = (char *)malloc(out_size+1);
    if (buf == NULL)
    {
        // do not leak the conversion descriptor on the error path
        iconv_close(i2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = (char *)isostring.c_str();
    char *out = buf;
    iconv(i2utf8, &in, &in_size, &out, &out_size);

    // out_size now holds the unused space, so this index is the end of the output
    buf[isostring.size()*4-out_size]=0;

    result=buf;

    free(buf);
    iconv_close(i2utf8);

    return result;
}
/**
 * @brief converts a string from UTF-8 to ISO-8859-1 using iconv.
 * @param utf8string the UTF-8 encoded string.
 * @return the ISO-8859-1 encoded string (up to the first NUL character of the converted data).
 * @throw std::runtime_error if the converter cannot be opened or memory runs out.
 *
 * The return value of iconv() is not checked; on a conversion error the output
 * converted so far is returned.
 */
std::string utf8_to_iso(const std::string& utf8string)
{
    std::string result;

    iconv_t utf82iso = iconv_open("ISO-8859-1","UTF-8");

    if (utf82iso == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to ISO-8859-1");

    // the ISO-8859-1 form never needs more bytes than the UTF-8 form
    size_t in_size=utf8string.size();
    size_t out_size=in_size;

    char *buf = (char *)malloc(out_size+1);
    if (buf == NULL)
    {
        // do not leak the conversion descriptor on the error path
        iconv_close(utf82iso);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = (char *)utf8string.c_str();
    char *out = buf;
    iconv(utf82iso, &in, &in_size, &out, &out_size);

    // out_size now holds the unused space, so this index is the end of the output
    buf[utf8string.size()-out_size]=0;

    result=buf;

    free(buf);
    iconv_close(utf82iso);

    return result;
}
/**
 * @brief converts an UTF-8 string to a zero terminated wide character buffer.
 * @param utf8string the UTF-8 encoded string.
 * @return buffer allocated with malloc(); the caller has to release it with free().
 * @throw std::runtime_error if the converter cannot be opened, memory runs out
 *        or the conversion fails.
 *
 * NOTE(review): the target encoding is "UCS-4LE", i.e. this presumably assumes a
 * 4 byte little endian wchar_t - confirm for the supported platforms.
 */
wchar_t* utf8_to_wbuf(const std::string& utf8string)
{
    iconv_t utf82wstr = iconv_open("UCS-4LE","UTF-8");

    if (utf82wstr == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UCS-4");

    // one wide character per input byte is the upper bound, plus the terminator
    size_t in_size=utf8string.size();
    size_t out_size= (in_size+1)*sizeof(wchar_t);

    wchar_t *buf = (wchar_t *)malloc(out_size);
    if (buf == NULL)
    {
        // do not leak the conversion descriptor on the error path
        iconv_close(utf82wstr);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = (char *)utf8string.c_str();
    char *out = (char*) buf;
    if (iconv(utf82wstr, &in, &in_size, &out, &out_size) == (size_t)-1)
    {
        // release buffer and descriptor before reporting the failure
        free(buf);
        iconv_close(utf82wstr);
        throw std::runtime_error("error converting char encodings");
    }

    // out_size now holds the unused space (in bytes); terminate behind the last character
    buf[ ( (utf8string.size()+1)*sizeof(wchar_t)-out_size) /sizeof(wchar_t) ]=0;

    iconv_close(utf82wstr);

    return buf;
}
/**
 * @brief converts a string from UTF-7-IMAP to UTF-8 using iconv.
 * @param utf7imapstring the UTF-7-IMAP encoded string.
 * @return the UTF-8 encoded string (up to the first NUL character of the converted data).
 * @throw std::runtime_error if the converter cannot be opened or memory runs out.
 *
 * The return value of iconv() is not checked; on a conversion error the output
 * converted so far is returned.
 */
std::string utf7imap_to_utf8(const std::string& utf7imapstring)
{
    std::string result;

    iconv_t utf7imap2utf8 = iconv_open("UTF-8","UTF-7-IMAP");

    if (utf7imap2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-7-IMAP to UTF-8");

    size_t in_size=utf7imapstring.size();
    size_t out_size=in_size*4;

    char *buf = (char *)malloc(out_size+1);
    if (buf == NULL)
    {
        // do not leak the conversion descriptor on the error path
        iconv_close(utf7imap2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = (char *)utf7imapstring.c_str();
    char *out = buf;
    iconv(utf7imap2utf8, &in, &in_size, &out, &out_size);

    // out_size now holds the unused space, so this index is the end of the output
    buf[utf7imapstring.size()*4-out_size]=0;

    result=buf;

    free(buf);
    iconv_close(utf7imap2utf8);

    return result;
}
/**
 * @brief converts a string from UTF-8 to UTF-7-IMAP using iconv.
 * @param utf8string the UTF-8 encoded string.
 * @return the UTF-7-IMAP encoded string (up to the first NUL character of the converted data).
 * @throw std::runtime_error if the converter cannot be opened or memory runs out.
 *
 * The return value of iconv() is not checked; on a conversion error the output
 * converted so far is returned.
 */
std::string utf8_to_utf7imap(const std::string& utf8string)
{
    std::string result;

    iconv_t utf82utf7imap = iconv_open("UTF-7-IMAP", "UTF-8");

    // the message names the direction which was actually requested
    if (utf82utf7imap == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UTF-7-IMAP");

    // UTF-7 is base64 encoded, a buffer 10x as large
    // as the utf-8 buffer should be enough. If not the string will be truncated.
    size_t in_size=utf8string.size();
    size_t out_size=in_size*10;

    char *buf = (char *)malloc(out_size+1);
    if (buf == NULL)
    {
        // do not leak the conversion descriptor on the error path
        iconv_close(utf82utf7imap);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = (char *)utf8string.c_str();
    char *out = buf;
    iconv(utf82utf7imap, &in, &in_size, &out, &out_size);

    // out_size now holds the unused space, so this index is the end of the output
    buf[utf8string.size()*10-out_size]= 0;

    result=buf;

    free(buf);
    iconv_close(utf82utf7imap);

    return result;
}
// Tokenize string by (html) tags
//
// Appends (text, is_tag) pairs to 'tokenized': everything from a '<' up to and including the
// next '>' is a tag token (second == true), the text in between is a non-tag token.
// A '<' always starts a new token; text collected before it (including an unfinished tag)
// is stored as non-tag token. Empty tokens are never stored.
void tokenize_by_tag(std::vector<std::pair<std::string,bool> > &tokenized, const std::string &input)
{
    std::string::size_type pos, len = input.size();
    bool inside_tag = false;
    std::string current;

    for (pos = 0; pos < len; pos++)
    {
        if (input[pos] == '<')
        {
            inside_tag = true;

            // flush what was collected before the tag started
            if (!current.empty() )
            {
                tokenized.push_back( std::make_pair(current, false) );
                current = "";
            }

            current += input[pos];
        }
        else if (input[pos] == '>' && inside_tag)
        {
            current += input[pos];
            inside_tag = false;
            if (!current.empty() )
            {
                tokenized.push_back( std::make_pair(current, true) );
                current = "";
            }
        }
        else
            current += input[pos];
    }

    // String left over in buffer?
    if (!current.empty() )
        tokenized.push_back( std::make_pair(current, false) );
} // eo tokenize_by_tag
979 std::string strip_html_tags(const std::string &input)
981 // Pair first: string, second: isTag
982 vector<pair<string,bool> > tokenized;
983 tokenize_by_tag (tokenized, input);
986 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
987 for (token = tokenized.begin(); token != tokens_end; ++token)
989 output += token->first;
992 } // eo strip_html_tags
995 // Smart-encode HTML en
996 string smart_html_entities(const std::string &input)
998 // Pair first: string, second: isTag
999 vector<pair<string,bool> > tokenized;
1000 tokenize_by_tag (tokenized, input);
1003 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
1004 for (token = tokenized.begin(); token != tokens_end; ++token)
1006 // keep HTML tags as they are
1008 output += token->first;
1010 output += html_entities(token->first);
// Returns the position of the first character with a value above 127 (i.e. outside of
// 7 bit ASCII) or string::npos if the string has no such character.
std::string::size_type find_8bit(const std::string &str)
{
    std::string::size_type l=str.size();
    for (std::string::size_type p=0; p < l; p++)
        // compare as unsigned char: plain char may be signed
        if (static_cast<unsigned char>(str[p]) > 127)
            return p;

    return std::string::npos;
}
// NOTE(review): in this listing the replacement literals of the replace_all calls below are
// identical to the search literals (and one literal is not even well-formed); presumably the
// entity texts were decoded when the listing was produced - verify against the repository
// before touching these lines. Brace-only lines and some statements (the test deciding between
// the two 'target <<' lines, the release of 'wbuf', the return) are not visible either.
1027 // encoded UTF-8 chars into HTML entities
1028 string html_entities(std::string str)
// markup characters
1031 replace_all (str, "&", "&");
1032 replace_all (str, "<", "<");
1033 replace_all (str, ">", ">");
1034 replace_all (str, "\"", """);
1035 replace_all (str, "'", "'");
1036 replace_all (str, "/", "/");
// UTF-8 byte sequences of the German umlauts
1039 replace_all (str, "\xC3\xA4", "ä");
1040 replace_all (str, "\xC3\xB6", "ö");
1041 replace_all (str, "\xC3\xBC", "ü");
1042 replace_all (str, "\xC3\x84", "Ä");
1043 replace_all (str, "\xC3\x96", "Ö");
1044 replace_all (str, "\xC3\x9C", "Ü");
// UTF-8 byte sequence of the sharp s
1047 replace_all (str, "\xC3\x9F", "ß");
1049 // conversion of remaining non-ASCII chars needed?
1050 // just do if needed because of performance
1051 if (find_8bit(str) != string::npos)
1053 // convert to fixed-size encoding UTF-32
1054 wchar_t* wbuf=utf8_to_wbuf(str);
1055 ostringstream target;
1057 // replace all non-ASCII chars with HTML representation
1058 for (int p=0; wbuf[p] != 0; p++)
1060 unsigned int c=wbuf[p];
1063 target << static_cast<unsigned char>(c);
// numeric character reference with the decimal code point
1065 target << "&#" << c << ';';
1074 } // eo html_entities(std::string)
// NOTE(review): in this listing the search literals of the replace_all calls below appear as
// plain characters (and one literal is not even well-formed); presumably they were HTML entity
// texts which got decoded when the listing was produced - verify against the repository before
// touching these lines. Brace-only lines and the return statement are not visible either.
1076 // convert HTML entities to something that can be viewed on a basic text console (restricted to ASCII-7)
1077 string html_entities_to_console(std::string str)
// markup characters
1080 replace_all (str, "&", "&");
1081 replace_all (str, "<", "<");
1082 replace_all (str, ">", ">");
1083 replace_all (str, """, "\"");
1084 replace_all (str, "'", "'");
1085 replace_all (str, "/", "/");
// umlauts are written as two ASCII letters
1088 replace_all (str, "ä", "ae");
1089 replace_all (str, "ö", "oe");
1090 replace_all (str, "ü", "ue");
1091 replace_all (str, "Ä", "Ae");
1092 replace_all (str, "Ö", "Oe");
1093 replace_all (str, "Ü", "Ue");
// sharp s is written as "ss"
1096 replace_all (str, "ß", "ss");
1101 // find_html_comments + remove_html_comments(str, comments)
1102 void remove_html_comments(string &str)
1104 vector<CommentZone> comments = find_html_comments(str);
1105 remove_html_comments(str, comments);
1108 // find all html comments, behaving correctly if they are nested; ignores comment tags ("<!--FOO .... BAR-->")
1109 // If there are invalid comments ("-->" before "<!--" or different number of closing and opening tags),
1110 // then the unknown index of corresponding start/end tag will be represented by a string::npos
1111 // Indices are from start of start tag until first index after closing tag
1112 vector<CommentZone> find_html_comments(const std::string &str)
1114 static const string START = "<!--";
1115 static const string CLOSE = "-->";
1116 static const string::size_type START_LEN = START.length();
1117 static const string::size_type CLOSE_LEN = CLOSE.length();
1119 vector<CommentZone> comments;
1121 // in order to find nested comments, need either recursion or a stack
1122 vector<string::size_type> starts; // stack of start tags
1124 string::size_type pos = 0;
1125 string::size_type len = str.length();
1126 string::size_type next_start, next_close;
1128 while (pos < len) // not really needed but just in case
1130 next_start = str.find(START, pos);
1131 next_close = str.find(CLOSE, pos);
1133 if ( (next_start == string::npos) && (next_close == string::npos) )
1134 break; // we are done
1136 else if ( (next_start == string::npos) || (next_close < next_start) ) // close one comment (pop)
1138 if (starts.empty()) // closing tag without a start
1139 comments.push_back(CommentZone(string::npos, next_close+CLOSE_LEN));
1142 comments.push_back(CommentZone(starts.back(), next_close+CLOSE_LEN));
1145 pos = next_close + CLOSE_LEN;
1148 else if ( (next_close == string::npos) || (next_start < next_close) ) // start a new comment (push)
1150 starts.push_back(next_start);
1151 pos = next_start + START_LEN;
1155 // add comments that have no closing tag from back to front (important for remove_html_comments!)
1156 while (!starts.empty())
1158 comments.push_back(CommentZone(starts.back(), string::npos));
1165 // remove all html comments foundby find_html_comments
1166 void remove_html_comments(std::string &str, const vector<CommentZone> &comments)
1168 // remember position where last removal started
1169 string::size_type last_removal_start = str.length();
1171 // Go from back to front to not mess up indices.
1172 // This requires that bigger comments, that contain smaller comments, come AFTER
1173 // the small contained comments in the comments vector (i.e. comments are ordered by
1174 // their closing tag, not their opening tag). This is true for results from find_html_comments
1175 BOOST_REVERSE_FOREACH(const CommentZone &comment, comments)
1177 if (comment.first == string::npos)
1179 str = str.replace(0, comment.second, ""); // comment starts "before" str --> delete from start
1180 break; // there can be no more
1182 else if (comment.first >= last_removal_start)
1184 continue; // this comment is inside another comment that we have removed already
1186 else if (comment.second == string::npos) // comment ends "after" str --> delete until end
1188 str = str.replace(comment.first, string::npos, "");
1189 last_removal_start = comment.first;
1193 str = str.replace(comment.first, comment.second-comment.first, "");
1194 last_removal_start = comment.first;
/**
 * @brief replaces all occurrences of a search string within a string.
 * @param[in,out] base the string which is modified.
 * @param ist the text to search for; must not be empty.
 * @param soll the text which replaces each occurrence.
 * @return @a true if at least one occurrence was replaced.
 * @throw std::runtime_error if @a ist is empty.
 *
 * The search continues behind the inserted text, i.e. replacements are not scanned again.
 */
bool replace_all(std::string &base, const std::string &ist, const std::string &soll)
{
    bool found_ist = false;
    std::string::size_type a=0;

    if (ist.empty() )
        throw std::runtime_error ("replace_all called with empty search string");

    while ( (a=base.find(ist,a) ) != std::string::npos)
    {
        base.replace(a,ist.size(),soll);
        a=a+soll.size();
        found_ist = true;
    }

    return found_ist;
}

/// pointer variant of replace_all; both pointers must be valid.
bool replace_all(std::string &base, const std::string *ist, const std::string *soll)
{
    return replace_all(base,*ist,*soll);
}

/// C string variant of replace_all.
bool replace_all(std::string &base, const char *ist, const char *soll)
{
    std::string i=ist;
    std::string s=soll;
    return replace_all(base,&i,&s);
}

/// variant of replace_all with the replacement given as C string.
bool replace_all(std::string &base, const std::string &ist, const char *soll)
{
    std::string s=soll;
    return replace_all(base,&ist,&s);
}

/// variant of replace_all with the search text given as C string.
bool replace_all(std::string &base, const char *ist, const std::string *soll)
{
    std::string i=ist;
    return replace_all(base,&i,soll);
}
/**
 * @brief replaces all characters that could be problematic or impose a security risk when being logged
 * @param str the original string
 * @param replace_with the character to replace the unsafe chars with
 * @return a string that is safe to send to syslog or other logfiles
 *
 * All chars between 0x20 (space) and 0x7E (~) (including) are considered safe for logging.
 * See e.g. RFC 5424, section 8.2 or the posix character class "printable".
 * This eliminates all possible problems with NUL, control characters, 8 bit chars, UTF8.
 */
std::string sanitize_for_logging(const std::string &str, const char replace_with)
{
    std::string output=str;

    const std::string::size_type len = output.size();
    for (std::string::size_type p=0; p < len; p++)
    {
        // compare as unsigned char so the result does not depend on the signedness of char
        const unsigned char c = static_cast<unsigned char>(output[p]);
        if (c < 0x20 || c > 0x7E)
            output[p]=replace_with;
    }

    return output;
}
// Returns a copy of src with every character converted by tolower().
std::string to_lower(const std::string &src)
{
    std::string dst = src;

    std::string::size_type pos, end = dst.size();
    for (pos = 0; pos < end; pos++)
        // tolower() must not be called with a negative value: go through unsigned char
        dst[pos] = static_cast<char>( tolower( static_cast<unsigned char>(dst[pos]) ) );

    return dst;
}
// Returns a copy of src with every character converted by toupper().
std::string to_upper(const std::string &src)
{
    std::string dst = src;

    std::string::size_type pos, end = dst.size();
    for (pos = 0; pos < end; pos++)
        // toupper() must not be called with a negative value: go through unsigned char
        dst[pos] = static_cast<char>( toupper( static_cast<unsigned char>(dst[pos]) ) );

    return dst;
}
// number of entries in each of the two unit symbol tables below
1288 const int MAX_UNIT_FORMAT_SYMBOLS = 6;
// unit suffixes for the short output format
// NOTE(review): the entries of this table are not visible in this listing
1290 const string shortUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
// unit suffixes for the long output format; nice_unit_format passes them through i18n()
1299 const string longUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
1300 i18n_noop(" Bytes"),
1301 i18n_noop(" KBytes"),
1302 i18n_noop(" MBytes"),
1303 i18n_noop(" GBytes"),
1304 i18n_noop(" TBytes"),
1305 i18n_noop(" PBytes")
/**
 * @brief rounds a number "half up" to a multiple of 1/rounding_multiplier.
 * @param number the number to round.
 * @param rounding_multiplier reciprocal of the rounding step (10 rounds to one decimal).
 * @return the rounded number.
 *
 * The scaled value is truncated via int64_t after adding 0.5, so it has to fit
 * into int64_t.
 */
static long double rounding_upwards(
    const long double number,
    const int rounding_multiplier
)
{
    long double rounded_number;
    rounded_number = number * rounding_multiplier;
    rounded_number += 0.5;
    rounded_number = (int64_t) (rounded_number);
    rounded_number = (long double) (rounded_number) / (long double) (rounding_multiplier);

    return rounded_number;
}
// Formats a number with a unit suffix: the value is divided by 'multiple' as long as it is
// larger than 'multiple', rounded by rounding_upwards() with a multiplier of 10 and written
// with the matching entry of the short or long unit symbol table.
// NOTE(review): brace-only lines and several statements (the third parameter, the
// initialisation of 'multiple' and 'sizecount', the precision setup of 'out', the return) are
// not visible in this listing.
1324 string nice_unit_format(
1325 const int64_t input,
1326 const UnitFormat format,
1330 // select the system of units (decimal or binary)
1332 if (base == UnitBase1000)
1341 long double size = input;
1343 // check the size of the input number to fit in the appropriate symbol
1345 while (size > multiple)
1347 size = size / multiple;
1350 // rollback to the previous values and stop the loop when cannot
1351 // represent the number length.
1352 if (sizecount >= MAX_UNIT_FORMAT_SYMBOLS)
1354 size = size * multiple;
1360 // round the input number "half up" to multiples of 10
1361 const int rounding_multiplier = 10;
1362 size = rounding_upwards(size, rounding_multiplier);
1364 // format the input number, placing the appropriate symbol
1366 out.setf (ios::fixed);
1367 if (format == ShortUnitFormat)
1370 out << size << i18n( shortUnitFormatSymbols[sizecount].c_str() );
1375 out << size << i18n( longUnitFormatSymbols[sizecount].c_str() );
1379 } // eo nice_unit_format(int input)
// Floating point variant: rounds the input, converts it to int64_t with a range-checked
// boost::numeric_cast and forwards to the int64_t overload.
// NOTE(review): the first and third parameter lines and the braces are not visible in this listing.
1382 string nice_unit_format(
1384 const UnitFormat format,
1388 // round as double and cast to int64_t
1389 // cast raised overflow error near max val of int64_t (~9.2e18, see unittest)
1390 int64_t input_casted_and_rounded =
1391 boost::numeric_cast<int64_t>( round(input) );
1394 return nice_unit_format( input_casted_and_rounded, format, base );
1395 } // eo nice_unit_format(double input)
/**
 * @brief escapes a string and encloses it in double quotes.
 * @param s the string to escape.
 * @return the quoted string: '"' and '\' get a leading backslash, carriage return
 *         and line feed are written as the two character sequences \r and \n.
 */
std::string escape(const std::string &s)
{
    std::string out(s);
    std::string::size_type p;

    // quotes and backslashes first; the backslashes inserted below must not be escaped again
    p=0;
    while ( (p=out.find_first_of("\"\\",p) ) !=out.npos)
    {
        out.insert (p,"\\");
        p+=2;
    }

    p=0;
    while ( (p=out.find_first_of("\r",p) ) !=out.npos)
    {
        out.replace (p,1,"\\r");
        p+=2;
    }

    p=0;
    while ( (p=out.find_first_of("\n",p) ) !=out.npos)
    {
        out.replace (p,1,"\\n");
        p+=2;
    }

    out='"'+out+'"';

    return out;
} // eo escape(const std::string&)
// Reverses an escaped string which starts with '"' at position startpos of s and reports the
// end position through endpos.
// NOTE(review): brace-only lines and several statements (declarations of 'out', 'e' and
// 'escaped', the loop bodies, the case labels of the switch, the return) are not visible in
// this listing.
1430 string descape(const string &s, int startpos, int &endpos)
// s.at() itself throws out_of_range when startpos is not a valid index
1434 if (s.at(startpos) != '"')
1435 throw out_of_range("value not type escaped string");
1437 out=s.substr(startpos+1);
1438 string::size_type p=0;
1440 // search for the end of the string
1441 while ( (p=out.find("\"",p) ) !=out.npos)
1446 // the " might be escaped with a backslash
1447 while (e>=0 && out.at (e) =='\\')
1449 if (escaped == false)
1463 // we now have the end of the string
1464 out=out.substr(0,p);
1466 // tell calling prog about the endposition
1467 endpos=startpos+p+1;
1469 // descape all \ stuff inside the string now
1471 while ( (p=out.find_first_of("\\",p) ) !=out.npos)
// out.at() throws out_of_range if the backslash is the last character
1473 switch (out.at(p+1) )
1476 out.replace(p,2,"\r");
1479 out.replace(p,2,"\n");
1488 } // eo descape(const std::string&,int,int&)
1491 string escape_shellarg(const string &input)
1493 string output = "'";
1494 string::const_iterator it, it_end = input.end();
1495 for (it = input.begin(); it != it_end; ++it)