2 The software in this package is distributed under the GNU General
3 Public License version 2 (with a special exception described below).
5 A copy of GNU General Public License (GPL) is included in this distribution,
6 in the file COPYING.GPL.
8 As a special exception, if other files instantiate templates or use macros
9 or inline functions from this file, or you compile this file and link it
10 with other works to produce a work based on this file, this file
11 does not by itself cause the resulting work to be covered
12 by the GNU General Public License.
14 However the source code for this file must still be made available
15 in accordance with section (3) of the GNU General Public License.
17 This exception does not invalidate any other reasons why a work based
18 on this file might be covered by the GNU General Public License.
22 * (c) Copyright 2007-2008 by Intra2net AG
36 #include <stringfunc.hxx>
namespace
{

/// hex digits indexed by nibble value (0..15), lower case variant.
const std::string hexDigitsLower("0123456789abcdef");
/// hex digits indexed by nibble value (0..15), upper case variant.
const std::string hexDigitsUpper("0123456789ABCDEF");

/**
 * @brief functor which converts a single character to upper case.
 */
struct UpperFunc
{
    char operator() (char c)
    {
        // the <cctype> functions need a value representable as unsigned char;
        // passing a negative plain char is undefined behaviour.
        return static_cast<char>( std::toupper( static_cast<unsigned char>(c) ) );
    }
}; // eo struct UpperFunc

/**
 * @brief functor which converts a single character to lower case.
 */
struct LowerFunc
{
    char operator() (char c)
    {
        // see UpperFunc for the reason of the cast.
        return static_cast<char>( std::tolower( static_cast<unsigned char>(c) ) );
    }
}; // eo struct LowerFunc

} // eo namespace <anonymous>
/**
 * default list of Whitespaces (" \t\r\n");
 */
const std::string Whitespaces = " \t\r\n";

/**
 * default list of line endings ("\r\n");
 */
const std::string LineEndings= "\r\n";
/**
 * @brief checks if a string begins with a given prefix.
 * @param str the string which is tested
 * @param prefix the prefix which should be tested for.
 * @return @a true iff the prefix is not empty and the string begins with that prefix.
 */
bool has_prefix(const std::string& str, const std::string& prefix)
{
    // an empty prefix never matches; a prefix longer than the string cannot match.
    if (prefix.empty() || str.size() < prefix.size() )
    {
        return false;
    }
    return str.compare(0, prefix.size(), prefix) == 0;
} // eo has_prefix(const std::string&,const std::string&)
/**
 * @brief checks if a string ends with a given suffix.
 * @param str the string which is tested
 * @param suffix the suffix which should be tested for.
 * @return @a true iff the suffix is not empty and the string ends with that suffix.
 */
bool has_suffix(const std::string& str, const std::string& suffix)
{
    // an empty suffix never matches; a suffix longer than the string cannot match.
    if (suffix.empty() || str.size() < suffix.size() )
    {
        return false;
    }
    return str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
} // eo has_suffix(const std::string&,const std::string&)
/**
 * cut off characters from a given list from front and end of a string.
 * @param[in,out] str the string which should be trimmed.
 * @param charlist the list of characters to remove from beginning and end of string
 * @return the result string.
 */
std::string trim_mod(std::string& str, const std::string& charlist)
{
    // first: trim the beginning:
    std::string::size_type pos= str.find_first_not_of (charlist);
    if (pos == std::string::npos)
    {
        // whole string consists of charlist (or is already empty)
        str.clear();
        return str;
    }
    else if (pos > 0)
    {
        // str starts with charlist
        str.erase(0, pos);
    }
    // now let's look at the tail:
    pos= str.find_last_not_of(charlist) +1; // note: we already know there is at least one other char!
    if ( pos < str.size() )
    {
        str.erase(pos, str.size()-pos);
    }
    return str;
} // eo trim_mod(std::string&,const std::string&)
/**
 * removes last character from a string when it is in a list of chars to be removed.
 * @param[in,out] str the string.
 * @param what the list of chars which will be tested for.
 * @return the resulting string with last char removed (if applicable)
 */
std::string chomp_mod(std::string& str, const std::string& what)
{
    if (str.empty() || what.empty() )
    {
        return str;
    }
    // only the very last character is examined; at most one character is removed.
    if (what.find( str[str.size() - 1] ) != std::string::npos)
    {
        str.erase(str.size() - 1);
    }
    return str;
} // eo chomp_mod(std::string&,const std::string&)
/**
 * @brief converts a string to lower case.
 * @param[in,out] str the string to modify.
 * @return the converted string.
 */
std::string to_lower_mod(std::string& str)
{
    for (std::string::iterator it= str.begin(); it != str.end(); ++it)
    {
        // std::tolower needs a value representable as unsigned char;
        // a negative plain char would be undefined behaviour.
        *it= static_cast<char>( std::tolower( static_cast<unsigned char>(*it) ) );
    }
    return str;
} // eo to_lower_mod(std::string&)
/**
 * @brief converts a string to upper case.
 * @param[in,out] str the string to modify.
 * @return the converted string.
 */
std::string to_upper_mod(std::string& str)
{
    for (std::string::iterator it= str.begin(); it != str.end(); ++it)
    {
        // std::toupper needs a value representable as unsigned char;
        // a negative plain char would be undefined behaviour.
        *it= static_cast<char>( std::toupper( static_cast<unsigned char>(*it) ) );
    }
    return str;
} // eo to_upper_mod(std::string&)
/**
 * cut off characters from a given list from front and end of a string.
 * @param str the string which should be trimmed.
 * @param charlist the list of characters to remove from beginning and end of string
 * @return the result string.
 */
std::string trim (const std::string& str, const std::string& charlist)
{
    // first: trim the beginning:
    const std::string::size_type pos0= str.find_first_not_of(charlist);
    if (pos0 == std::string::npos)
    {
        // whole string consists of charlist (or is already empty)
        return std::string();
    }
    // now let's look at the end (cannot fail: pos0 proves there is a remaining char):
    const std::string::size_type pos1= str.find_last_not_of(charlist);
    return str.substr(pos0, pos1 - pos0 + 1);
} // eo trim(const std::string&,const std::string&)
/**
 * removes last character from a string when it is in a list of chars to be removed.
 * @param str the string.
 * @param what the list of chars which will be tested for.
 * @return the resulting string with last char removed (if applicable)
 */
std::string chomp (const std::string& str, const std::string& what)
{
    if (str.empty() || what.empty() )
    {
        return str;
    }
    // only the very last character is examined; at most one character is removed.
    if (what.find( str[str.size() - 1] ) != std::string::npos)
    {
        return str.substr(0, str.size()-1);
    }
    return str;
} // eo chomp(const std::string&,const std::string&)
236 * @brief returns a lower case version of a given string.
237 * @param str the string
238 * @return the lower case version of the string
240 std::string to_lower (const std::string& str)
242 std::string result(str);
243 return to_lower_mod(result);
244 } // eo to_lower(const std::string&)
248 * @brief returns a upper case version of a given string.
249 * @param str the string
250 * @return the upper case version of the string
252 std::string to_upper(const std::string& str)
254 std::string result(str);
255 return to_upper_mod(result);
256 } // eo to_upper(const std::string&)
261 * @brief removes a given suffix from a string.
262 * @param str the string.
263 * @param suffix the suffix which should be removed if the string ends with it.
264 * @return the string without the suffix.
266 * If the string ends with the suffix, it is removed. If the the string doesn't end
267 * with the suffix the original string is returned.
269 std::string remove_suffix(const std::string& str, const std::string& suffix)
271 if (has_suffix(str,suffix) )
273 return str.substr(0, str.size()-suffix.size() );
276 } // eo remove_suffix(const std::string&,const std::string&)
281 * @brief removes a given prefix from a string.
282 * @param str the string.
283 * @param prefix the prefix which should be removed if the string begins with it.
284 * @return the string without the prefix.
286 * If the string begins with the prefix, it is removed. If the the string doesn't begin
287 * with the prefix the original string is returned.
289 std::string remove_prefix(const std::string& str, const std::string& prefix)
291 if (has_prefix(str,prefix) )
293 return str.substr( prefix.size() );
296 } // eo remove_prefix(const std::string&,const std::string&)
300 * split a string to key and value delimited by a given delimiter.
301 * The resulting key and value strings are trimmed (Whitespaces removed at beginning and end).
302 * @param str the string which should be splitted.
303 * @param[out] key the resulting key
304 * @param[out] value the resulting value
305 * @param delimiter the delimiter between key and value; default is '='.
306 * @return @a true if the split was successful.
309 const std::string& str,
314 std::string::size_type pos = str.find (delimiter);
315 if (pos == std::string::npos) return false;
316 key= str.substr(0,pos);
317 value= str.substr(pos+1);
321 } // eo pair_split(const std::string&,std::string&,std::string&,char)
325 * splits a string by given delimiter
327 * @param[in] str the string which should be splitted.
328 * @param[out] result the list resulting from splitting @a str.
329 * @param[in] delimiter the delimiter (word/phrase) at which @a str should be splitted.
330 * @param[in] omit_empty should empty parts not be stored?
331 * @param[in] trim_list list of characters the parts should be trimmed by.
332 * (empty string results in no trim)
335 const std::string& str,
336 std::list<std::string>& result,
337 const std::string& delimiter,
339 const std::string& trim_list
342 std::string::size_type pos, last_pos=0;
343 bool delimiter_found= false;
344 while ( last_pos < str.size() && last_pos != std::string::npos)
346 pos= str.find(delimiter, last_pos);
348 if (pos == std::string::npos)
350 part= str.substr(last_pos);
351 delimiter_found= false;
355 part= str.substr(last_pos, pos-last_pos);
356 delimiter_found=true;
358 if (pos != std::string::npos)
360 last_pos= pos+ delimiter.size();
364 last_pos= std::string::npos;
366 if (!trim_list.empty() ) trim_mod (part, trim_list);
367 if (omit_empty && part.empty() ) continue;
368 result.push_back( part );
370 // if the string ends with a delimiter we need to append an empty string if no omit_empty
372 // (this way we keep the split result consistent to a join operation)
373 if (delimiter_found && !omit_empty)
375 result.push_back("");
377 } // eo split_string(const std::string&,std::list< std::string >&,const std::string&,bool,const std::string&)
381 * splits a string by a given delimiter
382 * @param str the string which should be splitted.
383 * @param delimiter delimiter the delimiter (word/phrase) at which @a str should be splitted.
384 * @param[in] omit_empty should empty parts not be stored?
385 * @param[in] trim_list list of characters the parts should be trimmed by.
386 * (empty string results in no trim)
387 * @return the list resulting from splitting @a str.
389 std::list<std::string> split_string(
390 const std::string& str,
391 const std::string& delimiter,
393 const std::string& trim_list
396 std::list<std::string> result;
397 split_string(str, result, delimiter, omit_empty, trim_list);
399 } // eo split_string(const std::string&,const std::string&,bool,const std::string&)
/**
 * @brief joins a list of strings into a single string.
 *
 * This function is (basically) the reverse operation of @a split_string.
 *
 * @param parts the list of strings.
 * @param delimiter the delimiter which is inserted between the strings.
 * @return the joined string (empty if @a parts is empty).
 */
std::string join_string(
    const std::list< std::string >& parts,
    const std::string& delimiter
)
{
    std::string result;
    if (! parts.empty() )
    {
        std::list< std::string >::const_iterator it= parts.begin();
        // the first part is not preceded by a delimiter
        result= *it;
        while ( ++it != parts.end() )
        {
            result+= delimiter;
            result+= *it;
        }
    }
    return result;
} // eo join_string(const std::list< std::string >&,const std::string&)
438 * @brief returns a hex string from a binary string.
439 * @param str the (binary) string
440 * @param upper_case_digits determine whether to use upper case characters for digits A-F.
441 * @return the string in hex notation.
443 std::string convert_binary_to_hex(
444 const std::string& str,
445 bool upper_case_digits
449 std::string hexDigits(upper_case_digits ? hexDigitsUpper : hexDigitsLower);
450 for ( std::string::const_iterator it= str.begin();
454 result.push_back( hexDigits[ ( (*it) >> 4) & 0x0f ] );
455 result.push_back( hexDigits[ (*it) & 0x0f ] );
458 } // eo convert_binary_to_hex(const std::string&,bool)
462 * @brief converts a hex digit string to binary string.
463 * @param str hex digit string
464 * @return the binary string.
466 * The hex digit string may contains white spaces or colons which are treated
467 * as delimiters between hex digit groups.
469 * @todo rework the handling of half nibbles (consistency)!
471 std::string convert_hex_to_binary(
472 const std::string& str
474 throw (std::runtime_error)
478 bool hasNibble= false;
479 bool lastWasWS= true;
480 for ( std::string::const_iterator it= str.begin();
484 std::string::size_type p = hexDigitsLower.find( *it );
485 if (p== std::string::npos)
487 p= hexDigitsUpper.find( *it );
489 if (p == std::string::npos)
491 if ( ( Whitespaces.find( *it ) != std::string::npos) // is it a whitespace?
492 or ( *it == ':') // or a colon?
495 // we treat that as a valid delimiter:
498 // 1 nibble before WS is treate as lower part:
507 if (p == std::string::npos )
509 throw runtime_error("illegal character in hex digit string: " + str);
523 //we already had a nibble, so a char is complete now:
524 result.push_back( c );
529 // this is the first nibble of a new char:
535 //well, there is one nibble left
536 // let's do some heuristics:
539 // if the preceeding character was a white space (or a colon)
540 // we treat the nibble as lower part:
541 //( this is consistent with shortened hex notations where leading zeros are not noted)
542 result.push_back( c );
546 // if it was part of a hex digit chain, we treat it as UPPER part (!!)
547 result.push_back( c << 4 );
551 } // eo convert_hex_to_binary(const std::string&)
554 } // eo namespace I2n
/**
 * @brief converts an ISO-8859-1 encoded string to UTF-8.
 * @param isostring the ISO-8859-1 encoded input.
 * @return the UTF-8 encoded string. The return value of iconv() is not
 *         checked; if the conversion stops early, the part converted so far
 *         is returned.
 * @throw std::runtime_error if the converter cannot be opened or the
 *        conversion buffer cannot be allocated.
 */
std::string iso_to_utf8(const std::string& isostring)
{
    std::string result;

    iconv_t i2utf8 = iconv_open("UTF-8", "ISO-8859-1");

    // check the descriptor (not the address of this function)
    if (i2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from ISO-8859-1 to UTF-8");

    size_t in_size= isostring.size();
    const size_t buf_size= in_size*4;
    size_t out_size= buf_size;

    char *buf = static_cast<char *>( malloc(buf_size+1) );
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(i2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const pointer but does not modify the input
    char *in = const_cast<char *>( isostring.c_str() );
    char *out = buf;
    iconv(i2utf8, &in, &in_size, &out, &out_size);

    // iconv() decrements out_size by the number of bytes written;
    // copy by length so embedded NUL bytes do not cut the result.
    result.assign(buf, buf_size - out_size);

    free(buf);
    iconv_close(i2utf8);

    return result;
}
/**
 * @brief converts a UTF-8 encoded string to ISO-8859-1.
 * @param utf8string the UTF-8 encoded input.
 * @return the ISO-8859-1 encoded string. The return value of iconv() is not
 *         checked; if the conversion stops early (e.g. at a character which
 *         cannot be converted), the part converted so far is returned.
 * @throw std::runtime_error if the converter cannot be opened or the
 *        conversion buffer cannot be allocated.
 */
std::string utf8_to_iso(const std::string& utf8string)
{
    std::string result;

    iconv_t utf82iso = iconv_open("ISO-8859-1","UTF-8");

    if (utf82iso == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to ISO-8859-1");

    size_t in_size= utf8string.size();
    // the ISO-8859-1 form never needs more bytes than the UTF-8 form
    const size_t buf_size= in_size;
    size_t out_size= buf_size;

    char *buf = static_cast<char *>( malloc(buf_size+1) );
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(utf82iso);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const pointer but does not modify the input
    char *in = const_cast<char *>( utf8string.c_str() );
    char *out = buf;
    iconv(utf82iso, &in, &in_size, &out, &out_size);

    // iconv() decrements out_size by the number of bytes written;
    // copy by length so embedded NUL bytes do not cut the result.
    result.assign(buf, buf_size - out_size);

    free(buf);
    iconv_close(utf82iso);

    return result;
}
/**
 * @brief converts a UTF-8 encoded string to a zero terminated UCS-4 buffer.
 * @param utf8string the UTF-8 encoded input.
 * @return buffer allocated with malloc(); the caller has to release it with free().
 * @throw std::runtime_error if the converter cannot be opened, the buffer
 *        cannot be allocated or the conversion fails.
 *
 * NOTE(review): the target encoding is "UCS-4LE" while the buffer is accessed
 * as wchar_t; this assumes a 4 byte, little endian wchar_t -- confirm for all
 * supported platforms.
 */
wchar_t* utf8_to_wbuf(const std::string& utf8string)
{
    iconv_t utf82wstr = iconv_open("UCS-4LE","UTF-8");

    if (utf82wstr == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UCS-4");

    size_t in_size= utf8string.size();
    // one output character per input byte at most, plus the terminator
    const size_t buf_size= (in_size+1)*sizeof(wchar_t);
    size_t out_size= buf_size;

    wchar_t *buf = static_cast<wchar_t *>( malloc(buf_size) );
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(utf82wstr);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const pointer but does not modify the input
    char *in = const_cast<char *>( utf8string.c_str() );
    char *out = reinterpret_cast<char *>( buf );
    if (iconv(utf82wstr, &in, &in_size, &out, &out_size) == (size_t)-1)
    {
        // release everything we own before reporting the error
        free(buf);
        iconv_close(utf82wstr);
        throw std::runtime_error("error converting char encodings");
    }

    // iconv() decrements out_size by the number of bytes written;
    // terminate right behind the last converted character.
    buf[ (buf_size - out_size) / sizeof(wchar_t) ]= 0;

    iconv_close(utf82wstr);

    return buf;
}
/**
 * @brief converts a string in the "UTF-7-IMAP" encoding to UTF-8.
 * @param utf7imapstring the UTF-7-IMAP encoded input.
 * @return the UTF-8 encoded string. The return value of iconv() is not
 *         checked; if the conversion stops early, the part converted so far
 *         is returned.
 * @throw std::runtime_error if the converter cannot be opened (e.g. the iconv
 *        implementation does not know "UTF-7-IMAP") or the conversion buffer
 *        cannot be allocated.
 */
std::string utf7imap_to_utf8(const std::string& utf7imapstring)
{
    std::string result;

    iconv_t utf7imap2utf8 = iconv_open("UTF-8","UTF-7-IMAP");

    if (utf7imap2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-7-IMAP to UTF-8");

    size_t in_size= utf7imapstring.size();
    const size_t buf_size= in_size*4;
    size_t out_size= buf_size;

    char *buf = static_cast<char *>( malloc(buf_size+1) );
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(utf7imap2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const pointer but does not modify the input
    char *in = const_cast<char *>( utf7imapstring.c_str() );
    char *out = buf;
    iconv(utf7imap2utf8, &in, &in_size, &out, &out_size);

    // iconv() decrements out_size by the number of bytes written;
    // copy by length so embedded NUL bytes do not cut the result.
    result.assign(buf, buf_size - out_size);

    free(buf);
    iconv_close(utf7imap2utf8);

    return result;
}
/**
 * @brief converts a UTF-8 encoded string to the "UTF-7-IMAP" encoding.
 * @param utf8string the UTF-8 encoded input.
 * @return the UTF-7-IMAP encoded string. The return value of iconv() is not
 *         checked; if the conversion stops early or the buffer is too small,
 *         the (truncated) part converted so far is returned.
 * @throw std::runtime_error if the converter cannot be opened (e.g. the iconv
 *        implementation does not know "UTF-7-IMAP") or the conversion buffer
 *        cannot be allocated.
 */
std::string utf8_to_utf7imap(const std::string& utf8string)
{
    std::string result;

    iconv_t utf82utf7imap = iconv_open("UTF-7-IMAP", "UTF-8");

    if (utf82utf7imap == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UTF-7-IMAP");

    // UTF-7 is base64 encoded, a buffer 10x as large
    // as the utf-8 buffer should be enough. If not the string will be truncated.
    size_t in_size= utf8string.size();
    const size_t buf_size= in_size*10;
    size_t out_size= buf_size;

    char *buf = static_cast<char *>( malloc(buf_size+1) );
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(utf82utf7imap);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const pointer but does not modify the input
    char *in = const_cast<char *>( utf8string.c_str() );
    char *out = buf;
    iconv(utf82utf7imap, &in, &in_size, &out, &out_size);

    // the target encoding is stateful: a call without input makes iconv()
    // emit whatever is needed to return to the initial shift state
    // (writes nothing if the converter already is in the initial state).
    iconv(utf82utf7imap, NULL, NULL, &out, &out_size);

    // iconv() decrements out_size by the number of bytes written;
    // copy by length so embedded NUL bytes do not cut the result.
    result.assign(buf, buf_size - out_size);

    free(buf);
    iconv_close(utf82utf7imap);

    return result;
}
/**
 * @brief tokenizes a string by (html) tags.
 * @param[out] tokenized the tokens are appended here. Pair first: the text of
 *             the token, second: @a true if the token is a tag ("<...>").
 * @param input the string to tokenize.
 *
 * Every '<' starts a new token. A token which was opened by '<' but is
 * interrupted by another '<' or by the end of the input is stored as
 * plain text (second == false).
 */
void tokenize_by_tag(std::vector<std::pair<std::string,bool> > &tokenized, const std::string &input)
{
    const std::string::size_type len = input.size();
    bool inside_tag = false;
    std::string current;

    for (std::string::size_type pos = 0; pos < len; pos++)
    {
        const char ch = input[pos];

        if (ch == '<')
        {
            inside_tag = true;

            // flush the text collected so far
            if (!current.empty() )
            {
                tokenized.push_back( std::make_pair(current, false) );
                current.clear();
            }

            current += ch;
        }
        else if (ch == '>' && inside_tag)
        {
            // the closing bracket completes the tag token
            current += ch;
            inside_tag = false;
            tokenized.push_back( std::make_pair(current, true) );
            current.clear();
        }
        else
            current += ch;
    }

    // String left over in buffer?
    if (!current.empty() )
        tokenized.push_back( std::make_pair(current, false) );
} // eo tokenize_by_tag
748 std::string strip_html_tags(const std::string &input)
750 // Pair first: string, second: isTag
751 vector<pair<string,bool> > tokenized;
752 tokenize_by_tag (tokenized, input);
755 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
756 for (token = tokenized.begin(); token != tokens_end; ++token)
758 output += token->first;
761 } // eo strip_html_tags
764 // Smart-encode HTML en
765 string smart_html_entities(const std::string &input)
767 // Pair first: string, second: isTag
768 vector<pair<string,bool> > tokenized;
769 tokenize_by_tag (tokenized, input);
772 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
773 for (token = tokenized.begin(); token != tokens_end; ++token)
775 // keep HTML tags as they are
777 output += token->first;
779 output += html_entities(token->first);
/**
 * @brief searches for the first character outside the 7 bit ASCII range.
 * @param str the string to examine.
 * @return position of the first character with a value above 127
 *         or @a std::string::npos if there is none.
 */
std::string::size_type find_8bit(const std::string &str)
{
    const std::string::size_type l= str.size();
    for (std::string::size_type p=0; p < l; p++)
    {
        // compare as unsigned: plain char may be signed
        if (static_cast<unsigned char>(str[p]) > 127)
            return p;
    }

    return std::string::npos;
} // eo find_8bit
796 // encoded UTF-8 chars into HTML entities
797 string html_entities(std::string str)
800 replace_all (str, "&", "&");
801 replace_all (str, "<", "<");
802 replace_all (str, ">", ">");
803 replace_all (str, "\"", """);
804 replace_all (str, "'", "'");
805 replace_all (str, "/", "/");
808 replace_all (str, "\xC3\xA4", "ä");
809 replace_all (str, "\xC3\xB6", "ö");
810 replace_all (str, "\xC3\xBC", "ü");
811 replace_all (str, "\xC3\x84", "Ä");
812 replace_all (str, "\xC3\x96", "Ö");
813 replace_all (str, "\xC3\x9C", "Ü");
816 replace_all (str, "\xC3\x9F", "ß");
818 // conversion of remaining non-ASCII chars needed?
819 // just do if needed because of performance
820 if (find_8bit(str) != string::npos)
822 // convert to fixed-size encoding UTF-32
823 wchar_t* wbuf=utf8_to_wbuf(str);
824 ostringstream target;
826 // replace all non-ASCII chars with HTML representation
827 for (int p=0; wbuf[p] != 0; p++)
829 unsigned int c=wbuf[p];
832 target << static_cast<unsigned char>(c);
834 target << "&#" << c << ';';
843 } // eo html_entities(std::string)
/**
 * @brief replaces all occurrences of a string within another string.
 * @param[in,out] base the string which is modified.
 * @param ist the string to search for; must not be empty.
 * @param soll the replacement.
 * @return @a true if at least one occurrence was replaced.
 * @throw std::runtime_error if @a ist is empty.
 *
 * The search continues behind the inserted replacement, i.e. the replacement
 * itself is never searched again.
 */
bool replace_all(std::string &base, const std::string &ist, const std::string &soll)
{
    bool found_ist = false;
    std::string::size_type a=0;

    if (ist.empty() )
        throw std::runtime_error ("replace_all called with empty search string");

    while ( (a=base.find(ist,a) ) != std::string::npos)
    {
        base.replace(a,ist.size(),soll);
        // skip the replacement (it may contain the search string)
        a+= soll.size();
        found_ist = true;
    }

    return found_ist;
}

/// convenience overload of replace_all; @a ist and @a soll must not be NULL.
bool replace_all(std::string &base, const char *ist, const char *soll)
{
    return replace_all(base, std::string(ist), std::string(soll));
}

/// convenience overload of replace_all; @a soll must not be NULL.
bool replace_all(std::string &base, const std::string &ist, const char *soll)
{
    return replace_all(base, ist, std::string(soll));
}

/// convenience overload of replace_all; @a ist and @a soll must not be NULL.
bool replace_all(std::string &base, const std::string *ist, const std::string *soll)
{
    return replace_all(base, *ist, *soll);
}

/// convenience overload of replace_all; @a ist and @a soll must not be NULL.
bool replace_all(std::string &base, const char *ist, const std::string *soll)
{
    return replace_all(base, std::string(ist), *soll);
}
/**
 * @brief returns a lower case version of a given string.
 * @param src the string.
 * @return copy of @a src with every character converted by tolower().
 */
std::string to_lower(const std::string &src)
{
    std::string dst = src;

    const std::string::size_type end = dst.size();
    for (std::string::size_type pos = 0; pos < end; pos++)
    {
        // tolower() needs a value representable as unsigned char;
        // a negative plain char would be undefined behaviour.
        dst[pos] = static_cast<char>( tolower( static_cast<unsigned char>(dst[pos]) ) );
    }

    return dst;
}
/**
 * @brief returns an upper case version of a given string.
 * @param src the string.
 * @return copy of @a src with every character converted by toupper().
 */
std::string to_upper(const std::string &src)
{
    std::string dst = src;

    const std::string::size_type end = dst.size();
    for (std::string::size_type pos = 0; pos < end; pos++)
    {
        // toupper() needs a value representable as unsigned char;
        // a negative plain char would be undefined behaviour.
        dst[pos] = static_cast<char>( toupper( static_cast<unsigned char>(dst[pos]) ) );
    }

    return dst;
}
912 const int MAX_UNIT_FORMAT_SYMBOLS = 6;
914 const string shortUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
923 const string longUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
925 i18n_noop(" KBytes"),
926 i18n_noop(" MBytes"),
927 i18n_noop(" GBytes"),
928 i18n_noop(" TBytes"),
/**
 * @brief rounds a number "half up" to a fraction given by a multiplier.
 * @param number the number to round.
 * @param rounding_multiplier the number is rounded to multiples of
 *        1/@a rounding_multiplier (e.g. 10 results in one decimal place).
 * @return the rounded number.
 *
 * The fractional part is cut off by a conversion to int64_t, which truncates
 * towards zero; "half up" therefore only holds for non-negative numbers.
 */
long double rounding_upwards(
    const long double number,
    const int rounding_multiplier
)
{
    // scale, add one half and cut off the fraction ...
    const long double scaled = number * rounding_multiplier + 0.5;
    const long double truncated = static_cast<long double>( static_cast<int64_t>(scaled) );

    // ... and scale back.
    return truncated / static_cast<long double>(rounding_multiplier);
}
948 string nice_unit_format(
950 const UnitFormat format,
954 // select the system of units (decimal or binary)
956 if (base == UnitBase1000)
965 long double size = input;
967 // check the size of the input number to fit in the appropriate symbol
969 while (size > multiple)
971 size = size / multiple;
974 // rollback to the previous values and stop the loop when cannot
975 // represent the number length.
976 if (sizecount >= MAX_UNIT_FORMAT_SYMBOLS)
978 size = size * multiple;
984 // round the input number "half up" to multiples of 10
985 const int rounding_multiplier = 10;
986 size = rounding_upwards(size, rounding_multiplier);
988 // format the input number, placing the appropriate symbol
990 out.setf (ios::fixed);
991 if (format == ShortUnitFormat)
994 out << size << i18n( shortUnitFormatSymbols[sizecount].c_str() );
999 out << size << i18n( longUnitFormatSymbols[sizecount].c_str() );
1003 } // eo nice_unit_format(int input)
/**
 * @brief escapes a string and encloses it in double quotes.
 * @param s the string to escape.
 * @return the escaped string: '"' and '\' are prefixed with a backslash,
 *         carriage return becomes "\r", line feed becomes "\n" and the
 *         result is enclosed in double quotes.
 */
std::string escape(const std::string &s)
{
    std::string out;
    out.reserve(s.size() + 2);

    out += '"';
    for (std::string::const_iterator it = s.begin(); it != s.end(); ++it)
    {
        switch (*it)
        {
            case '"':
            case '\\':
                out += '\\';
                out += *it;
                break;
            case '\r':
                out += "\\r";
                break;
            case '\n':
                out += "\\n";
                break;
            default:
                out += *it;
        }
    }
    out += '"';

    return out;
} // eo escape(const std::string&)
/**
 * @brief extracts and unescapes a double-quoted, escaped string.
 * @param s the string which contains the escaped string.
 * @param startpos position of the opening double quote within @a s.
 * @param[out] endpos position of the closing double quote within @a s.
 *             If there is no closing quote, @a endpos is set to @a startpos
 *             and the rest of @a s is used.
 * @return the unescaped content: "\r" becomes carriage return, "\n" becomes
 *         line feed, any other character following a backslash is taken as is.
 * @throw std::out_of_range if @a startpos is outside of @a s, if there is no
 *        double quote at @a startpos or if the content ends with a single
 *        backslash.
 */
std::string descape(const std::string &s, int startpos, int &endpos)
{
    if (s.at(startpos) != '"')
        throw std::out_of_range("value not type escaped string");

    std::string quoted = s.substr(startpos+1);

    // search for the end of the string
    std::string::size_type p = 0;
    while ( (p = quoted.find('"', p) ) != std::string::npos)
    {
        // the " might be escaped with a backslash: it is escaped iff it is
        // preceded by an odd number of consecutive backslashes.
        std::string::size_type backslashes = 0;
        while (backslashes < p && quoted[p - 1 - backslashes] == '\\')
            ++backslashes;

        if (backslashes % 2 == 0)
            break;

        ++p;
    }

    if (p == std::string::npos)
    {
        // no closing quote: keep everything, report the start position
        endpos = startpos;
    }
    else
    {
        // we now have the end of the string
        quoted.erase(p);

        // tell calling prog about the endposition
        endpos = startpos + static_cast<int>(p) + 1;
    }

    // descape all \ stuff inside the string now
    std::string out;
    out.reserve(quoted.size());
    for (std::string::size_type i = 0; i < quoted.size(); ++i)
    {
        if (quoted[i] != '\\')
        {
            out += quoted[i];
            continue;
        }

        // at() throws std::out_of_range for a backslash at the very end
        const char next = quoted.at(i + 1);
        switch (next)
        {
            case 'r':
                out += '\r';
                break;
            case 'n':
                out += '\n';
                break;
            default:
                out += next;
        }
        ++i; // the character behind the backslash is consumed as well
    }

    return out;
} // eo descape(const std::string&,int,int&)
1099 string escape_shellarg(const string &input)
1101 string output = "'";
1102 string::const_iterator it, it_end = input.end();
1103 for (it = input.begin(); it != it_end; ++it)