3 * (c) Copyright 2007-2008 by Intra2net AG
19 #include <stringfunc.hxx>
namespace
{

/// digit tables which map a nibble value (0..15) to its hexadecimal character.
const std::string hexDigitsLower("0123456789abcdef");
const std::string hexDigitsUpper("0123456789ABCDEF");

/**
 * @brief functor which converts a single character to upper case.
 */
struct UpperFunc
{
    char operator() (char c) const
    {
        // the <cctype> functions are only defined for values representable as
        // unsigned char (or EOF); a plain char with the high bit set may be negative.
        return static_cast<char>( std::toupper( static_cast<unsigned char>(c) ) );
    }
}; // eo struct UpperFunc

/**
 * @brief functor which converts a single character to lower case.
 */
struct LowerFunc
{
    char operator() (char c) const
    {
        // see UpperFunc for the reason of the cast.
        return static_cast<char>( std::tolower( static_cast<unsigned char>(c) ) );
    }
}; // eo struct LowerFunc

} // eo namespace <anonymous>
/**
 * default list of whitespace characters (" \t\r\n"): blank, tab, carriage return, line feed.
 */
const std::string Whitespaces = " \t\r\n";

/**
 * default list of line ending characters ("\r\n").
 */
const std::string LineEndings= "\r\n";
/**
 * @brief checks if a string begins with a given prefix.
 * @param str the string which is tested (not modified).
 * @param prefix the prefix which should be tested for.
 * @return @a true iff the prefix is not empty and the string begins with that prefix.
 */
bool has_prefix(const std::string& str, const std::string& prefix)
{
    // an empty prefix never matches; a prefix longer than the string cannot match
    // (this also covers the empty string case).
    if (prefix.empty() || str.size() < prefix.size() )
    {
        return false;
    }
    return str.compare(0, prefix.size(), prefix) == 0;
} // eo has_prefix(const std::string&,const std::string&)
/**
 * @brief checks if a string ends with a given suffix.
 * @param str the string which is tested (not modified).
 * @param suffix the suffix which should be tested for.
 * @return @a true iff the suffix is not empty and the string ends with that suffix.
 */
bool has_suffix(const std::string& str, const std::string& suffix)
{
    // an empty suffix never matches; a suffix longer than the string cannot match
    // (this also covers the empty string case).
    if (suffix.empty() || str.size() < suffix.size() )
    {
        return false;
    }
    return str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
} // eo has_suffix(const std::string&,const std::string&)
/**
 * cut off characters from a given list from front and end of a string.
 * @param[in,out] str the string which should be trimmed (modified in place).
 * @param charlist the list of characters to remove from beginning and end of string
 * @return a copy of the trimmed string.
 */
std::string trim_mod(std::string& str, const std::string& charlist)
{
    const std::string::size_type first= str.find_first_not_of (charlist);
    if (first == std::string::npos)
    {
        // whole string consists of charlist (or is already empty)
        str.clear();
        return str;
    }
    // there is at least one character to keep, so the search from the end succeeds:
    const std::string::size_type last= str.find_last_not_of (charlist);
    // drop the tail first; this keeps the position of the first kept char valid.
    str.erase (last + 1);
    str.erase (0, first);
    return str;
} // eo trim_mod(std::string&,const std::string&)
/**
 * removes last character from a string when it is in a list of chars to be removed.
 * @param[in,out] str the string (modified in place).
 * @param what the list of chars which will be tested for.
 * @return a copy of the resulting string with last char removed (if applicable)
 */
std::string chomp_mod(std::string& str, const std::string& what)
{
    if (str.empty() || what.empty() )
    {
        return str;
    }
    const std::string::size_type last= str.size() - 1;
    if (what.find( str[last] ) != std::string::npos)
    {
        str.erase(last);
    }
    return str;
} // eo chomp_mod(std::string&,const std::string&)
153 * @brief converts a string to lower case.
154 * @param[in,out] str the string to modify.
157 std::string to_lower_mod(std::string& str)
159 std::transform(str.begin(), str.end(), str.begin(), LowerFunc() );
161 } // eo to_lower_mod(std::string&)
165 * @brief converts a string to upper case.
166 * @param[in,out] str the string to modify.
169 std::string to_upper_mod(std::string& str)
171 std::transform( str.begin(), str.end(), str.begin(), UpperFunc() );
173 } // eo to_upper_mod(std::string&)
/**
 * cut off characters from a given list from front and end of a string.
 * @param str the string which should be trimmed (not modified).
 * @param charlist the list of characters to remove from beginning and end of string
 * @return the result string.
 */
std::string trim (const std::string& str, const std::string& charlist)
{
    const std::string::size_type first= str.find_first_not_of(charlist);
    if (first == std::string::npos)
    {
        // whole string consists of charlist (or is already empty)
        return std::string();
    }
    // at least one character survives, so this search cannot fail:
    const std::string::size_type last= str.find_last_not_of(charlist);
    return str.substr(first, last - first + 1);
} // eo trim(const std::string&,const std::string&)
/**
 * removes last character from a string when it is in a list of chars to be removed.
 * @param str the string (not modified).
 * @param what the list of chars which will be tested for.
 * @return the resulting string with last char removed (if applicable)
 */
std::string chomp (const std::string& str, const std::string& what)
{
    if (str.empty() || what.empty() )
    {
        return str;
    }
    const std::string::size_type last= str.size() - 1;
    if (what.find( str[last] ) != std::string::npos)
    {
        return str.substr(0, last);
    }
    return str;
} // eo chomp(const std::string&,const std::string&)
219 * @brief returns a lower case version of a given string.
220 * @param str the string
221 * @return the lower case version of the string
223 std::string to_lower (const std::string& str)
225 std::string result(str);
226 return to_lower_mod(result);
227 } // eo to_lower(const std::string&)
231 * @brief returns a upper case version of a given string.
232 * @param str the string
233 * @return the upper case version of the string
235 std::string to_upper(const std::string& str)
237 std::string result(str);
238 return to_upper_mod(result);
239 } // eo to_upper(const std::string&)
244 * @brief removes a given suffix from a string.
245 * @param str the string.
246 * @param suffix the suffix which should be removed if the string ends with it.
247 * @return the string without the suffix.
249 * If the string ends with the suffix, it is removed. If the the string doesn't end
250 * with the suffix the original string is returned.
252 std::string remove_suffix(const std::string& str, const std::string& suffix)
254 if (has_suffix(str,suffix) )
256 return str.substr(0, str.size()-suffix.size() );
259 } // eo remove_suffix(const std::string&,const std::string&)
264 * @brief removes a given prefix from a string.
265 * @param str the string.
266 * @param prefix the prefix which should be removed if the string begins with it.
267 * @return the string without the prefix.
269 * If the string begins with the prefix, it is removed. If the the string doesn't begin
270 * with the prefix the original string is returned.
272 std::string remove_prefix(const std::string& str, const std::string& prefix)
274 if (has_prefix(str,prefix) )
276 return str.substr( prefix.size() );
279 } // eo remove_prefix(const std::string&,const std::string&)
283 * split a string to key and value delimited by a given delimiter.
284 * The resulting key and value strings are trimmed (Whitespaces removed at beginning and end).
285 * @param str the string which should be splitted.
286 * @param[out] key the resulting key
287 * @param[out] value the resulting value
288 * @param delimiter the delimiter between key and value; default is '='.
289 * @return @a true if the split was successful.
292 const std::string& str,
297 std::string::size_type pos = str.find (delimiter);
298 if (pos == std::string::npos) return false;
299 key= str.substr(0,pos);
300 value= str.substr(pos+1);
304 } // eo pair_split(const std::string&,std::string&,std::string&,char)
308 * splits a string by given delimiter
310 * @param[in] str the string which should be splitted.
311 * @param[out] result the list resulting from splitting @a str.
312 * @param[in] delimiter the delimiter (word/phrase) at which @a str should be splitted.
313 * @param[in] omit_empty should empty parts not be stored?
314 * @param[in] trim_list list of characters the parts should be trimmed by.
315 * (empty string results in no trim)
318 const std::string& str,
319 std::list<std::string>& result,
320 const std::string& delimiter,
322 const std::string& trim_list
325 std::string::size_type pos, last_pos=0;
326 bool delimiter_found= false;
327 while ( last_pos < str.size() && last_pos != std::string::npos)
329 pos= str.find(delimiter, last_pos);
331 if (pos == std::string::npos)
333 part= str.substr(last_pos);
334 delimiter_found= false;
338 part= str.substr(last_pos, pos-last_pos);
339 delimiter_found=true;
341 if (pos != std::string::npos)
343 last_pos= pos+ delimiter.size();
347 last_pos= std::string::npos;
349 if (!trim_list.empty() ) trim_mod (part, trim_list);
350 if (omit_empty && part.empty() ) continue;
351 result.push_back( part );
353 // if the string ends with a delimiter we need to append an empty string if no omit_empty
355 // (this way we keep the split result consistent to a join operation)
356 if (delimiter_found && !omit_empty)
358 result.push_back("");
360 } // eo split_string(const std::string&,std::list< std::string >&,const std::string&,bool,const std::string&)
364 * splits a string by a given delimiter
365 * @param str the string which should be splitted.
366 * @param delimiter delimiter the delimiter (word/phrase) at which @a str should be splitted.
367 * @param[in] omit_empty should empty parts not be stored?
368 * @param[in] trim_list list of characters the parts should be trimmed by.
369 * (empty string results in no trim)
370 * @return the list resulting from splitting @a str.
372 std::list<std::string> split_string(
373 const std::string& str,
374 const std::string& delimiter,
376 const std::string& trim_list
379 std::list<std::string> result;
380 split_string(str, result, delimiter, omit_empty, trim_list);
382 } // eo split_string(const std::string&,const std::string&,bool,const std::string&)
/**
 * @brief joins a list of strings into a single string.
 *
 * This function is (basically) the reverse operation of @a split_string.
 *
 * @param parts the list of strings.
 * @param delimiter the delimiter which is inserted between the strings.
 * @return the joined string (empty if @a parts is empty).
 */
std::string join_string(
    const std::list< std::string >& parts,
    const std::string& delimiter
)
{
    std::string result;
    if (! parts.empty() )
    {
        std::list< std::string >::const_iterator it= parts.begin();
        // the first part is not preceded by a delimiter ...
        result= *it;
        // ... every following part is.
        while ( ++it != parts.end() )
        {
            result+= delimiter;
            result+= *it;
        }
    }
    return result;
} // eo join_string(const std::list< std::string >&,const std::string&)
421 * @brief returns a hex string from a binary string.
422 * @param str the (binary) string
423 * @param upper_case_digits determine whether to use upper case characters for digits A-F.
424 * @return the string in hex notation.
426 std::string convert_binary_to_hex(
427 const std::string& str,
428 bool upper_case_digits
432 std::string hexDigits(upper_case_digits ? hexDigitsUpper : hexDigitsLower);
433 for ( std::string::const_iterator it= str.begin();
437 result.push_back( hexDigits[ ( (*it) >> 4) & 0x0f ] );
438 result.push_back( hexDigits[ (*it) & 0x0f ] );
441 } // eo convert_binary_to_hex(const std::string&,bool)
445 * @brief converts a hex digit string to binary string.
446 * @param str hex digit string
447 * @return the binary string.
449 * The hex digit string may contain white spaces or colons which are treated
450 * as delimiters between hex digit groups.
452 * @todo rework the handling of half nibbles (consistency)!
// Converts a string of hex digits (either case) into a binary string.
// Whitespace characters and ':' are accepted as separators; any other character that is
// not a hex digit makes the function throw a runtime_error carrying the whole input.
// NOTE(review): only part of this definition is visible in this excerpt (braces, the
// declarations of `result` and `c` and the nibble accumulation are missing) -- restore
// those lines from the original file; the comments below describe the visible lines only.
454 std::string convert_hex_to_binary(
455 const std::string& str
457 throw (std::runtime_error)
// hasNibble: presumably set while a single (half) byte is pending -- TODO confirm.
461 bool hasNibble= false;
462 bool lastWasWS= true;
463 for ( std::string::const_iterator it= str.begin();
// look the character up in the lower case digit table first, then in the upper case one;
// the position found is the digit's value.
467 std::string::size_type p = hexDigitsLower.find( *it );
468 if (p== std::string::npos)
470 p= hexDigitsUpper.find( *it );
472 if (p == std::string::npos)
474 if ( ( Whitespaces.find( *it ) != std::string::npos) // is it a whitespace?
475 or ( *it == ':') // or a colon?
478 // we treat that as a valid delimiter:
481 // 1 nibble before WS is treated as lower part:
// neither a hex digit nor a delimiter: reject the input.
490 if (p == std::string::npos )
492 throw runtime_error("illegal character in hex digit string: " + str);
506 //we already had a nibble, so a char is complete now:
507 result.push_back( c );
512 // this is the first nibble of a new char:
518 //well, there is one nibble left
519 // let's do some heuristics:
522 // if the preceding character was a white space (or a colon)
523 // we treat the nibble as lower part:
524 //( this is consistent with shortened hex notations where leading zeros are not noted)
525 result.push_back( c );
529 // if it was part of a hex digit chain, we treat it as UPPER part (!!)
530 result.push_back( c << 4 );
534 } // eo convert_hex_to_binary(const std::string&)
537 } // eo namespace I2n
/**
 * @brief converts an ISO-8859-1 encoded string to UTF-8.
 * @param isostring the ISO-8859-1 encoded input.
 * @return the UTF-8 encoded string. Conversion errors are not reported; in that case
 *         the part converted so far is returned.
 * @throw std::runtime_error if iconv does not offer the conversion or the work buffer
 *        cannot be allocated.
 */
std::string iso_to_utf8(const std::string& isostring)
{
    iconv_t i2utf8 = iconv_open("UTF-8", "ISO-8859-1");

    // check the descriptor returned by iconv_open (not the address of this function)
    if (i2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from ISO-8859-1 to UTF-8");

    size_t in_size=isostring.size();
    // the output buffer is sized at four bytes per input byte
    const size_t capacity=in_size*4;
    size_t out_size=capacity;

    char *buf = static_cast<char *>( malloc(capacity+1) );
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(i2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = const_cast<char *>( isostring.c_str() );
    char *out = buf;
    // best effort: the return value is deliberately ignored (see @return)
    iconv(i2utf8, &in, &in_size, &out, &out_size);
    iconv_close(i2utf8);

    // out_size now holds the unused rest of the buffer
    buf[capacity-out_size]=0;

    std::string result;
    try
    {
        result=buf;
    }
    catch (...)
    {
        free(buf);
        throw;
    }
    free(buf);

    return result;
}
/**
 * @brief converts an UTF-8 encoded string to ISO-8859-1.
 * @param utf8string the UTF-8 encoded input.
 * @return the ISO-8859-1 encoded string. Conversion errors are not reported; in that
 *         case the part converted so far is returned.
 * @throw std::runtime_error if iconv does not offer the conversion or the work buffer
 *        cannot be allocated.
 */
std::string utf8_to_iso(const std::string& utf8string)
{
    iconv_t utf82iso = iconv_open("ISO-8859-1","UTF-8");

    if (utf82iso == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to ISO-8859-1");

    size_t in_size=utf8string.size();
    // the output buffer gets the same size as the input
    const size_t capacity=in_size;
    size_t out_size=capacity;

    char *buf = static_cast<char *>( malloc(capacity+1) );
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(utf82iso);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = const_cast<char *>( utf8string.c_str() );
    char *out = buf;
    // best effort: the return value is deliberately ignored (see @return)
    iconv(utf82iso, &in, &in_size, &out, &out_size);
    iconv_close(utf82iso);

    // out_size now holds the unused rest of the buffer
    buf[capacity-out_size]=0;

    std::string result;
    try
    {
        result=buf;
    }
    catch (...)
    {
        free(buf);
        throw;
    }
    free(buf);

    return result;
}
/**
 * @brief converts an UTF-8 string into a zero terminated UCS-4 (little endian) buffer.
 * @param utf8string the UTF-8 encoded input.
 * @return a buffer allocated with malloc(); the caller has to release it with free().
 * @throw std::runtime_error if iconv does not offer the conversion, the buffer cannot
 *        be allocated or the input cannot be converted.
 */
wchar_t* utf8_to_wbuf(const std::string& utf8string)
{
    iconv_t utf82wstr = iconv_open("UCS-4LE","UTF-8");

    if (utf82wstr == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UCS-4");

    size_t in_size=utf8string.size();
    // room for one wide char per input byte plus the terminator
    const size_t capacity= (in_size+1)*sizeof(wchar_t);
    size_t out_size= capacity;

    wchar_t *buf = static_cast<wchar_t *>( malloc(capacity) );
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(utf82wstr);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = const_cast<char *>( utf8string.c_str() );
    char *out = reinterpret_cast<char *>(buf);
    const size_t rc = iconv(utf82wstr, &in, &in_size, &out, &out_size);
    iconv_close(utf82wstr);

    // iconv reports failure as (size_t)-1
    if (rc == static_cast<size_t>(-1) )
    {
        // neither the buffer nor the descriptor may leak on the error path
        free(buf);
        throw std::runtime_error("error converting char encodings");
    }

    // out_size now holds the unused rest of the buffer (in bytes)
    buf[ (capacity-out_size) /sizeof(wchar_t) ]=0;

    return buf;
}
/**
 * @brief converts an UTF-7-IMAP encoded string to UTF-8.
 * @param utf7imapstring the UTF-7-IMAP encoded input.
 * @return the UTF-8 encoded string. Conversion errors are not reported; in that case
 *         the part converted so far is returned.
 * @throw std::runtime_error if iconv does not offer the conversion or the work buffer
 *        cannot be allocated.
 */
std::string utf7imap_to_utf8(const std::string& utf7imapstring)
{
    iconv_t utf7imap2utf8 = iconv_open("UTF-8","UTF-7-IMAP");

    if (utf7imap2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-7-IMAP to UTF-8");

    size_t in_size=utf7imapstring.size();
    // the output buffer is sized at four bytes per input byte
    const size_t capacity=in_size*4;
    size_t out_size=capacity;

    char *buf = static_cast<char *>( malloc(capacity+1) );
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(utf7imap2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = const_cast<char *>( utf7imapstring.c_str() );
    char *out = buf;
    // best effort: the return value is deliberately ignored (see @return)
    iconv(utf7imap2utf8, &in, &in_size, &out, &out_size);
    iconv_close(utf7imap2utf8);

    // out_size now holds the unused rest of the buffer
    buf[capacity-out_size]=0;

    std::string result;
    try
    {
        result=buf;
    }
    catch (...)
    {
        free(buf);
        throw;
    }
    free(buf);

    return result;
}
/**
 * @brief converts an UTF-8 encoded string to UTF-7-IMAP.
 * @param utf8string the UTF-8 encoded input.
 * @return the UTF-7-IMAP encoded string. Conversion errors are not reported; the result
 *         is truncated if the conversion stops early or the work buffer is too small.
 * @throw std::runtime_error if iconv does not offer the conversion or the work buffer
 *        cannot be allocated.
 */
std::string utf8_to_utf7imap(const std::string& utf8string)
{
    iconv_t utf82utf7imap = iconv_open("UTF-7-IMAP", "UTF-8");

    if (utf82utf7imap == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UTF-7-IMAP");

    // UTF-7 is base64 encoded, a buffer 10x as large
    // as the utf-8 buffer should be enough. If not the string will be truncated.
    size_t in_size=utf8string.size();
    const size_t capacity=in_size*10;
    size_t out_size=capacity;

    char *buf = static_cast<char *>( malloc(capacity+1) );
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(utf82utf7imap);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = const_cast<char *>( utf8string.c_str() );
    char *out = buf;
    // best effort: the return value is deliberately ignored (see @return)
    iconv(utf82utf7imap, &in, &in_size, &out, &out_size);
    iconv_close(utf82utf7imap);

    // out_size now holds the unused rest of the buffer
    buf[capacity-out_size]= 0;

    std::string result;
    try
    {
        result=buf;
    }
    catch (...)
    {
        free(buf);
        throw;
    }
    free(buf);

    return result;
}
/**
 * @brief tokenizes a string by (html) tags.
 * @param[out] tokenized the tokens are appended here; @a first is the token text,
 *             @a second is @a true iff the token is a complete tag ("<" ... ">").
 * @param input the string to tokenize.
 *
 * Text in front of a '<' is flushed as a non-tag token. A tag which is not closed
 * (another '<' or the end of input is reached first) is stored as non-tag token, too.
 */
void tokenize_by_tag(std::vector<std::pair<std::string,bool> > &tokenized, const std::string &input)
{
    const std::string::size_type len = input.size();
    bool inside_tag = false;
    std::string current;

    for (std::string::size_type pos = 0; pos < len; pos++)
    {
        const char ch = input[pos];

        if (ch == '<')
        {
            inside_tag = true;

            // flush whatever was collected in front of the tag
            if (!current.empty() )
            {
                tokenized.push_back( std::make_pair(current, false) );
                current.clear();
            }

            current += ch;
        }
        else if (ch == '>' && inside_tag)
        {
            // the tag is complete (current cannot be empty here)
            current += ch;
            inside_tag = false;
            tokenized.push_back( std::make_pair(current, true) );
            current.clear();
        }
        else
            current += ch;
    }

    // String left over in buffer?
    if (!current.empty() )
        tokenized.push_back( std::make_pair(current, false) );
} // eo tokenize_by_tag
731 std::string strip_html_tags(const std::string &input)
733 // Pair first: string, second: isTag
734 vector<pair<string,bool> > tokenized;
735 tokenize_by_tag (tokenized, input);
738 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
739 for (token = tokenized.begin(); token != tokens_end; token++)
741 output += token->first;
744 } // eo strip_html_tags
747 // Smart-encode HTML en
748 string smart_html_entities(const std::string &input)
750 // Pair first: string, second: isTag
751 vector<pair<string,bool> > tokenized;
752 tokenize_by_tag (tokenized, input);
755 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
756 for (token = tokenized.begin(); token != tokens_end; token++)
758 // keep HTML tags as they are
760 output += token->first;
762 output += html_entities(token->first);
/**
 * @brief searches the first character which is not 7 bit ASCII.
 * @param str the string to examine.
 * @return the position of the first character with a value above 127
 *         or std::string::npos if there is none.
 */
std::string::size_type find_8bit(const std::string &str)
{
    const std::string::size_type l=str.size();
    for (std::string::size_type p=0; p < l; p++)
    {
        // compare as unsigned: plain char may be signed
        if (static_cast<unsigned char>(str[p]) > 127)
            return p;
    }

    return std::string::npos;
}
779 // encoded UTF-8 chars into HTML entities
780 string html_entities(std::string str)
783 replace_all (str, "&", "&");
784 replace_all (str, "<", "<");
785 replace_all (str, ">", ">");
786 replace_all (str, "\"", """);
787 replace_all (str, "'", "'");
788 replace_all (str, "/", "/");
791 replace_all (str, "\xC3\xA4", "ä");
792 replace_all (str, "\xC3\xB6", "ö");
793 replace_all (str, "\xC3\xBC", "ü");
794 replace_all (str, "\xC3\x84", "Ä");
795 replace_all (str, "\xC3\x96", "Ö");
796 replace_all (str, "\xC3\x9C", "Ü");
799 replace_all (str, "\xC3\x9F", "ß");
801 // conversion of remaining non-ASCII chars needed?
802 // just do if needed because of performance
803 if (find_8bit(str) != string::npos)
805 // convert to fixed-size encoding UTF-32
806 wchar_t* wbuf=utf8_to_wbuf(str);
807 ostringstream target;
809 // replace all non-ASCII chars with HTML representation
810 for (int p=0; wbuf[p] != 0; p++)
812 unsigned int c=wbuf[p];
815 target << static_cast<unsigned char>(c);
817 target << "&#" << c << ';';
826 } // eo html_entities(std::string)
/**
 * @brief replaces all occurrences of a string within another string.
 * @param[in,out] base the string which is searched and modified.
 * @param ist the text to search for ("is"); must not be empty.
 * @param soll the replacement text ("shall be").
 * @return @a true iff at least one occurrence was replaced.
 * @throw std::runtime_error if @a ist is empty.
 *
 * The search continues behind the inserted replacement, i.e. replaced text is
 * never examined again.
 */
bool replace_all(std::string &base, const std::string &ist, const std::string &soll)
{
    bool found_ist = false;
    std::string::size_type a=0;

    if (ist.empty() )
        throw std::runtime_error ("replace_all called with empty search string");

    while ( (a=base.find(ist,a) ) != std::string::npos)
    {
        base.replace(a,ist.size(),soll);
        // skip the replacement text
        a=a+soll.size();
        found_ist = true;
    }

    return found_ist;
}

/// convenience overload of replace_all taking both texts by pointer (must not be NULL).
bool replace_all(std::string &base, const std::string *ist, const std::string *soll)
{
    return replace_all(base,*ist,*soll);
}

/// convenience overload of replace_all taking both texts as C strings.
bool replace_all(std::string &base, const char *ist, const char *soll)
{
    const std::string i(ist);
    const std::string s(soll);
    return replace_all(base,i,s);
}

/// convenience overload of replace_all taking the replacement as C string.
bool replace_all(std::string &base, const std::string &ist, const char *soll)
{
    const std::string s(soll);
    return replace_all(base,ist,s);
}

/// convenience overload of replace_all taking the search text as C string.
bool replace_all(std::string &base, const char *ist, const std::string *soll)
{
    const std::string i(ist);
    return replace_all(base,i,*soll);
}
/**
 * @brief returns a lower case copy of a string.
 * @param src the string (not modified).
 * @return the converted copy.
 */
std::string to_lower(const std::string &src)
{
    std::string dst = src;

    const std::string::size_type end = dst.size();
    for (std::string::size_type pos = 0; pos < end; pos++)
    {
        // tolower is only defined for values representable as unsigned char
        dst[pos] = static_cast<char>( std::tolower( static_cast<unsigned char>(dst[pos]) ) );
    }

    return dst;
}
/**
 * @brief returns an upper case copy of a string.
 * @param src the string (not modified).
 * @return the converted copy.
 */
std::string to_upper(const std::string &src)
{
    std::string dst = src;

    const std::string::size_type end = dst.size();
    for (std::string::size_type pos = 0; pos < end; pos++)
    {
        // toupper is only defined for values representable as unsigned char
        dst[pos] = static_cast<char>( std::toupper( static_cast<unsigned char>(dst[pos]) ) );
    }

    return dst;
}
// Number of entries in each of the two unit symbol tables below.
896 const int MAX_SYMBOL_FORMATS = 9;
// Unit symbols in their short spelling; nice_unit_format() indexes this table with the
// number of divisions it performed.
// NOTE(review): the initializer list is not visible in this excerpt.
898 const string symbolFormatShort[MAX_SYMBOL_FORMATS] = {
// Unit symbols in their long spelling, indexed the same way as the short table.
// NOTE(review): the initializer list is not visible in this excerpt.
910 const string symbolFormatLong[MAX_SYMBOL_FORMATS] = {
// Formats a number as a scaled value followed by a (translated) unit symbol.
// `system` selects the unit system, `symbolformat` selects the short or the long symbol
// table; the selected symbol is passed through i18n() before it is appended.
// NOTE(review): parts of this definition (first parameter, declarations of `multiple`,
// `sizecount`, `tmp` and `out`, precision settings, return statement) are not visible in
// this excerpt.
922 string nice_unit_format(
924 const UnitSystem system,
925 const UnitSymbolFormat symbolformat
928 // select the system of units (decimal or binary)
939 long double size = input;
941 // check the size of the input number to fit in the appropriate symbol
// scale down until the value no longer exceeds one step of the unit system
// (presumably the number of steps is counted in sizecount -- TODO confirm).
943 while (size > multiple)
945 size = size / multiple;
949 // round the input number
// truncate to an integer and divide by ten; presumably tmp was scaled by ten before,
// which would round to one decimal place -- TODO confirm against the missing lines.
953 tmp = (int64_t) (tmp);
954 tmp = (long double) (tmp) / (long double) (10);
957 // format the input number, placing the appropriate symbol
959 out.setf (ios::fixed);
960 if (symbolformat == USF_SHORT)
963 out << size << i18n( symbolFormatShort[sizecount].c_str() );
968 out << size << i18n( symbolFormatLong[sizecount].c_str() );
972 } // eo nice_unit_format(int input)
// Formats a byte count as a value followed by a binary unit (" B", " KB", ... " EB").
// NOTE(review): parts of this definition (declarations of `sizecount` and `out`, the
// increment of sizecount, precision setting, case labels, return statement) are not
// visible in this excerpt.
975 string format_kb(long long bytes)
978 long double calcTraffic = bytes;
// English translation of the German note below:
// "Keep dividing by 1024 until the next step would yield a value < 1,
//  or stop at the maximum conversion -> TB".
// The visible switch below also emits " PB" and " EB".
981 * Solange durch 1024 Teilen bis
982 * der naechste Teilschritt < 1 waere oder
983 * Abbruch bei Maximalumrechnung ->TB
985 while (calcTraffic > 1024 && sizecount<=6) {
986 calcTraffic = calcTraffic / 1024;
// round to one decimal place: scale by ten, truncate to an integer, scale back
// (presumably 0.5 is added in the line not shown here -- TODO confirm).
990 long double tmp; // round
991 tmp = calcTraffic*10;
993 tmp = (long long) (tmp);
994 tmp = (long double)(tmp)/(long double)(10);
999 out.setf (ios::fixed);
// pick the unit matching the number of divisions performed above.
1001 switch (sizecount) {
1003 out << calcTraffic << " KB";
1006 out << calcTraffic << " MB";
1009 out << calcTraffic << " GB";
1012 out << calcTraffic << " TB";
1015 out << calcTraffic << " PB";
1018 out << calcTraffic << " EB";
1021 out << calcTraffic << " B";
/**
 * @brief escapes a string and encloses it in double quotes.
 * @param s the string to escape.
 * @return the quoted string: '"' and '\' are preceded by a backslash, carriage return
 *         and line feed are replaced by the two character sequences \r and \n.
 */
std::string escape(const std::string &s)
{
    std::string out;
    out.reserve( s.size() + 2 );

    out += '"';
    for (std::string::size_type p = 0; p < s.size(); p++)
    {
        const char ch = s[p];
        switch (ch)
        {
            case '"':
            case '\\':
                out += '\\';
                out += ch;
                break;
            case '\r':
                out += "\\r";
                break;
            case '\n':
                out += "\\n";
                break;
            default:
                out += ch;
        }
    }
    out += '"';

    return out;
} // eo escape(const std::string&)
// Decodes a double quoted, backslash escaped string which starts at s[startpos].
// endpos receives startpos+p+1 where p is the offset of the closing quote within the text
// behind the opening quote, i.e. the index of the closing quote within s.
// Throws out_of_range if s[startpos] is not a double quote (s.at() itself also throws
// out_of_range for an invalid startpos).
// NOTE(review): parts of this definition (declarations of `out`, `e` and `escaped`, the
// loop bodies, case labels, return statement) are not visible in this excerpt.
1061 string descape(const string &s, int startpos, int &endpos)
1065 if (s.at(startpos) != '"')
1066 throw out_of_range("value not type escaped string");
// continue with the text behind the opening quote
1068 out=s.substr(startpos+1);
1069 string::size_type p=0;
1071 // search for the end of the string
1072 while ( (p=out.find("\"",p) ) !=out.npos)
1077 // the " might be escaped with a backslash
// walk backwards over the backslashes directly in front of the quote
// (presumably toggling `escaped` per backslash -- TODO confirm against the missing lines).
1078 while (e>=0 && out.at (e) =='\\')
1080 if (escaped == false)
1094 // we now have the end of the string
1095 out=out.substr(0,p);
1097 // tell the caller about the end position
1098 endpos=startpos+p+1;
1100 // unescape all backslash sequences inside the string now
1102 while ( (p=out.find_first_of("\\",p) ) !=out.npos)
// out.at() throws out_of_range if the backslash is the last character
1104 switch (out.at(p+1) )
// two character sequence -> carriage return (presumably for the case label 'r')
1107 out.replace(p,2,"\r");
// two character sequence -> line feed (presumably for the case label 'n')
1110 out.replace(p,2,"\n");
1119 } // eo descape(const std::string&,int,int&)
1122 string escape_shellarg(const string &input)
1124 string output = "'";
1125 string::const_iterator it, it_end = input.end();
1126 for (it = input.begin(); it != it_end; it++)