developer.intra2net.com Git - libi2ncommon/blob - src/stringfunc.cpp

   1 /** @file
   2  *
   3  * (c) Copyright 2007-2008 by Intra2net AG
   4  *
   5  * info@intra2net.com
   6  */
   7
   8 #include <iostream>
   9 #include <string>
  10 #include <sstream>
  11 #include <stdexcept>
  12
  13 #include <wchar.h>
  14 #include <stdlib.h>
  15 #include <iconv.h>
  16 #include <i18n.h>
  17
  18 #include <stringfunc.hxx>
  19
  20 using namespace std;
  21
  22 namespace I2n
  23 {
  24
  25
  26 namespace
  27 {
  28
  29 const std::string hexDigitsLower("0123456789abcdef");
  30 const std::string hexDigitsUpper("0123456789ABCDEF");
  31
  32
  33 struct UpperFunc
  34 {
  35    char operator() (char c)
  36    {
  37       return std::toupper(c);
  38    }
  39 }; // eo struct UpperFunc
  40
  41
  42 struct LowerFunc
  43 {
  44    char operator() (char c)
  45    {
  46       return std::tolower(c);
  47    }
  48 }; // eo struct LowerFunc
  49
  50
  51 } // eo namespace <anonymous>
  52
  53
  54
  55 /**
  56  * default list of Whitespaces (" \t\r\n");
  57  */
  58 const std::string Whitespaces = " \t\r\n";
  59
  60 /**
  61  * default list of lineendings ("\r\n");
  62  */
  63 const std::string LineEndings= "\r\n";
  64
  65
  66
  67 /**
  68  * @brief checks if a string begins with a given prefix.
  69  * @param[in,out] str the string which is tested
  70  * @param prefix the prefix which should be tested for.
  71  * @return @a true iff the prefix is not empty and the string begins with that prefix.
  72  */
  73 bool has_prefix(const std::string& str, const std::string& prefix)
  74 {
  75    if (prefix.empty() || str.empty() || str.size() < prefix.size() )
  76    {
  77       return false;
  78    }
  79    return str.compare(0, prefix.size(), prefix) == 0;
  80 } // eo has_prefix(const std::string&,const std::string&)
  81
  82
  83 /**
  84  * @brief checks if a string ends with a given suffix.
  85  * @param[in,out] str the string which is tested
  86  * @param suffix the suffix which should be tested for.
  87  * @return @a true iff the suffix is not empty and the string ends with that suffix.
  88  */
  89 bool has_suffix(const std::string& str, const std::string& suffix)
  90 {
  91    if (suffix.empty() || str.empty() || str.size() < suffix.size() )
  92    {
  93       return false;
  94    }
  95    return str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
  96 } // eo has_suffix(const std::string&,const std::string&)
  97
  98
  99 /**
 100  * cut off characters from a given list from front and end of a string.
 101  * @param[in,out] str the string which should be trimmed.
 102  * @param charlist the list of characters to remove from beginning and end of string
 103  * @return the result string.
 104  */
 105 std::string trim_mod(std::string& str, const std::string& charlist)
 106 {
 107    // first: trim the beginning:
 108    std::string::size_type pos= str.find_first_not_of (charlist);
 109    if (pos == std::string::npos)
 110    {
 111       // whole string consists of charlist (or is already empty)
 112       str.clear();
 113       return str;
 114    }
 115    else if (pos>0)
 116    {
 117       // str starts with charlist
 118       str.erase(0,pos);
 119    }
 120    // now let's look at the tail:
 121    pos= str.find_last_not_of(charlist) +1;  // note: we already know there is at least one other char!
 122    if ( pos < str.size() )
 123    {
 124       str.erase(pos, str.size()-pos);
 125    }
 126    return str;
 127 } // eo trim_mod(std::string&,const std::string&)
 128
 129
 130
 131 /**
 132  * removes last character from a string when it is in a list of chars to be removed.
 133  * @param[in,out] str the string.
 134  * @param what the list of chars which will be tested for.
 135  * @return the resulting string with last char removed (if applicable)
 136  */
 137 std::string chomp_mod(std::string& str, const std::string& what)
 138 {
 139    if (str.empty() || what.empty() )
 140    {
 141       return str;
 142    }
 143    if (what.find(str.at (str.size()-1) ) != std::string::npos)
 144    {
 145       str.erase(str.size() - 1);
 146    }
 147    return str;
 148 } // eo chomp_mod(std::string&,const std::string&)
 149
 150
 151 /**
 152  * @brief converts a string to lower case.
 153  * @param[in,out] str the string to modify.
 154  * @return the string
 155  */
 156 std::string to_lower_mod(std::string& str)
 157 {
 158    std::transform(str.begin(), str.end(), str.begin(), LowerFunc() );
 159    return str;
 160 } // eo to_lower_mod(std::string&)
 161
 162
 163 /**
 164  * @brief converts a string to upper case.
 165  * @param[in,out] str the string to modify.
 166  * @return the string
 167  */
 168 std::string to_upper_mod(std::string& str)
 169 {
 170    std::transform( str.begin(), str.end(), str.begin(), UpperFunc() );
 171    return str;
 172 } // eo to_upper_mod(std::string&)
 173
 174
 175
 176 /**
 177  * cut off characters from a given list from front and end of a string.
 178  * @param str the string which should be trimmed.
 179  * @param charlist the list of characters to remove from beginning and end of string
 180  * @return the result string.
 181  */
 182 std::string trim (const std::string& str, const std::string& charlist)
 183 {
 184    // first: trim the beginning:
 185    std::string::size_type pos0= str.find_first_not_of(charlist);
 186    if (pos0 == std::string::npos)
 187    {
 188       // whole string consists of charlist (or is already empty)
 189       return std::string();
 190    }
 191    // now let's look at the end:
 192    std::string::size_type pos1= str.find_last_not_of(charlist);
 193    return str.substr(pos0, pos1 - pos0 + 1);
 194 } // eo trim(const std:.string&,const std::string&)
 195
 196
 197 /**
 198  * removes last character from a string when it is in a list of chars to be removed.
 199  * @param str the string.
 200  * @param what the list of chars which will be tested for.
 201  * @return the resulting string with last char removed (if applicable)
 202  */
 203 std::string chomp (const std::string& str, const std::string& what)
 204 {
 205    if (str.empty() || what.empty() )
 206    {
 207       return str;
 208    }
 209    if (what.find(str.at (str.size()-1) ) != std::string::npos)
 210    {
 211       return str.substr(0, str.size()-1);
 212    }
 213    return str;
 214 } // eo chomp(const std:.string&,const std::string&)
 215
 216
 217 /**
 218  * @brief returns a lower case version of a given string.
 219  * @param str the string
 220  * @return the lower case version of the string
 221  */
 222 std::string to_lower (const std::string& str)
 223 {
 224    std::string result(str);
 225    return to_lower_mod(result);
 226 } // eo to_lower(const std::string&)
 227
 228
 229 /**
 230  * @brief returns a upper case version of a given string.
 231  * @param str the string
 232  * @return the upper case version of the string
 233  */
 234 std::string to_upper(const std::string& str)
 235 {
 236    std::string result(str);
 237    return to_upper_mod(result);
 238 } // eo to_upper(const std::string&)
 239
 240
 241
 242 /**
 243  * @brief removes a given suffix from a string.
 244  * @param str the string.
 245  * @param suffix the suffix which should be removed if the string ends with it.
 246  * @return the string without the suffix.
 247  *
 248  * If the string ends with the suffix, it is removed. If the the string doesn't end
 249  * with the suffix the original string is returned.
 250  */
 251 std::string remove_suffix(const std::string& str, const std::string& suffix)
 252 {
 253    if (has_suffix(str,suffix) )
 254    {
 255       return str.substr(0, str.size()-suffix.size() );
 256    }
 257    return str;
 258 } // eo remove_suffix(const std::string&,const std::string&)
 259
 260
 261
 262 /**
 263  * @brief removes a given prefix from a string.
 264  * @param str the string.
 265  * @param prefix the prefix which should be removed if the string begins with it.
 266  * @return the string without the prefix.
 267  *
 268  * If the string begins with the prefix, it is removed. If the the string doesn't begin
 269  * with the prefix the original string is returned.
 270  */
 271 std::string remove_prefix(const std::string& str, const std::string& prefix)
 272 {
 273    if (has_prefix(str,prefix) )
 274    {
 275       return str.substr( prefix.size() );
 276    }
 277    return str;
 278 } // eo remove_prefix(const std::string&,const std::string&)
 279
 280
 281 /**
 282  * split a string to key and value delimited by a given delimiter.
 283  * The resulting key and value strings are trimmed (Whitespaces removed at beginning and end).
 284  * @param str the string which should be splitted.
 285  * @param[out] key the resulting key
 286  * @param[out] value the resulting value
 287  * @param delimiter the delimiter between key and value; default is '='.
 288  * @return @a true if the split was successful.
 289  */
 290 bool pair_split(
 291    const std::string& str,
 292    std::string& key,
 293    std::string& value,
 294    char delimiter)
 295 {
 296    std::string::size_type pos = str.find (delimiter);
 297    if (pos == std::string::npos) return false;
 298    key= str.substr(0,pos);
 299    value= str.substr(pos+1);
 300    trim_mod(key);
 301    trim_mod(value);
 302    return true;
 303 } // eo pair_split(const std::string&,std::string&,std::string&,char)
 304
 305
 306 /**
 307  * splits a string by given delimiter
 308  *
 309  * @param[in] str the string which should be splitted.
 310  * @param[out] result the list resulting from splitting  @a str.
 311  * @param[in] delimiter the delimiter (word/phrase) at which @a str should be splitted.
 312  * @param[in] omit_empty should empty parts not be stored?
 313  * @param[in] trim_list list of characters the parts should be trimmed by.
 314  *  (empty string results in no trim)
 315  */
 316 void split_string(
 317    const std::string& str,
 318    std::list<std::string>& result,
 319    const std::string& delimiter,
 320    bool omit_empty,
 321    const std::string& trim_list
 322 )
 323 {
 324    std::string::size_type pos, last_pos=0;
 325    bool delimiter_found= false;
 326    while ( last_pos < str.size()  && last_pos != std::string::npos)
 327    {
 328       pos= str.find(delimiter, last_pos);
 329       std::string part;
 330       if (pos == std::string::npos)
 331       {
 332          part= str.substr(last_pos);
 333          delimiter_found= false;
 334       }
 335       else
 336       {
 337          part= str.substr(last_pos, pos-last_pos);
 338          delimiter_found=true;
 339       }
 340       if (pos != std::string::npos)
 341       {
 342          last_pos= pos+ delimiter.size();
 343       }
 344       else
 345       {
 346          last_pos= std::string::npos;
 347       }
 348       if (!trim_list.empty() ) trim_mod (part, trim_list);
 349       if (omit_empty && part.empty() ) continue;
 350       result.push_back( part );
 351    }
 352    // if the string ends with a delimiter we need to append an empty string if no omit_empty
 353    // was given.
 354    // (this way we keep the split result consistent to a join operation)
 355    if (delimiter_found && !omit_empty)
 356    {
 357       result.push_back("");
 358    }
 359 } // eo split_string(const std::string&,std::list< std::string >&,const std::string&,bool,const std::string&)
 360
 361
 362 /**
 363  * splits a string by a given delimiter
 364  * @param str the string which should be splitted.
 365  * @param delimiter delimiter the delimiter (word/phrase) at which @a str should be splitted.
 366  * @param[in] omit_empty should empty parts not be stored?
 367  * @param[in] trim_list list of characters the parts should be trimmed by.
 368  *  (empty string results in no trim)
 369  * @return the list resulting from splitting @a str.
 370  */
 371 std::list<std::string> split_string(
 372    const std::string& str,
 373    const std::string& delimiter,
 374    bool omit_empty,
 375    const std::string& trim_list
 376 )
 377 {
 378    std::list<std::string> result;
 379    split_string(str, result, delimiter, omit_empty, trim_list);
 380    return result;
 381 } // eo split_string(const std::string&,const std::string&,bool,const std::string&)
 382
 383
 384 /**
 385  * @brief joins a list of strings into a single string.
 386  *
 387  * This funtion is (basically) the reverse operation of @a split_string.
 388  *
 389  * @param parts the list of strings.
 390  * @param delimiter the delimiter which is inserted between the strings.
 391  * @return the joined string.
 392  */
 393 std::string join_string(
 394    const std::list< std::string >& parts,
 395    const std::string& delimiter
 396 )
 397 {
 398    std::string result;
 399    if (! parts.empty() )
 400    {
 401       std::list< std::string >::const_iterator it= parts.begin();
 402       result = *it;
 403       while ( ++it != parts.end() )
 404       {
 405          result+= delimiter;
 406          result+= *it;
 407       }
 408    }
 409    return result;
 410 } // eo join_string(const std::list< std::string >&,const std::string&)
 411
 412
 413
 414 /*
 415 ** conversions
 416 */
 417
 418
 419 /**
 420  * @brief returns a hex string from a binary string.
 421  * @param str the (binary) string
 422  * @param upper_case_digits determine whether to use upper case characters for digits A-F.
 423  * @return the string in hex notation.
 424  */
 425 std::string convert_binary_to_hex(
 426    const std::string& str,
 427    bool upper_case_digits
 428 )
 429 {
 430    std::string result;
 431    std::string hexDigits(upper_case_digits ? hexDigitsUpper : hexDigitsLower);
 432    for ( std::string::const_iterator it= str.begin();
 433          it != str.end();
 434          ++it)
 435    {
 436       result.push_back( hexDigits[ ( (*it) >> 4) & 0x0f ] );
 437       result.push_back( hexDigits[ (*it) & 0x0f ] );
 438    }
 439    return result;
 440 } // eo convert_binary_to_hex(const std::string&,bool)
 441
 442
 443 /**
 444  * @brief converts a hex digit string to binary string.
 445  * @param str hex digit string
 446  * @return the binary string.
 447  *
 448  * The hex digit string may contains white spaces or colons which are treated
 449  * as delimiters between hex digit groups.
 450  *
 451  * @todo rework the handling of half nibbles (consistency)!
 452  */
 453 std::string convert_hex_to_binary(
 454    const std::string& str
 455 )
 456 throw (std::runtime_error)
 457 {
 458    std::string result;
 459    char c= 0;
 460    bool hasNibble= false;
 461    bool lastWasWS= true;
 462    for ( std::string::const_iterator it= str.begin();
 463          it != str.end();
 464          ++it)
 465    {
 466       std::string::size_type p = hexDigitsLower.find( *it );
 467       if (p== std::string::npos)
 468       {
 469          p= hexDigitsUpper.find( *it );
 470       }
 471       if (p == std::string::npos)
 472       {
 473          if (   ( Whitespaces.find( *it ) != std::string::npos) // is it a whitespace?
 474                 or ( *it == ':') // or a colon?
 475             )
 476          {
 477             // we treat that as a valid delimiter:
 478             if (hasNibble)
 479             {
 480                // 1 nibble before WS is treate as lower part:
 481                result.push_back(c);
 482                // reset state:
 483                hasNibble= false;
 484             }
 485             lastWasWS= true;
 486             continue;
 487          }
 488       }
 489       if (p == std::string::npos )
 490       {
 491          throw runtime_error("illegal character in hex digit string: " + str);
 492       }
 493       lastWasWS= false;
 494       if (hasNibble)
 495       {
 496          c<<=4;
 497       }
 498       else
 499       {
 500          c=0;
 501       }
 502       c+= (p & 0x0f);
 503       if (hasNibble)
 504       {
 505          //we already had a nibble, so a char is complete now:
 506          result.push_back( c );
 507          hasNibble=false;
 508       }
 509       else
 510       {
 511          // this is the first nibble of a new char:
 512          hasNibble=true;
 513       }
 514    }
 515    if (hasNibble)
 516    {
 517       //well, there is one nibble left
 518       // let's do some heuristics:
 519       if (lastWasWS)
 520       {
 521          // if the preceeding character was a white space (or a colon)
 522          // we treat the nibble as lower part:
 523          //( this is consistent with shortened hex notations where leading zeros are not noted)
 524          result.push_back( c );
 525       }
 526       else
 527       {
 528          // if it was part of a hex digit chain, we treat it as UPPER part (!!)
 529          result.push_back( c << 4 );
 530       }
 531    }
 532    return result;
 533 } // eo convert_hex_to_binary(const std::string&)
 534
 535
 536 } // eo namespace I2n
 537
 538
 539
 540
 541 std::string iso_to_utf8(const std::string& isostring)
 542 {
 543    string result;
 544
 545    iconv_t i2utf8 = iconv_open("UTF-8", "ISO-8859-1");
 546
 547    if (iso_to_utf8 == (iconv_t)-1)
 548       throw runtime_error("iconv can't convert from ISO-8859-1 to UTF-8");
 549
 550    size_t in_size=isostring.size();
 551    size_t out_size=in_size*4;
 552
 553    char *buf = (char *)malloc(out_size+1);
 554    if (buf == NULL)
 555       throw runtime_error("out of memory for iconv buffer");
 556
 557    const char *in = isostring.c_str();
 558    char *out = buf;
 559    iconv(i2utf8, &in, &in_size, &out, &out_size);
 560
 561    buf[isostring.size()*4-out_size]=0;
 562
 563    result=buf;
 564
 565    free(buf);
 566    iconv_close(i2utf8);
 567
 568    return result;
 569 }
 570
 571 std::string utf8_to_iso(const std::string& utf8string)
 572 {
 573    string result;
 574
 575    iconv_t utf82iso = iconv_open("ISO-8859-1","UTF-8");
 576
 577    if (utf82iso == (iconv_t)-1)
 578       throw runtime_error("iconv can't convert from UTF-8 to ISO-8859-1");
 579
 580    size_t in_size=utf8string.size();
 581    size_t out_size=in_size;
 582
 583    char *buf = (char *)malloc(out_size+1);
 584    if (buf == NULL)
 585       throw runtime_error("out of memory for iconv buffer");
 586
 587    const char *in = utf8string.c_str();
 588    char *out = buf;
 589    iconv(utf82iso, &in, &in_size, &out, &out_size);
 590
 591    buf[utf8string.size()-out_size]=0;
 592
 593    result=buf;
 594
 595    free(buf);
 596    iconv_close(utf82iso);
 597
 598    return result;
 599 }
 600
 601 wchar_t* utf8_to_wbuf(const std::string& utf8string)
 602 {
 603    iconv_t utf82wstr = iconv_open("UCS-4LE","UTF-8");
 604
 605    if (utf82wstr == (iconv_t)-1)
 606       throw runtime_error("iconv can't convert from UTF-8 to UCS-4");
 607
 608    size_t in_size=utf8string.size();
 609    size_t out_size= (in_size+1)*sizeof(wchar_t);
 610
 611    wchar_t *buf = (wchar_t *)malloc(out_size);
 612    if (buf == NULL)
 613       throw runtime_error("out of memory for iconv buffer");
 614
 615    const char *in = utf8string.c_str();
 616    char *out = (char*) buf;
 617    if (iconv(utf82wstr, &in, &in_size, &out, &out_size) == -1)
 618       throw runtime_error("error converting char encodings");
 619
 620    buf[ ( (utf8string.size()+1)*sizeof(wchar_t)-out_size) /sizeof(wchar_t) ]=0;
 621
 622    iconv_close(utf82wstr);
 623
 624    return buf;
 625 }
 626
 627 std::string utf7imap_to_utf8(const std::string& utf7imapstring)
 628 {
 629    string result;
 630
 631    iconv_t utf7imap2utf8 = iconv_open("UTF-8","UTF-7-IMAP");
 632
 633    if (utf7imap2utf8 == (iconv_t)-1)
 634       throw runtime_error("iconv can't convert from UTF-7-IMAP to UTF-8");
 635
 636    size_t in_size=utf7imapstring.size();
 637    size_t out_size=in_size*4;
 638
 639    char *buf = (char *)malloc(out_size+1);
 640    if (buf == NULL)
 641       throw runtime_error("out of memory for iconv buffer");
 642
 643    const char *in = utf7imapstring.c_str();
 644    char *out = buf;
 645    iconv(utf7imap2utf8, &in, &in_size, &out, &out_size);
 646
 647    buf[utf7imapstring.size()*4-out_size]=0;
 648
 649    result=buf;
 650
 651    free(buf);
 652    iconv_close(utf7imap2utf8);
 653
 654    return result;
 655 }
 656
 657 std::string utf8_to_utf7imap(const std::string& utf8string)
 658 {
 659    string result;
 660
 661    iconv_t utf82utf7imap = iconv_open("UTF-7-IMAP", "UTF-8");
 662
 663    if (utf82utf7imap == (iconv_t)-1)
 664       throw runtime_error("iconv can't convert from UTF-7-IMAP to UTF-8");
 665
 666    // UTF-7 is base64 encoded, a buffer 10x as large
 667    // as the utf-8 buffer should be enough. If not the string will be truncated.
 668    size_t in_size=utf8string.size();
 669    size_t out_size=in_size*10;
 670
 671    char *buf = (char *)malloc(out_size+1);
 672    if (buf == NULL)
 673       throw runtime_error("out of memory for iconv buffer");
 674
 675    const char *in = utf8string.c_str();
 676    char *out = buf;
 677    iconv(utf82utf7imap, &in, &in_size, &out, &out_size);
 678
 679    buf[utf8string.size()*10-out_size]= 0;
 680
 681    result=buf;
 682
 683    free(buf);
 684    iconv_close(utf82utf7imap);
 685
 686    return result;
 687 }
 688
 689 // Tokenize string by (html) tags
 690 void tokenize_by_tag(vector<pair<string,bool> > &tokenized, const std::string &input)
 691 {
 692    string::size_type pos, len = input.size();
 693    bool inside_tag = false;
 694    string current;
 695
 696    for (pos = 0; pos < len; pos++)
 697    {
 698       if (input[pos] == '<')
 699       {
 700          inside_tag = true;
 701
 702          if (!current.empty() )
 703          {
 704             tokenized.push_back( make_pair(current, false) );
 705             current = "";
 706          }
 707
 708          current += input[pos];
 709       }
 710       else if (input[pos] == '>' && inside_tag)
 711       {
 712          current += input[pos];
 713          inside_tag = false;
 714          if (!current.empty() )
 715          {
 716             tokenized.push_back( make_pair(current, true) );
 717             current = "";
 718          }
 719       }
 720       else
 721          current += input[pos];
 722    }
 723
 724    // String left over in buffer?
 725    if (!current.empty() )
 726       tokenized.push_back( make_pair(current, false) );
 727 } // eo tokenize_by_tag
 728
 729
 730 std::string strip_html_tags(const std::string &input)
 731 {
 732    // Pair first: string, second: isTag
 733    vector<pair<string,bool> > tokenized;
 734    tokenize_by_tag (tokenized, input);
 735
 736    string output;
 737    vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
 738    for (token = tokenized.begin(); token != tokens_end; token++)
 739       if (!token->second)
 740          output += token->first;
 741
 742    return output;
 743 } // eo strip_html_tags
 744
 745
 746 // Smart-encode HTML en
 747 string smart_html_entities(const std::string &input)
 748 {
 749    // Pair first: string, second: isTag
 750    vector<pair<string,bool> > tokenized;
 751    tokenize_by_tag (tokenized, input);
 752
 753    string output;
 754    vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
 755    for (token = tokenized.begin(); token != tokens_end; token++)
 756    {
 757       // keep HTML tags as they are
 758       if (token->second)
 759          output += token->first;
 760       else
 761          output += html_entities(token->first);
 762    }
 763
 764    return output;
 765 }
 766
 767
 768 string::size_type find_8bit(const std::string &str)
 769 {
 770    string::size_type l=str.size();
 771    for (string::size_type p=0; p < l; p++)
 772       if (static_cast<unsigned char>(str[p]) > 127)
 773          return p;
 774
 775    return string::npos;
 776 }
 777
 778 // encoded UTF-8 chars into HTML entities
 779 string html_entities(std::string str)
 780 {
 781    // Normal chars
 782    replace_all (str, "&", "&amp;");
 783    replace_all (str, "\"", "&quot;");
 784    replace_all (str, "<", "&lt;");
 785    replace_all (str, ">", "&gt;");
 786
 787    // Umlauts
 788    replace_all (str, "\xC3\xA4", "&auml;");
 789    replace_all (str, "\xC3\xB6", "&ouml;");
 790    replace_all (str, "\xC3\xBC", "&uuml;");
 791    replace_all (str, "\xC3\x84", "&Auml;");
 792    replace_all (str, "\xC3\x96", "&Ouml;");
 793    replace_all (str, "\xC3\x9C", "&Uuml;");
 794
 795    // Misc
 796    replace_all (str, "\xC3\x9F", "&szlig;");
 797
 798    // conversion of remaining non-ASCII chars needed?
 799    // just do if needed because of performance
 800    if (find_8bit(str) != string::npos)
 801    {
 802       // convert to fixed-size encoding UTF-32
 803       wchar_t* wbuf=utf8_to_wbuf(str);
 804       ostringstream target;
 805
 806       // replace all non-ASCII chars with HTML representation
 807       for (int p=0; wbuf[p] != 0; p++)
 808       {
 809          unsigned int c=wbuf[p];
 810
 811          if (c <= 127)
 812             target << static_cast<unsigned char>(c);
 813          else
 814             target << "&#" << c << ';';
 815       }
 816
 817       free(wbuf);
 818
 819       str=target.str();
 820    }
 821
 822    return str;
 823 } // eo html_entities(std::string)
 824
 825
 826 bool replace_all(string &base, const char *ist, const char *soll)
 827 {
 828    string i=ist;
 829    string s=soll;
 830    return replace_all(base,&i,&s);
 831 }
 832
 833 bool replace_all(string &base, const string &ist, const char *soll)
 834 {
 835    string s=soll;
 836    return replace_all(base,&ist,&s);
 837 }
 838
 839 bool replace_all(string &base, const string *ist, const string *soll)
 840 {
 841    return replace_all(base,*ist,*soll);
 842 }
 843
 844 bool replace_all(string &base, const char *ist, const string *soll)
 845 {
 846    string i=ist;
 847    return replace_all(base,&i,soll);
 848 }
 849
 850 bool replace_all(string &base, const string &ist, const string &soll)
 851 {
 852    bool found_ist = false;
 853    string::size_type a=0;
 854
 855    if (ist.empty() )
 856       throw runtime_error ("replace_all called with empty search string");
 857
 858    while ( (a=base.find(ist,a) ) != string::npos)
 859    {
 860       base.replace(a,ist.size(),soll);
 861       a=a+soll.size();
 862       found_ist = true;
 863    }
 864
 865    return found_ist;
 866 }
 867
 868 #if 0
 869 string to_lower(const string &src)
 870 {
 871    string dst = src;
 872
 873    string::size_type pos, end = dst.size();
 874    for (pos = 0; pos < end; pos++)
 875       dst[pos] = tolower(dst[pos]);
 876
 877    return dst;
 878 }
 879
 880 string to_upper(const string &src)
 881 {
 882    string dst = src;
 883
 884    string::size_type pos, end = dst.size();
 885    for (pos = 0; pos < end; pos++)
 886       dst[pos] = toupper(dst[pos]);
 887
 888    return dst;
 889 }
 890 #endif
 891
 892 string nice_unit_format(int input)
 893 {
 894    float size = input;
 895    int sizecount = 0;
 896
 897    while (size > 1000)
 898    {
 899       size = size / 1000;
 900       sizecount++;
 901    }
 902
 903    float tmp;                       // round
 904    tmp = size*10;
 905    tmp += 0.5;
 906    tmp = int (tmp);
 907    tmp = float (tmp) /float (10);
 908    size = tmp;
 909
 910    ostringstream out;
 911
 912    out.setf (ios::fixed);
 913    out.precision (2);
 914    switch (sizecount)
 915    {
 916       case 0:
 917          out << size << i18n (" Bytes");
 918          break;
 919       case 1:
 920          out << size << i18n (" KBytes");
 921          break;
 922       case 2:
 923          out << size << i18n (" MBytes");
 924          break;
 925       case 3:
 926          out << size << i18n (" GBytes");
 927          break;
 928       case 4:
 929          out << size << i18n (" TBytes");
 930          break;
 931       case 5:
 932          out << size << i18n (" PBytes");
 933          break;
 934       case 6:
 935          out << size << i18n (" EBytes");
 936          break;
 937       case 7:
 938          out << size << i18n (" ZBytes");
 939          break;
 940       case 8:
 941          out << size << i18n (" YBytes");
 942          break;
 943       default:
 944          out << size << "*10^" << (sizecount*3)<< i18n (" Bytes");
 945          break;
 946    }
 947
 948    return out.str();
 949 } // eo nice_unit_format(int input)
 950
 951
 952 string escape(const string &s)
 953 {
 954    string out(s);
 955    string::size_type p;
 956
 957    p=0;
 958    while ( (p=out.find_first_of("\"\\",p) ) !=out.npos)
 959    {
 960       out.insert (p,"\\");
 961       p+=2;
 962    }
 963
 964    p=0;
 965    while ( (p=out.find_first_of("\r",p) ) !=out.npos)
 966    {
 967       out.replace (p,1,"\\r");
 968       p+=2;
 969    }
 970
 971    p=0;
 972    while ( (p=out.find_first_of("\n",p) ) !=out.npos)
 973    {
 974       out.replace (p,1,"\\n");
 975       p+=2;
 976    }
 977
 978    out='"'+out+'"';
 979
 980    return out;
 981 } // eo scape(const std::string&)
 982
 983
 984 string descape(const string &s, int startpos, int &endpos)
 985 {
 986    string out;
 987
 988    if (s.at(startpos) != '"')
 989       throw out_of_range("value not type escaped string");
 990
 991    out=s.substr(startpos+1);
 992    string::size_type p=0;
 993
 994    // search for the end of the string
 995    while ( (p=out.find("\"",p) ) !=out.npos)
 996    {
 997       int e=p-1;
 998       bool escaped=false;
 999
1000       // the " might be escaped with a backslash
1001       while (e>=0 && out.at (e) =='\\')
1002       {
1003          if (escaped == false)
1004             escaped=true;
1005          else
1006             escaped=false;
1007
1008          e--;
1009       }
1010
1011       if (escaped==false)
1012          break;
1013       else
1014          p++;
1015    }
1016
1017    // we now have the end of the string
1018    out=out.substr(0,p);
1019
1020    // tell calling prog about the endposition
1021    endpos=startpos+p+1;
1022
1023    // descape all \ stuff inside the string now
1024    p=0;
1025    while ( (p=out.find_first_of("\\",p) ) !=out.npos)
1026    {
1027       switch (out.at(p+1) )
1028       {
1029          case 'r':
1030             out.replace(p,2,"\r");
1031             break;
1032          case 'n':
1033             out.replace(p,2,"\n");
1034             break;
1035          default:
1036             out.erase(p,1);
1037       }
1038       p++;
1039    }
1040
1041    return out;
1042 } // eo descape(const std::string&,int,int&)
1043
1044
/**
 * @brief quotes a string for use as a single shell argument.
 *
 * The string is enclosed in single quotes; every single quote inside the
 * string is replaced by the sequence '\'' (close quote, escaped quote, reopen quote).
 *
 * @param input the string.
 * @return the quoted string.
 */
std::string escape_shellarg(const std::string &input)
{
   std::string quoted("'");

   for (std::string::size_type i = 0; i < input.size(); ++i)
   {
      const char ch = input[i];
      if (ch == '\'')
         quoted += "'\\''";
      else
         quoted += ch;
   }

   quoted += '\'';
   return quoted;
}