| 1 | /* |
| 2 | The software in this package is distributed under the GNU General |
| 3 | Public License version 2 (with a special exception described below). |
| 4 | |
| 5 | A copy of GNU General Public License (GPL) is included in this distribution, |
| 6 | in the file COPYING.GPL. |
| 7 | |
| 8 | As a special exception, if other files instantiate templates or use macros |
| 9 | or inline functions from this file, or you compile this file and link it |
| 10 | with other works to produce a work based on this file, this file |
| 11 | does not by itself cause the resulting work to be covered |
| 12 | by the GNU General Public License. |
| 13 | |
| 14 | However the source code for this file must still be made available |
| 15 | in accordance with section (3) of the GNU General Public License. |
| 16 | |
| 17 | This exception does not invalidate any other reasons why a work based |
| 18 | on this file might be covered by the GNU General Public License. |
| 19 | */ |
| 20 | /** @file |
| 21 | * |
| 22 | * (c) Copyright 2007-2008 by Intra2net AG |
| 23 | */ |
| 24 | |
| 25 | #include <iostream> |
| 26 | #include <string> |
| 27 | #include <sstream> |
| 28 | #include <stdexcept> |
| 29 | #include <algorithm> |
| 30 | #include <cmath> // for round() |
| 31 | #include <climits> |
| 32 | |
| 33 | #include <wchar.h> |
| 34 | #include <stdlib.h> |
| 35 | #include <iconv.h> |
| 36 | #include <i18n.h> |
| 37 | |
| 38 | #include <boost/numeric/conversion/cast.hpp> |
| 39 | #include <boost/foreach.hpp> |
| 40 | |
| 41 | #include <boost/assert.hpp> |
| 42 | #include <boost/shared_ptr.hpp> |
| 43 | #include <openssl/bio.h> |
| 44 | #include <openssl/evp.h> |
| 45 | |
| 46 | #include <stringfunc.hxx> |
| 47 | |
| 48 | using namespace std; |
| 49 | |
| 50 | namespace I2n |
| 51 | { |
| 52 | |
| 53 | |
namespace
{

/// Hex digit tables; the index of a digit inside the table is the value of the nibble.
const std::string hexDigitsLower("0123456789abcdef");
const std::string hexDigitsUpper("0123456789ABCDEF");


/**
 * @brief functor mapping one character to its upper case form.
 *
 * The character is converted to unsigned char before it is handed to std::toupper():
 * calling the <cctype> functions with a negative value other than EOF is undefined.
 */
struct UpperFunc
{
    char operator() (char c) const
    {
        return static_cast<char>( std::toupper( static_cast<unsigned char>(c) ) );
    }
}; // eo struct UpperFunc


/**
 * @brief functor mapping one character to its lower case form.
 *
 * Same unsigned char conversion as in UpperFunc, for the same reason.
 */
struct LowerFunc
{
    char operator() (char c) const
    {
        return static_cast<char>( std::tolower( static_cast<unsigned char>(c) ) );
    }
}; // eo struct LowerFunc


} // eo namespace <anonymous>
| 80 | |
| 81 | |
| 82 | |
/**
 * Default set of whitespace characters: blank, tab, carriage return and line feed.
 */
const std::string Whitespaces(" \t\r\n");

/**
 * Default set of line ending characters: carriage return and line feed.
 */
const std::string LineEndings("\r\n");
| 92 | |
| 93 | |
| 94 | |
/**
 * @brief tests whether a string starts with a given prefix.
 * @param str the string to examine.
 * @param prefix the prefix to look for.
 * @return @a true iff @a prefix is not empty and @a str starts with it.
 */
bool has_prefix(const std::string& str, const std::string& prefix)
{
    const std::string::size_type prefix_len= prefix.size();
    // an empty prefix never matches; a prefix longer than the string cannot match.
    if (prefix_len == 0 || prefix_len > str.size() )
    {
        return false;
    }
    return 0 == str.compare(0, prefix_len, prefix);
} // eo has_prefix(const std::string&,const std::string&)
| 109 | |
| 110 | |
/**
 * @brief tests whether a string ends with a given suffix.
 * @param str the string to examine.
 * @param suffix the suffix to look for.
 * @return @a true iff @a suffix is not empty and @a str ends with it.
 */
bool has_suffix(const std::string& str, const std::string& suffix)
{
    const std::string::size_type suffix_len= suffix.size();
    // an empty suffix never matches; a suffix longer than the string cannot match.
    if (suffix_len == 0 || suffix_len > str.size() )
    {
        return false;
    }
    return 0 == str.compare(str.size() - suffix_len, suffix_len, suffix);
} // eo has_suffix(const std::string&,const std::string&)
| 125 | |
| 126 | |
/**
 * strips all leading and trailing characters which are contained in a given list.
 * @param[in,out] str the string which is trimmed in place.
 * @param charlist the characters to strip from the beginning and the end of the string.
 * @return a copy of the trimmed string.
 */
std::string trim_mod(std::string& str, const std::string& charlist)
{
    const std::string::size_type first= str.find_first_not_of(charlist);
    if (first == std::string::npos)
    {
        // nothing but charlist characters (or nothing at all) in there
        str.clear();
    }
    else
    {
        // at least one character survives, so the reverse search succeeds as well.
        // The tail is cut first; that way 'first' is still a valid index afterwards.
        const std::string::size_type last= str.find_last_not_of(charlist);
        str.erase(last + 1);
        str.erase(0, first);
    }
    return str;
} // eo trim_mod(std::string&,const std::string&)
| 156 | |
| 157 | |
| 158 | |
/**
 * drops the last character of a string if it is contained in a given list.
 * At most one character is removed.
 * @param[in,out] str the string which is modified in place.
 * @param what the characters which qualify for removal.
 * @return a copy of the (possibly shortened) string.
 */
std::string chomp_mod(std::string& str, const std::string& what)
{
    if (!str.empty() && !what.empty() )
    {
        const std::string::size_type last= str.size() - 1;
        if (what.find( str[last] ) != std::string::npos)
        {
            str.erase(last);
        }
    }
    return str;
} // eo chomp_mod(std::string&,const std::string&)
| 177 | |
| 178 | |
| 179 | /** |
| 180 | * @brief converts a string to lower case. |
| 181 | * @param[in,out] str the string to modify. |
| 182 | * @return the string |
| 183 | */ |
| 184 | std::string to_lower_mod(std::string& str) |
| 185 | { |
| 186 | std::transform(str.begin(), str.end(), str.begin(), LowerFunc() ); |
| 187 | return str; |
| 188 | } // eo to_lower_mod(std::string&) |
| 189 | |
| 190 | |
| 191 | /** |
| 192 | * @brief converts a string to upper case. |
| 193 | * @param[in,out] str the string to modify. |
| 194 | * @return the string |
| 195 | */ |
| 196 | std::string to_upper_mod(std::string& str) |
| 197 | { |
| 198 | std::transform( str.begin(), str.end(), str.begin(), UpperFunc() ); |
| 199 | return str; |
| 200 | } // eo to_upper_mod(std::string&) |
| 201 | |
| 202 | |
| 203 | |
/**
 * returns a copy of a string without the leading and trailing characters
 * which are contained in a given list.
 * @param str the string to trim (left untouched).
 * @param charlist the characters to strip from the beginning and the end.
 * @return the trimmed copy.
 */
std::string trim (const std::string& str, const std::string& charlist)
{
    const std::string::size_type first= str.find_first_not_of(charlist);
    if (first != std::string::npos)
    {
        // there is something to keep; locate where it ends:
        const std::string::size_type last= str.find_last_not_of(charlist);
        return std::string(str, first, last - first + 1);
    }
    // nothing but charlist characters (or nothing at all) in there
    return std::string();
} // eo trim(const std::string&,const std::string&)
| 223 | |
| 224 | |
/**
 * returns a copy of a string without its last character if that character
 * is contained in a given list. At most one character is dropped.
 * @param str the string (left untouched).
 * @param what the characters which qualify for removal.
 * @return the (possibly shortened) copy.
 */
std::string chomp (const std::string& str, const std::string& what)
{
    const bool removable= !str.empty()
                          && !what.empty()
                          && what.find( *str.rbegin() ) != std::string::npos;
    if (removable)
    {
        return std::string(str, 0, str.size()-1);
    }
    return str;
} // eo chomp(const std::string&,const std::string&)
| 243 | |
| 244 | |
| 245 | /** |
| 246 | * @brief returns a lower case version of a given string. |
| 247 | * @param str the string |
| 248 | * @return the lower case version of the string |
| 249 | */ |
| 250 | std::string to_lower (const std::string& str) |
| 251 | { |
| 252 | std::string result(str); |
| 253 | return to_lower_mod(result); |
| 254 | } // eo to_lower(const std::string&) |
| 255 | |
| 256 | |
| 257 | /** |
| 258 | * @brief returns a upper case version of a given string. |
| 259 | * @param str the string |
| 260 | * @return the upper case version of the string |
| 261 | */ |
| 262 | std::string to_upper(const std::string& str) |
| 263 | { |
| 264 | std::string result(str); |
| 265 | return to_upper_mod(result); |
| 266 | } // eo to_upper(const std::string&) |
| 267 | |
| 268 | |
| 269 | |
| 270 | /** |
| 271 | * @brief removes a given suffix from a string. |
| 272 | * @param str the string. |
| 273 | * @param suffix the suffix which should be removed if the string ends with it. |
| 274 | * @return the string without the suffix. |
| 275 | * |
| 276 | * If the string ends with the suffix, it is removed. If the the string doesn't end |
| 277 | * with the suffix the original string is returned. |
| 278 | */ |
| 279 | std::string remove_suffix(const std::string& str, const std::string& suffix) |
| 280 | { |
| 281 | if (has_suffix(str,suffix) ) |
| 282 | { |
| 283 | return str.substr(0, str.size()-suffix.size() ); |
| 284 | } |
| 285 | return str; |
| 286 | } // eo remove_suffix(const std::string&,const std::string&) |
| 287 | |
| 288 | |
| 289 | |
| 290 | /** |
| 291 | * @brief removes a given prefix from a string. |
| 292 | * @param str the string. |
| 293 | * @param prefix the prefix which should be removed if the string begins with it. |
| 294 | * @return the string without the prefix. |
| 295 | * |
| 296 | * If the string begins with the prefix, it is removed. If the the string doesn't begin |
| 297 | * with the prefix the original string is returned. |
| 298 | */ |
| 299 | std::string remove_prefix(const std::string& str, const std::string& prefix) |
| 300 | { |
| 301 | if (has_prefix(str,prefix) ) |
| 302 | { |
| 303 | return str.substr( prefix.size() ); |
| 304 | } |
| 305 | return str; |
| 306 | } // eo remove_prefix(const std::string&,const std::string&) |
| 307 | |
| 308 | |
| 309 | /** |
| 310 | * split a string to key and value delimited by a given delimiter. |
| 311 | * The resulting key and value strings are trimmed (Whitespaces removed at beginning and end). |
| 312 | * @param str the string which should be splitted. |
| 313 | * @param[out] key the resulting key |
| 314 | * @param[out] value the resulting value |
| 315 | * @param delimiter the delimiter between key and value; default is '='. |
| 316 | * @return @a true if the split was successful. |
| 317 | */ |
| 318 | bool pair_split( |
| 319 | const std::string& str, |
| 320 | std::string& key, |
| 321 | std::string& value, |
| 322 | char delimiter) |
| 323 | { |
| 324 | std::string::size_type pos = str.find (delimiter); |
| 325 | if (pos == std::string::npos) return false; |
| 326 | key= str.substr(0,pos); |
| 327 | value= str.substr(pos+1); |
| 328 | trim_mod(key); |
| 329 | trim_mod(value); |
| 330 | return true; |
| 331 | } // eo pair_split(const std::string&,std::string&,std::string&,char) |
| 332 | |
| 333 | |
| 334 | /** |
| 335 | * splits a string by given delimiter |
| 336 | * |
| 337 | * @param[in] str the string which should be splitted. |
| 338 | * @param[out] result the list resulting from splitting @a str. |
| 339 | * @param[in] delimiter the delimiter (word/phrase) at which @a str should be splitted. |
| 340 | * @param[in] omit_empty should empty parts not be stored? |
| 341 | * @param[in] trim_list list of characters the parts should be trimmed by. |
| 342 | * (empty string results in no trim) |
| 343 | */ |
| 344 | void split_string( |
| 345 | const std::string& str, |
| 346 | std::list<std::string>& result, |
| 347 | const std::string& delimiter, |
| 348 | bool omit_empty, |
| 349 | const std::string& trim_list |
| 350 | ) |
| 351 | { |
| 352 | std::string::size_type pos, last_pos=0; |
| 353 | bool delimiter_found= false; |
| 354 | while ( last_pos < str.size() && last_pos != std::string::npos) |
| 355 | { |
| 356 | pos= str.find(delimiter, last_pos); |
| 357 | std::string part; |
| 358 | if (pos == std::string::npos) |
| 359 | { |
| 360 | part= str.substr(last_pos); |
| 361 | delimiter_found= false; |
| 362 | } |
| 363 | else |
| 364 | { |
| 365 | part= str.substr(last_pos, pos-last_pos); |
| 366 | delimiter_found=true; |
| 367 | } |
| 368 | if (pos != std::string::npos) |
| 369 | { |
| 370 | last_pos= pos+ delimiter.size(); |
| 371 | } |
| 372 | else |
| 373 | { |
| 374 | last_pos= std::string::npos; |
| 375 | } |
| 376 | if (!trim_list.empty() ) trim_mod (part, trim_list); |
| 377 | if (omit_empty && part.empty() ) continue; |
| 378 | result.push_back( part ); |
| 379 | } |
| 380 | // if the string ends with a delimiter we need to append an empty string if no omit_empty |
| 381 | // was given. |
| 382 | // (this way we keep the split result consistent to a join operation) |
| 383 | if (delimiter_found && !omit_empty) |
| 384 | { |
| 385 | result.push_back(""); |
| 386 | } |
| 387 | } // eo split_string(const std::string&,std::list< std::string >&,const std::string&,bool,const std::string&) |
| 388 | |
| 389 | |
| 390 | /** call split_string with list<string>, converts result to vector; vector is clear()-ed first |
| 391 | * |
| 392 | * Note: Uses 3 O(n)-operations: list.size, vector.resize and std::swap_ranges; |
| 393 | * not sure whether there is a better way to do this |
| 394 | * */ |
| 395 | void split_string( |
| 396 | const std::string& str, |
| 397 | std::vector<std::string>& result, |
| 398 | const std::string& delimiter, |
| 399 | bool omit_empty, |
| 400 | const std::string& trim_list |
| 401 | ) |
| 402 | { |
| 403 | std::list<std::string> tmp; |
| 404 | split_string(str, tmp, delimiter, omit_empty, trim_list); |
| 405 | std::size_t size = tmp.size(); // this is O(n) |
| 406 | result.clear(); |
| 407 | result.resize(size); // also O(n) |
| 408 | std::swap_ranges(tmp.begin(), tmp.end(), result.begin()); // also O(n) |
| 409 | } |
| 410 | |
| 411 | /** |
| 412 | * splits a string by a given delimiter |
| 413 | * @param str the string which should be splitted. |
| 414 | * @param delimiter delimiter the delimiter (word/phrase) at which @a str should be splitted. |
| 415 | * @param[in] omit_empty should empty parts not be stored? |
| 416 | * @param[in] trim_list list of characters the parts should be trimmed by. |
| 417 | * (empty string results in no trim) |
| 418 | * @return the list resulting from splitting @a str. |
| 419 | */ |
| 420 | std::list<std::string> split_string( |
| 421 | const std::string& str, |
| 422 | const std::string& delimiter, |
| 423 | bool omit_empty, |
| 424 | const std::string& trim_list |
| 425 | ) |
| 426 | { |
| 427 | std::list<std::string> result; |
| 428 | split_string(str, result, delimiter, omit_empty, trim_list); |
| 429 | return result; |
| 430 | } // eo split_string(const std::string&,const std::string&,bool,const std::string&) |
| 431 | |
| 432 | |
/**
 * joins a NULL-terminated array of C strings into one string.
 * @param parts the array of C strings; the first NULL entry marks its end.
 *              A NULL array pointer is treated like an array without entries.
 * @param delimiter the text which is put between two consecutive parts.
 * @return the joined string; empty if there are no parts.
 */
std::string join_string (
    const char *const parts[], /* assumed NULL-terminated */
    const std::string& delimiter
)
{
    std::string joined;

    if (parts == NULL || parts[0] == NULL)
    {
        return joined;
    }

    joined.append(parts[0]);
    for (const char *const *cur= parts + 1; *cur != NULL; ++cur)
    {
        joined.append(delimiter);
        joined.append(*cur);
    }

    return joined;
}
| 456 | |
| 457 | |
| 458 | |
| 459 | /* |
| 460 | ** conversions |
| 461 | */ |
| 462 | |
| 463 | |
| 464 | /** |
| 465 | * @brief returns a hex string from a binary string. |
| 466 | * @param str the (binary) string |
| 467 | * @param upper_case_digits determine whether to use upper case characters for digits A-F. |
| 468 | * @return the string in hex notation. |
| 469 | */ |
| 470 | std::string convert_binary_to_hex( |
| 471 | const std::string& str, |
| 472 | bool upper_case_digits |
| 473 | ) |
| 474 | { |
| 475 | std::string result; |
| 476 | std::string hexDigits(upper_case_digits ? hexDigitsUpper : hexDigitsLower); |
| 477 | for ( std::string::const_iterator it= str.begin(); |
| 478 | it != str.end(); |
| 479 | ++it) |
| 480 | { |
| 481 | result.push_back( hexDigits[ ( (*it) >> 4) & 0x0f ] ); |
| 482 | result.push_back( hexDigits[ (*it) & 0x0f ] ); |
| 483 | } |
| 484 | return result; |
| 485 | } // eo convert_binary_to_hex(const std::string&,bool) |
| 486 | |
| 487 | |
/**
 * @brief converts a hex digit string to binary string.
 * @param str hex digit string
 * @return the binary string.
 * @throws runtime_error if @a str contains a character which is neither a hex digit
 *         (either case), a character from Whitespaces, nor a colon.
 *
 * The hex digit string may contain white spaces or colons which are treated
 * as delimiters between hex digit groups.
 *
 * Two consecutive hex digits form one byte (first digit is the upper nibble).
 * A single digit which is followed by a delimiter yields a byte with that digit
 * as lower nibble.
 *
 * @todo rework the handling of half nibbles (consistency)!
 */
std::string convert_hex_to_binary(
    const std::string& str
)
{
    std::string result;
    char c= 0;                // the byte which is currently assembled
    bool hasNibble= false;    // true while c holds exactly one digit which is not stored yet
    bool lastWasWS= true;     // true if the most recently seen character was a delimiter
    for ( std::string::const_iterator it= str.begin();
          it != str.end();
          ++it)
    {
        // the position of a digit inside the digit tables is its value;
        // try lower case first, then upper case:
        std::string::size_type p = hexDigitsLower.find( *it );
        if (p== std::string::npos)
        {
            p= hexDigitsUpper.find( *it );
        }
        if (p == std::string::npos)
        {
            if ( ( Whitespaces.find( *it ) != std::string::npos) // is it a whitespace?
                 or ( *it == ':') // or a colon?
               )
            {
                // we treat that as a valid delimiter:
                if (hasNibble)
                {
                    // 1 nibble before WS is treated as lower part:
                    result.push_back(c);
                    // reset state:
                    hasNibble= false;
                }
                lastWasWS= true;
                continue;
            }
        }
        // neither a digit nor a delimiter:
        if (p == std::string::npos )
        {
            throw runtime_error("illegal character in hex digit string: " + str);
        }
        lastWasWS= false;
        if (hasNibble)
        {
            // make room for the second digit:
            c<<=4;
        }
        else
        {
            c=0;
        }
        c+= (p & 0x0f);
        if (hasNibble)
        {
            //we already had a nibble, so a char is complete now:
            result.push_back( c );
            hasNibble=false;
        }
        else
        {
            // this is the first nibble of a new char:
            hasNibble=true;
        }
    }
    if (hasNibble)
    {
        //well, there is one nibble left
        // let's do some heuristics:
        // NOTE(review): hasNibble can only be set here if the last character of str was
        // a hex digit, and processing a hex digit sets lastWasWS to false. So as written
        // the first branch below is never taken and a trailing single digit always ends
        // up as UPPER nibble. Confirm the intended behaviour before changing it.
        if (lastWasWS)
        {
            // if the preceding character was a white space (or a colon)
            // we treat the nibble as lower part:
            //( this is consistent with shortened hex notations where leading zeros are not noted)
            result.push_back( c );
        }
        else
        {
            // if it was part of a hex digit chain, we treat it as UPPER part (!!)
            result.push_back( c << 4 );
        }
    }
    return result;
} // eo convert_hex_to_binary(const std::string&)
| 578 | |
| 579 | |
/**
 * @brief provides the names of the templates whose allocator argument is shortened
 *        by shorten_stl_types().
 *
 * The list is filled on the first call ("std::list", "std::vector") and kept in a
 * function-local static afterwards.
 *
 * @return reference to the list of template names.
 */
static list<string>& alloc_template_starts()
{
    static std::list<std::string> starts;
    if (starts.empty())
    {
        static const char *const names[] = { "std::list", "std::vector" };
        starts.assign(names, names + sizeof(names)/sizeof(names[0]));
    }
    return starts;
}
| 590 | |
/**
 * @brief shortens the spelling of some STL types inside a text.
 *
 * Works on a copy of @a input in two steps:
 * - the long std::basic_string<char, ...> spelling is handed to replace_all() together
 *   with the replacement "std::string";
 * - for every template name from alloc_template_starts(): if the text between the first
 *   top-level comma and the closing bracket is exactly the std::allocator of the first
 *   template argument, it is replaced by "_alloc_".
 *
 * NOTE(review): replace_all() is defined outside this section; it is assumed to modify
 * its first argument in place - confirm.
 *
 * @param input the text containing the type names.
 * @return the text with the replacements applied.
 */
string shorten_stl_types(const string &input)
{
    string output = input;

    // first: replace fixed string for std::string
    replace_all(output, "std::basic_string<char, std::char_traits<char>, std::allocator<char> >",
                        "std::string");

    // loop over list/vector/... that have an allocator, e.g.
    // std::list< some_type_here, std::allocator<some_type_here> >
    string::size_type start, comma, end, len, start_text_len;
    int n_open_brackets;
    string allocator_text;
    BOOST_FOREACH(const string &start_text, alloc_template_starts())
    {
        start = 0;
        comma = 0;
        end = 0;
        start_text_len = start_text.length();
        while( (start=output.find(start_text+"<", start)) != string::npos )
        {
            // output may have been shortened by the previous iteration, so re-read its length
            len = output.length();
            start += start_text_len+1; // start next iter and tests here after opening bracket

            // now comes the tricky part: find matching ',' and the closing '>' even if "subtype" is template again
            // first scan: stop at the first ',' which is not nested in further brackets
            comma = start;
            n_open_brackets = 1; // the bracket right after start_text counts as first
            while (comma < len && n_open_brackets > 0)
            {
                if (output[comma] == ',' && n_open_brackets == 1)
                    break;
                else if (output[comma] == '<')
                    ++n_open_brackets;
                else if (output[comma] == '>')
                    --n_open_brackets;
                ++comma;
            }
            // second scan: continue behind the comma until the bracket opened after
            // start_text is closed again; end is left ON that closing bracket
            end = comma+1;
            while (end < len && n_open_brackets > 0)
            {
                if (output[end] == '<')
                    ++n_open_brackets;
                else if (output[end] == '>')
                {
                    --n_open_brackets;
                    if (n_open_brackets == 0)
                        break; // do not increment end
                }
                ++end;
            }

            // check that start < comma < end < len && n_open_brackets == 0
            // NOTE(review): this does not verify that output[comma] actually is a ',';
            // the first scan also ends when the brackets close without a top-level comma.
            if (start >= comma || comma >= end || end >= len || n_open_brackets != 0)
                continue; // input seems to be of unexpected form

            // check that type in allocator is same as until comma
            string type = output.substr(start, comma-start);
            // a type which itself ends with '>' is followed by a blank before the
            // closing bracket of the allocator
            if (type[type.length()-1] == '>')
                allocator_text = string("std::allocator<") + type + " > ";
            else
                allocator_text = string("std::allocator<") + type + "> ";
            // comma+2 skips the comma and the character following it; the compared
            // range ends right in front of the closing bracket at end
            if (output.substr(comma+2, end-comma-2) == allocator_text)
                output.replace(comma+2, end-comma-2, "_alloc_");
        }
    }

    return output;
}
| 659 | |
| 660 | typedef boost::shared_ptr<BIO> BIO_Ptr; |
| 661 | |
| 662 | /** |
| 663 | * @brief Converts openssl generic input/output to std::string |
| 664 | * |
| 665 | * Code adapted from keymakerd. |
| 666 | * |
| 667 | * @param bio Openssl's generic input/output |
| 668 | * @return :string STL string |
| 669 | **/ |
| 670 | static std::string _convert_BIO_to_string(BIO *input) |
| 671 | { |
| 672 | std::string rtn; |
| 673 | |
| 674 | char *output = NULL; |
| 675 | long written = BIO_get_mem_data(input, &output); |
| 676 | if (written <= 0 || output == NULL) |
| 677 | return rtn; |
| 678 | |
| 679 | rtn.assign(output, written); //lint !e534 !e732 |
| 680 | return rtn; |
| 681 | } //lint !e1764 |
| 682 | |
| 683 | /** |
| 684 | * @brief base64 encode a string using OpenSSL base64 functions |
| 685 | * |
| 686 | * Data size limit is 2GB on 32 bit (LONG_MAX) |
| 687 | * |
| 688 | * @param input String to encode |
| 689 | * @param one_line Encode all data as one line, no wrapping with line feeds |
| 690 | * @return base64 encoded string |
| 691 | */ |
| 692 | std::string base64_encode(const std::string &input, bool one_line) |
| 693 | { |
| 694 | // check for empty buffer |
| 695 | if (input.empty()) |
| 696 | return input; |
| 697 | |
| 698 | // safety check to ensure our check afer BIO_write() works |
| 699 | if (input.size() >= LONG_MAX) |
| 700 | throw runtime_error("base64 encode: Too much data"); |
| 701 | |
| 702 | // setup encoder. Note: BIO_free_all frees both BIOs. |
| 703 | BIO_Ptr base64_encoder(BIO_new(BIO_f_base64()), BIO_free_all); |
| 704 | BIO *encoder_bio = base64_encoder.get(); |
| 705 | if (one_line) |
| 706 | BIO_set_flags(encoder_bio, BIO_FLAGS_BASE64_NO_NL); |
| 707 | |
| 708 | // chain output buffer and encoder together |
| 709 | BIO *encoded_result = BIO_new(BIO_s_mem()); |
| 710 | BIO_push(encoder_bio, encoded_result); |
| 711 | |
| 712 | // encode |
| 713 | long written = BIO_write(encoder_bio, input.c_str(), input.size()); |
| 714 | if ((unsigned)written != input.size()) |
| 715 | { |
| 716 | ostringstream out; |
| 717 | out << "base64 encoding failed: input size: " |
| 718 | << input.size() << " vs. output size: " << written; |
| 719 | throw runtime_error(out.str()); |
| 720 | } |
| 721 | if (BIO_flush(encoder_bio) != 1) |
| 722 | throw runtime_error("base64 encode: BIO_flush() failed"); |
| 723 | |
| 724 | return _convert_BIO_to_string(encoded_result); |
| 725 | } |
| 726 | |
| 727 | /** |
| 728 | * @brief base64 decode a string using OpenSSL base64 functions |
| 729 | * |
| 730 | * @param input String to decode |
| 731 | * @param one_line Expect all base64 data in one line. Input with line feeds will fail. |
| 732 | * @return base64 decoded string |
| 733 | */ |
| 734 | std::string base64_decode(const std::string &input, bool one_line) |
| 735 | { |
| 736 | // check for empty buffer |
| 737 | if (input.empty()) |
| 738 | return input; |
| 739 | |
| 740 | // safety check for BIO_new_mem_buf() |
| 741 | if (input.size() >= INT_MAX) |
| 742 | throw runtime_error("base64 decode: Too much data"); |
| 743 | |
| 744 | // setup encoder. Note: BIO_free_all frees both BIOs. |
| 745 | BIO_Ptr base64_decoder(BIO_new(BIO_f_base64()), BIO_free_all); |
| 746 | BIO *bio_base64 = base64_decoder.get(); |
| 747 | if (one_line) |
| 748 | BIO_set_flags(bio_base64, BIO_FLAGS_BASE64_NO_NL); |
| 749 | |
| 750 | // chain input buffer and decoder together |
| 751 | BIO *bio_input = BIO_new_mem_buf((void*)input.c_str(), input.size()); |
| 752 | bio_input = BIO_push(bio_base64, bio_input); |
| 753 | |
| 754 | BIO_Ptr decoded_result(BIO_new(BIO_s_mem()), BIO_free_all); |
| 755 | BIO *bio_decoded = decoded_result.get(); |
| 756 | const int convbuf_size = 512; |
| 757 | char convbuf[convbuf_size]; |
| 758 | |
| 759 | long read_bytes = 0; |
| 760 | while((read_bytes = BIO_read(bio_input, convbuf, convbuf_size)) > 0) |
| 761 | { |
| 762 | BOOST_ASSERT(read_bytes <= convbuf_size); |
| 763 | long written_bytes = BIO_write(bio_decoded, convbuf, read_bytes); |
| 764 | if (written_bytes != read_bytes) |
| 765 | { |
| 766 | ostringstream out; |
| 767 | out << "base64 decoding failed: read_bytes: " |
| 768 | << read_bytes << " vs. written_bytes: " << written_bytes; |
| 769 | throw runtime_error(out.str()); |
| 770 | } |
| 771 | } |
| 772 | if (read_bytes == -2 || read_bytes == -1) |
| 773 | throw runtime_error("base64 decode: Error during decoding"); |
| 774 | |
| 775 | return _convert_BIO_to_string(bio_decoded); |
| 776 | } |
| 777 | |
| 778 | } // eo namespace I2n |
| 779 | |
| 780 | |
| 781 | |
| 782 | |
/**
 * @brief converts a string from ISO-8859-1 to UTF-8 using iconv.
 *
 * The conversion is best effort: the result of iconv() is not evaluated, whatever
 * was written to the output buffer is returned.
 *
 * @param isostring the ISO-8859-1 encoded string.
 * @return the UTF-8 encoded string (embedded NUL characters are preserved).
 * @throws std::runtime_error if iconv doesn't offer the conversion or the
 *         conversion buffer can't be allocated.
 */
std::string iso_to_utf8(const std::string& isostring)
{
    iconv_t i2utf8 = iconv_open("UTF-8", "ISO-8859-1");

    // check the descriptor (not the address of this function)
    if (i2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from ISO-8859-1 to UTF-8");

    size_t in_size = isostring.size();
    const size_t capacity = in_size*4;
    size_t out_size = capacity;

    char *buf = (char *)malloc(capacity+1);
    if (buf == NULL)
    {
        iconv_close(i2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = const_cast<char *>(isostring.c_str());
    char *out = buf;
    iconv(i2utf8, &in, &in_size, &out, &out_size);
    iconv_close(i2utf8);

    // out_size now holds the unused rest of the buffer
    std::string result;
    try
    {
        result.assign(buf, capacity - out_size);
    }
    catch (...)
    {
        free(buf);
        throw;
    }
    free(buf);

    return result;
}
| 815 | |
/**
 * @brief converts a string from UTF-8 to ISO-8859-1 using iconv.
 *
 * The conversion is best effort: the result of iconv() is not evaluated, whatever
 * was written to the output buffer is returned.
 *
 * @param utf8string the UTF-8 encoded string.
 * @return the ISO-8859-1 encoded string (embedded NUL characters are preserved).
 * @throws std::runtime_error if iconv doesn't offer the conversion or the
 *         conversion buffer can't be allocated.
 */
std::string utf8_to_iso(const std::string& utf8string)
{
    iconv_t utf82iso = iconv_open("ISO-8859-1","UTF-8");

    if (utf82iso == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to ISO-8859-1");

    size_t in_size = utf8string.size();
    const size_t capacity = in_size;
    size_t out_size = capacity;

    char *buf = (char *)malloc(capacity+1);
    if (buf == NULL)
    {
        iconv_close(utf82iso);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = const_cast<char *>(utf8string.c_str());
    char *out = buf;
    iconv(utf82iso, &in, &in_size, &out, &out_size);
    iconv_close(utf82iso);

    // out_size now holds the unused rest of the buffer
    std::string result;
    try
    {
        result.assign(buf, capacity - out_size);
    }
    catch (...)
    {
        free(buf);
        throw;
    }
    free(buf);

    return result;
}
| 848 | |
/**
 * @brief converts an UTF-8 string to a zero terminated wide character buffer.
 *
 * The conversion target is "UCS-4LE" and the result is stored in wchar_t units.
 * NOTE(review): this presumably expects a platform with 4 byte little endian
 * wchar_t - confirm.
 *
 * @param utf8string the UTF-8 encoded string.
 * @return buffer allocated with malloc(); the caller has to release it with free().
 * @throws std::runtime_error if iconv doesn't offer the conversion, the buffer
 *         can't be allocated or the conversion fails.
 */
wchar_t* utf8_to_wbuf(const std::string& utf8string)
{
    iconv_t utf82wstr = iconv_open("UCS-4LE","UTF-8");

    if (utf82wstr == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UCS-4");

    size_t in_size = utf8string.size();
    // one unit per input byte is the worst case; plus one for the terminator
    const size_t capacity = (in_size+1)*sizeof(wchar_t);
    size_t out_size = capacity;

    wchar_t *buf = (wchar_t *)malloc(capacity);
    if (buf == NULL)
    {
        iconv_close(utf82wstr);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = const_cast<char *>(utf8string.c_str());
    char *out = (char*) buf;
    const size_t rc = iconv(utf82wstr, &in, &in_size, &out, &out_size);
    iconv_close(utf82wstr);

    if (rc == (size_t)-1)
    {
        // don't leak the buffer when bailing out
        free(buf);
        throw std::runtime_error("error converting char encodings");
    }

    // out_size now holds the unused rest of the buffer
    buf[ (capacity-out_size) / sizeof(wchar_t) ] = 0;

    return buf;
}
| 877 | |
/**
 * @brief converts a string from UTF-7-IMAP to UTF-8 using iconv.
 *
 * The conversion is best effort: the result of iconv() is not evaluated, whatever
 * was written to the output buffer is returned.
 *
 * @param utf7imapstring the UTF-7-IMAP encoded string.
 * @return the UTF-8 encoded string (embedded NUL characters are preserved).
 * @throws std::runtime_error if iconv doesn't offer the conversion or the
 *         conversion buffer can't be allocated.
 */
std::string utf7imap_to_utf8(const std::string& utf7imapstring)
{
    iconv_t utf7imap2utf8 = iconv_open("UTF-8","UTF-7-IMAP");

    if (utf7imap2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-7-IMAP to UTF-8");

    size_t in_size = utf7imapstring.size();
    const size_t capacity = in_size*4;
    size_t out_size = capacity;

    char *buf = (char *)malloc(capacity+1);
    if (buf == NULL)
    {
        iconv_close(utf7imap2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = const_cast<char *>(utf7imapstring.c_str());
    char *out = buf;
    iconv(utf7imap2utf8, &in, &in_size, &out, &out_size);
    iconv_close(utf7imap2utf8);

    // out_size now holds the unused rest of the buffer
    std::string result;
    try
    {
        result.assign(buf, capacity - out_size);
    }
    catch (...)
    {
        free(buf);
        throw;
    }
    free(buf);

    return result;
}
| 910 | |
/**
 * @brief converts a string from UTF-8 to UTF-7-IMAP using iconv.
 *
 * The conversion is best effort: the result of iconv() is not evaluated, whatever
 * was written to the output buffer is returned.
 *
 * NOTE(review): no final iconv() call with a NULL input is issued to flush a pending
 * shift state - confirm that the UTF-7-IMAP converter in use doesn't need one.
 *
 * @param utf8string the UTF-8 encoded string.
 * @return the UTF-7-IMAP encoded string (embedded NUL characters are preserved).
 * @throws std::runtime_error if iconv doesn't offer the conversion or the
 *         conversion buffer can't be allocated.
 */
std::string utf8_to_utf7imap(const std::string& utf8string)
{
    iconv_t utf82utf7imap = iconv_open("UTF-7-IMAP", "UTF-8");

    if (utf82utf7imap == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UTF-7-IMAP");

    // UTF-7 is base64 encoded, a buffer 10x as large
    // as the utf-8 buffer should be enough. If not the string will be truncated.
    size_t in_size = utf8string.size();
    const size_t capacity = in_size*10;
    size_t out_size = capacity;

    char *buf = (char *)malloc(capacity+1);
    if (buf == NULL)
    {
        iconv_close(utf82utf7imap);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = const_cast<char *>(utf8string.c_str());
    char *out = buf;
    iconv(utf82utf7imap, &in, &in_size, &out, &out_size);
    iconv_close(utf82utf7imap);

    // out_size now holds the unused rest of the buffer
    std::string result;
    try
    {
        result.assign(buf, capacity - out_size);
    }
    catch (...)
    {
        free(buf);
        throw;
    }
    free(buf);

    return result;
}
| 945 | |
/**
 * @brief Split a string into alternating text and (html) tag tokens.
 * @param tokenized receives the tokens in input order; the bool of each pair
 *                  is true for a complete "<...>" tag and false for anything else
 * @param input the string to split
 *
 * Tokens are appended, existing entries of @a tokenized are kept. A '<' always
 * starts a new token; text that follows an unclosed '<' is reported as non-tag.
 */
void tokenize_by_tag(std::vector<std::pair<std::string,bool> > &tokenized, const std::string &input)
{
    std::string pending;
    bool in_tag = false;

    for (std::string::const_iterator ch = input.begin(); ch != input.end(); ++ch)
    {
        const bool opens = (*ch == '<');
        const bool closes = (*ch == '>') && in_tag;

        // whatever was collected before a '<' is plain text
        if (opens && !pending.empty())
        {
            tokenized.push_back(std::make_pair(pending, false));
            pending.clear();
        }

        pending += *ch;

        if (opens)
        {
            in_tag = true;
        }
        else if (closes)
        {
            in_tag = false;
            tokenized.push_back(std::make_pair(pending, true));
            pending.clear();
        }
    }

    // trailing text (or an unterminated tag) is reported as non-tag
    if (!pending.empty())
        tokenized.push_back(std::make_pair(pending, false));
} // eo tokenize_by_tag
| 985 | |
| 986 | |
| 987 | std::string strip_html_tags(const std::string &input) |
| 988 | { |
| 989 | // Pair first: string, second: isTag |
| 990 | vector<pair<string,bool> > tokenized; |
| 991 | tokenize_by_tag (tokenized, input); |
| 992 | |
| 993 | string output; |
| 994 | vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end(); |
| 995 | for (token = tokenized.begin(); token != tokens_end; ++token) |
| 996 | if (!token->second) |
| 997 | output += token->first; |
| 998 | |
| 999 | return output; |
| 1000 | } // eo strip_html_tags |
| 1001 | |
| 1002 | |
| 1003 | // Smart-encode HTML en |
| 1004 | string smart_html_entities(const std::string &input) |
| 1005 | { |
| 1006 | // Pair first: string, second: isTag |
| 1007 | vector<pair<string,bool> > tokenized; |
| 1008 | tokenize_by_tag (tokenized, input); |
| 1009 | |
| 1010 | string output; |
| 1011 | vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end(); |
| 1012 | for (token = tokenized.begin(); token != tokens_end; ++token) |
| 1013 | { |
| 1014 | // keep HTML tags as they are |
| 1015 | if (token->second) |
| 1016 | output += token->first; |
| 1017 | else |
| 1018 | output += html_entities(token->first); |
| 1019 | } |
| 1020 | |
| 1021 | return output; |
| 1022 | } |
| 1023 | |
| 1024 | |
/**
 * @brief Find the first byte that is not 7 bit ASCII.
 * @param str the string to scan
 * @return index of the first byte with a value above 127,
 *         std::string::npos if there is none
 */
std::string::size_type find_8bit(const std::string &str)
{
    std::string::size_type index = 0;

    for (std::string::const_iterator it = str.begin(); it != str.end(); ++it, ++index)
    {
        // a set high bit is the same as "value > 127" for an unsigned byte
        if ((static_cast<unsigned char>(*it) & 0x80) != 0)
            return index;
    }

    return std::string::npos;
}
| 1034 | |
| 1035 | // encoded UTF-8 chars into HTML entities |
| 1036 | string html_entities(std::string str) |
| 1037 | { |
| 1038 | // Normal chars |
| 1039 | replace_all (str, "&", "&"); |
| 1040 | replace_all (str, "<", "<"); |
| 1041 | replace_all (str, ">", ">"); |
| 1042 | replace_all (str, "\"", """); |
| 1043 | replace_all (str, "'", "'"); |
| 1044 | replace_all (str, "/", "/"); |
| 1045 | |
| 1046 | // Umlauts |
| 1047 | replace_all (str, "\xC3\xA4", "ä"); |
| 1048 | replace_all (str, "\xC3\xB6", "ö"); |
| 1049 | replace_all (str, "\xC3\xBC", "ü"); |
| 1050 | replace_all (str, "\xC3\x84", "Ä"); |
| 1051 | replace_all (str, "\xC3\x96", "Ö"); |
| 1052 | replace_all (str, "\xC3\x9C", "Ü"); |
| 1053 | |
| 1054 | // Misc |
| 1055 | replace_all (str, "\xC3\x9F", "ß"); |
| 1056 | |
| 1057 | // conversion of remaining non-ASCII chars needed? |
| 1058 | // just do if needed because of performance |
| 1059 | if (find_8bit(str) != string::npos) |
| 1060 | { |
| 1061 | // convert to fixed-size encoding UTF-32 |
| 1062 | wchar_t* wbuf=utf8_to_wbuf(str); |
| 1063 | ostringstream target; |
| 1064 | |
| 1065 | // replace all non-ASCII chars with HTML representation |
| 1066 | for (int p=0; wbuf[p] != 0; p++) |
| 1067 | { |
| 1068 | unsigned int c=wbuf[p]; |
| 1069 | |
| 1070 | if (c <= 127) |
| 1071 | target << static_cast<unsigned char>(c); |
| 1072 | else |
| 1073 | target << "&#" << c << ';'; |
| 1074 | } |
| 1075 | |
| 1076 | free(wbuf); |
| 1077 | |
| 1078 | str=target.str(); |
| 1079 | } |
| 1080 | |
| 1081 | return str; |
| 1082 | } // eo html_entities(std::string) |
| 1083 | |
| 1084 | // convert HTML entities to something that can be viewed on a basic text console (restricted to ASCII-7) |
| 1085 | string html_entities_to_console(std::string str) |
| 1086 | { |
| 1087 | // Normal chars |
| 1088 | replace_all (str, "&", "&"); |
| 1089 | replace_all (str, "<", "<"); |
| 1090 | replace_all (str, ">", ">"); |
| 1091 | replace_all (str, """, "\""); |
| 1092 | replace_all (str, "'", "'"); |
| 1093 | replace_all (str, "/", "/"); |
| 1094 | |
| 1095 | // Umlauts |
| 1096 | replace_all (str, "ä", "ae"); |
| 1097 | replace_all (str, "ö", "oe"); |
| 1098 | replace_all (str, "ü", "ue"); |
| 1099 | replace_all (str, "Ä", "Ae"); |
| 1100 | replace_all (str, "Ö", "Oe"); |
| 1101 | replace_all (str, "Ü", "Ue"); |
| 1102 | |
| 1103 | // Misc |
| 1104 | replace_all (str, "ß", "ss"); |
| 1105 | |
| 1106 | return str; |
| 1107 | } |
| 1108 | |
| 1109 | // find_html_comments + remove_html_comments(str, comments) |
| 1110 | void remove_html_comments(string &str) |
| 1111 | { |
| 1112 | vector<CommentZone> comments = find_html_comments(str); |
| 1113 | remove_html_comments(str, comments); |
| 1114 | } |
| 1115 | |
| 1116 | // find all html comments, behaving correctly if they are nested; ignores comment tags ("<!--FOO .... BAR-->") |
| 1117 | // If there are invalid comments ("-->" before "<!--" or different number of closing and opening tags), |
| 1118 | // then the unknown index of corresponding start/end tag will be represented by a string::npos |
| 1119 | // Indices are from start of start tag until first index after closing tag |
| 1120 | vector<CommentZone> find_html_comments(const std::string &str) |
| 1121 | { |
| 1122 | static const string START = "<!--"; |
| 1123 | static const string CLOSE = "-->"; |
| 1124 | static const string::size_type START_LEN = START.length(); |
| 1125 | static const string::size_type CLOSE_LEN = CLOSE.length(); |
| 1126 | |
| 1127 | vector<CommentZone> comments; |
| 1128 | |
| 1129 | // in order to find nested comments, need either recursion or a stack |
| 1130 | vector<string::size_type> starts; // stack of start tags |
| 1131 | |
| 1132 | string::size_type pos = 0; |
| 1133 | string::size_type len = str.length(); |
| 1134 | string::size_type next_start, next_close; |
| 1135 | |
| 1136 | while (pos < len) // not really needed but just in case |
| 1137 | { |
| 1138 | next_start = str.find(START, pos); |
| 1139 | next_close = str.find(CLOSE, pos); |
| 1140 | |
| 1141 | if ( (next_start == string::npos) && (next_close == string::npos) ) |
| 1142 | break; // we are done |
| 1143 | |
| 1144 | else if ( (next_start == string::npos) || (next_close < next_start) ) // close one comment (pop) |
| 1145 | { |
| 1146 | if (starts.empty()) // closing tag without a start |
| 1147 | comments.push_back(CommentZone(string::npos, next_close+CLOSE_LEN)); |
| 1148 | else |
| 1149 | { |
| 1150 | comments.push_back(CommentZone(starts.back(), next_close+CLOSE_LEN)); |
| 1151 | starts.pop_back(); |
| 1152 | } |
| 1153 | pos = next_close + CLOSE_LEN; |
| 1154 | } |
| 1155 | |
| 1156 | else if ( (next_close == string::npos) || (next_start < next_close) ) // start a new comment (push) |
| 1157 | { |
| 1158 | starts.push_back(next_start); |
| 1159 | pos = next_start + START_LEN; |
| 1160 | } |
| 1161 | } |
| 1162 | |
| 1163 | // add comments that have no closing tag from back to front (important for remove_html_comments!) |
| 1164 | while (!starts.empty()) |
| 1165 | { |
| 1166 | comments.push_back(CommentZone(starts.back(), string::npos)); |
| 1167 | starts.pop_back(); |
| 1168 | } |
| 1169 | |
| 1170 | return comments; |
| 1171 | } |
| 1172 | |
| 1173 | // remove all html comments foundby find_html_comments |
| 1174 | void remove_html_comments(std::string &str, const vector<CommentZone> &comments) |
| 1175 | { |
| 1176 | // remember position where last removal started |
| 1177 | string::size_type last_removal_start = str.length(); |
| 1178 | |
| 1179 | // Go from back to front to not mess up indices. |
| 1180 | // This requires that bigger comments, that contain smaller comments, come AFTER |
| 1181 | // the small contained comments in the comments vector (i.e. comments are ordered by |
| 1182 | // their closing tag, not their opening tag). This is true for results from find_html_comments |
| 1183 | BOOST_REVERSE_FOREACH(const CommentZone &comment, comments) |
| 1184 | { |
| 1185 | if (comment.first == string::npos) |
| 1186 | { |
| 1187 | str = str.replace(0, comment.second, ""); // comment starts "before" str --> delete from start |
| 1188 | break; // there can be no more |
| 1189 | } |
| 1190 | else if (comment.first >= last_removal_start) |
| 1191 | { |
| 1192 | continue; // this comment is inside another comment that we have removed already |
| 1193 | } |
| 1194 | else if (comment.second == string::npos) // comment ends "after" str --> delete until end |
| 1195 | { |
| 1196 | str = str.replace(comment.first, string::npos, ""); |
| 1197 | last_removal_start = comment.first; |
| 1198 | } |
| 1199 | else |
| 1200 | { |
| 1201 | str = str.replace(comment.first, comment.second-comment.first, ""); |
| 1202 | last_removal_start = comment.first; |
| 1203 | } |
| 1204 | } |
| 1205 | } |
| 1206 | |
| 1207 | bool replace_all(string &base, const char *ist, const char *soll) |
| 1208 | { |
| 1209 | string i=ist; |
| 1210 | string s=soll; |
| 1211 | return replace_all(base,&i,&s); |
| 1212 | } |
| 1213 | |
| 1214 | bool replace_all(string &base, const string &ist, const char *soll) |
| 1215 | { |
| 1216 | string s=soll; |
| 1217 | return replace_all(base,&ist,&s); |
| 1218 | } |
| 1219 | |
| 1220 | bool replace_all(string &base, const string *ist, const string *soll) |
| 1221 | { |
| 1222 | return replace_all(base,*ist,*soll); |
| 1223 | } |
| 1224 | |
| 1225 | bool replace_all(string &base, const char *ist, const string *soll) |
| 1226 | { |
| 1227 | string i=ist; |
| 1228 | return replace_all(base,&i,soll); |
| 1229 | } |
| 1230 | |
/**
 * @brief Replace every occurrence of one text by another, in place.
 * @param base the string to modify
 * @param ist the text to search for; must not be empty
 * @param soll the text to insert instead
 * @return true if at least one occurrence was replaced
 * @throws std::runtime_error if @a ist is empty
 *
 * The search continues behind each inserted replacement, so occurrences of
 * @a ist inside @a soll are not replaced again.
 */
bool replace_all(std::string &base, const std::string &ist, const std::string &soll)
{
    if (ist.empty())
        throw std::runtime_error ("replace_all called with empty search string");

    bool replaced_any = false;

    for (std::string::size_type hit = base.find(ist, 0);
         hit != std::string::npos;
         hit = base.find(ist, hit + soll.size()))
    {
        base.replace(hit, ist.size(), soll);
        replaced_any = true;
    }

    return replaced_any;
}
| 1248 | |
/**
 * @brief Replace every character that could be problematic or a security risk in a log.
 * @param str the original string
 * @param replace_with the character that takes the place of each unsafe character
 * @return a copy of @a str that is safe to send to syslog or other logfiles
 *
 * Only the characters from 0x20 (space) up to and including 0x7E (~) are
 * considered safe for logging, see e.g. RFC 5424, section 8.2 or the posix
 * character class "printable". Everything else - NUL, control characters,
 * 8 bit characters and therefore all UTF-8 multi byte sequences - is replaced.
 */
std::string sanitize_for_logging(const std::string &str, const char replace_with)
{
    std::string cleaned;
    cleaned.reserve(str.size());

    for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        const unsigned char byte = static_cast<unsigned char>(*it);
        const bool printable = (byte >= 0x20) && (byte <= 0x7E);

        cleaned += printable ? *it : replace_with;
    }

    return cleaned;
}
| 1271 | |
// The two helpers below are compiled out ("#if 0") and kept for reference only.
#if 0
// Returns a copy of src with tolower() applied to every char.
string to_lower(const string &src)
{
    string dst = src;

    string::size_type pos, end = dst.size();
    for (pos = 0; pos < end; pos++)
        dst[pos] = tolower(dst[pos]);

    return dst;
}

// Returns a copy of src with toupper() applied to every char.
string to_upper(const string &src)
{
    string dst = src;

    string::size_type pos, end = dst.size();
    for (pos = 0; pos < end; pos++)
        dst[pos] = toupper(dst[pos]);

    return dst;
}
#endif
| 1295 | |
// Number of entries in each of the unit symbol tables below.
// nice_unit_format() stops scaling once the last entry is reached.
const int MAX_UNIT_FORMAT_SYMBOLS = 6;

// Unit suffixes appended by nice_unit_format() for ShortUnitFormat.
// The index is the number of times the value was divided by the unit base.
const string shortUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
    " B",
    " KB",
    " MB",
    " GB",
    " TB",
    " PB"
};

// Unit suffixes appended by nice_unit_format() for every other format; same
// indexing as above. nice_unit_format() passes the selected entry through i18n().
// NOTE(review): i18n_noop presumably only marks the literals for translation - confirm in i18n.h.
const string longUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
    i18n_noop(" Bytes"),
    i18n_noop(" KBytes"),
    i18n_noop(" MBytes"),
    i18n_noop(" GBytes"),
    i18n_noop(" TBytes"),
    i18n_noop(" PBytes")
};
| 1315 | |
| 1316 | |
/**
 * @brief Round a number "half up" to a fraction given by a multiplier.
 * @param number the value to round
 * @param rounding_multiplier reciprocal of the rounding step, e.g. 10 to round
 *        to one decimal place, 100 for two
 * @return @a number rounded to the nearest multiple of 1/@a rounding_multiplier,
 *         ties going towards positive infinity
 *
 * floor() is used instead of a cast to an integer type: a cast truncates
 * towards zero, which rounds negative values the wrong way (-1500 would
 * become -1499.9), and is undefined for values outside the integer range.
 */
static long double rounding_upwards(
    const long double number,
    const int rounding_multiplier
)
{
    const long double scaled = number * rounding_multiplier;
    const long double rounded = std::floor(scaled + 0.5L);

    return rounded / static_cast<long double>(rounding_multiplier);
}
| 1330 | |
| 1331 | |
| 1332 | string nice_unit_format( |
| 1333 | const int64_t input, |
| 1334 | const UnitFormat format, |
| 1335 | const UnitBase base |
| 1336 | ) |
| 1337 | { |
| 1338 | // select the system of units (decimal or binary) |
| 1339 | int multiple = 0; |
| 1340 | if (base == UnitBase1000) |
| 1341 | { |
| 1342 | multiple = 1000; |
| 1343 | } |
| 1344 | else |
| 1345 | { |
| 1346 | multiple = 1024; |
| 1347 | } |
| 1348 | |
| 1349 | long double size = input; |
| 1350 | |
| 1351 | // check the size of the input number to fit in the appropriate symbol |
| 1352 | int sizecount = 0; |
| 1353 | while (size > multiple) |
| 1354 | { |
| 1355 | size = size / multiple; |
| 1356 | sizecount++; |
| 1357 | |
| 1358 | // rollback to the previous values and stop the loop when cannot |
| 1359 | // represent the number length. |
| 1360 | if (sizecount >= MAX_UNIT_FORMAT_SYMBOLS) |
| 1361 | { |
| 1362 | size = size * multiple; |
| 1363 | sizecount--; |
| 1364 | break; |
| 1365 | } |
| 1366 | } |
| 1367 | |
| 1368 | // round the input number "half up" to multiples of 10 |
| 1369 | const int rounding_multiplier = 10; |
| 1370 | size = rounding_upwards(size, rounding_multiplier); |
| 1371 | |
| 1372 | // format the input number, placing the appropriate symbol |
| 1373 | ostringstream out; |
| 1374 | out.setf (ios::fixed); |
| 1375 | if (format == ShortUnitFormat) |
| 1376 | { |
| 1377 | out.precision(1); |
| 1378 | out << size << i18n( shortUnitFormatSymbols[sizecount].c_str() ); |
| 1379 | } |
| 1380 | else |
| 1381 | { |
| 1382 | out.precision (2); |
| 1383 | out << size << i18n( longUnitFormatSymbols[sizecount].c_str() ); |
| 1384 | } |
| 1385 | |
| 1386 | return out.str(); |
| 1387 | } // eo nice_unit_format(int input) |
| 1388 | |
| 1389 | |
| 1390 | string nice_unit_format( |
| 1391 | const double input, |
| 1392 | const UnitFormat format, |
| 1393 | const UnitBase base |
| 1394 | ) |
| 1395 | { |
| 1396 | // round as double and cast to int64_t |
| 1397 | // cast raised overflow error near max val of int64_t (~9.2e18, see unittest) |
| 1398 | int64_t input_casted_and_rounded = |
| 1399 | boost::numeric_cast<int64_t>( round(input) ); |
| 1400 | |
| 1401 | // now call other |
| 1402 | return nice_unit_format( input_casted_and_rounded, format, base ); |
| 1403 | } // eo nice_unit_format(double input) |
| 1404 | |
| 1405 | |
/**
 * @brief Turn a string into a double-quoted, backslash-escaped representation.
 * @param s the text to escape
 * @return @a s enclosed in double quotes, with '"' and '\' prefixed by a
 *         backslash and CR / LF written as the two characters "\r" / "\n"
 */
std::string escape(const std::string &s)
{
    std::string quoted(1, '"');

    for (std::string::const_iterator ch = s.begin(); ch != s.end(); ++ch)
    {
        switch (*ch)
        {
            case '"':
            case '\\':
                quoted += '\\';
                quoted += *ch;
                break;
            case '\r':
                quoted += "\\r";
                break;
            case '\n':
                quoted += "\\n";
                break;
            default:
                quoted += *ch;
        }
    }

    quoted += '"';

    return quoted;
} // eo escape(const std::string&)
| 1436 | |
| 1437 | |
/**
 * @brief Extract and unescape a double-quoted string as produced by escape().
 * @param s the text containing the escaped string
 * @param startpos index of the opening double quote within @a s
 * @param endpos receives the index of the closing double quote within @a s
 * @return the unescaped content between the quotes
 * @throws std::out_of_range if @a startpos is outside of @a s, if the
 *         character at @a startpos is not a double quote, or if the content
 *         ends with a lone backslash
 *
 * A double quote ends the string when it is preceded by an even number of
 * backslashes. "\r" and "\n" become CR and LF; for any other escaped
 * character the backslash is simply dropped. If there is no closing quote,
 * everything behind the opening quote is used.
 */
std::string descape(const std::string &s, int startpos, int &endpos)
{
    if (s.at(startpos) != '"')
        throw std::out_of_range("value not type escaped string");

    std::string text = s.substr(startpos + 1);

    // search for the end of the string: the first unescaped double quote
    std::string::size_type quote = text.find('"');
    while (quote != std::string::npos)
    {
        // count the backslashes directly in front of the quote
        std::string::size_type backslashes = 0;
        while (backslashes < quote && text[quote - 1 - backslashes] == '\\')
            ++backslashes;

        // an even number means they escape each other, not the quote
        if (backslashes % 2 == 0)
            break;

        quote = text.find('"', quote + 1);
    }

    // cut off the closing quote and everything behind it
    if (quote != std::string::npos)
        text.erase(quote);

    // tell the caller where the string ended
    endpos = startpos + quote + 1;

    // resolve all backslash sequences inside the string
    for (std::string::size_type bs = text.find('\\');
         bs != std::string::npos;
         bs = text.find('\\', bs + 1))
    {
        const char code = text.at(bs + 1);

        if (code == 'r')
            text.replace(bs, 2, "\r");
        else if (code == 'n')
            text.replace(bs, 2, "\n");
        else
            text.erase(bs, 1);   // keep the escaped character itself
    }

    return text;
} // eo descape(const std::string&,int,int&)
| 1497 | |
| 1498 | |
/**
 * @brief Quote a string so that it can be used as a single shell argument.
 * @param input the text to quote
 * @return @a input enclosed in single quotes; every single quote inside is
 *         written as '\'' (close the quoting, escaped quote, reopen the quoting)
 */
std::string escape_shellarg(const std::string &input)
{
    std::string quoted(1, '\'');

    std::string::size_type from = 0;
    for (;;)
    {
        const std::string::size_type quote_at = input.find('\'', from);

        if (quote_at == std::string::npos)
        {
            // no further quote: copy the rest unchanged
            quoted.append(input, from, std::string::npos);
            break;
        }

        // copy the text in front of the quote, then the escaped quote
        quoted.append(input, from, quote_at - from);
        quoted += "'\\''";
        from = quote_at + 1;
    }

    quoted += '\'';

    return quoted;
}