Replace bzero() with memset(). bzero() is removed in POSIX-2008
[libi2ncommon] / src / stringfunc.cpp
CommitLineData
0e23f538
TJ
1/*
2The software in this package is distributed under the GNU General
3Public License version 2 (with a special exception described below).
4
5A copy of GNU General Public License (GPL) is included in this distribution,
6in the file COPYING.GPL.
7
8As a special exception, if other files instantiate templates or use macros
9or inline functions from this file, or you compile this file and link it
10with other works to produce a work based on this file, this file
11does not by itself cause the resulting work to be covered
12by the GNU General Public License.
13
14However the source code for this file must still be made available
15in accordance with section (3) of the GNU General Public License.
16
17This exception does not invalidate any other reasons why a work based
18on this file might be covered by the GNU General Public License.
19*/
6a93d84a
TJ
20/** @file
21 *
22 * (c) Copyright 2007-2008 by Intra2net AG
6a93d84a 23 */
e93545dd
GE
24
25#include <iostream>
26#include <string>
27#include <sstream>
28#include <stdexcept>
5efd35b1 29#include <algorithm>
e93545dd 30
a5f3af6e 31#include <wchar.h>
e93545dd
GE
32#include <stdlib.h>
33#include <iconv.h>
34#include <i18n.h>
35
36#include <stringfunc.hxx>
37
38using namespace std;
39
6ab3bc95
RP
40namespace I2n
41{
6a93d84a
TJ
42
43
6ab3bc95
RP
44namespace
45{
6a93d84a
TJ
46
// Digit alphabets for the hex conversion routines: the position of a
// character inside the string is the numeric value of that digit.
const std::string hexDigitsLower = "0123456789abcdef";
const std::string hexDigitsUpper = "0123456789ABCDEF";
49
50
/**
 * Function object which converts a single character to upper case.
 * It is the transformation passed to std::transform() by to_upper_mod().
 */
struct UpperFunc
{
    char operator() (char c) const
    {
        // std::toupper() is only defined for EOF and for values representable
        // as unsigned char; a plain char may be negative (bytes > 127), so it
        // has to be converted before the call.
        return static_cast<char>( std::toupper( static_cast<unsigned char>(c) ) );
    }
}; // eo struct UpperFunc
58
59
/**
 * Function object which converts a single character to lower case.
 * It is the transformation passed to std::transform() by to_lower_mod().
 */
struct LowerFunc
{
    char operator() (char c) const
    {
        // std::tolower() is only defined for EOF and for values representable
        // as unsigned char; a plain char may be negative (bytes > 127), so it
        // has to be converted before the call.
        return static_cast<char>( std::tolower( static_cast<unsigned char>(c) ) );
    }
}; // eo struct LowerFunc
67
68
69} // eo namespace <anonymous>
70
71
72
/**
 * Default set of whitespace characters: space, tab, carriage return, newline.
 */
const std::string Whitespaces(" \t\r\n");

/**
 * Default set of line ending characters: carriage return and newline.
 */
const std::string LineEndings("\r\n");
6a93d84a
TJ
82
83
84
/**
 * @brief tests whether a string starts with a given prefix.
 * @param str the string which is tested.
 * @param prefix the prefix to look for.
 * @return @a true iff the prefix is not empty and @a str begins with it.
 */
bool has_prefix(const std::string& str, const std::string& prefix)
{
    const std::string::size_type prefix_len = prefix.size();
    if (prefix_len == 0 || str.empty() || prefix_len > str.size())
    {
        // an empty prefix never matches; neither does one longer than str
        return false;
    }
    return 0 == str.compare(0, prefix_len, prefix);
} // eo has_prefix(const std::string&,const std::string&)
6a93d84a
TJ
99
100
/**
 * @brief tests whether a string ends with a given suffix.
 * @param str the string which is tested.
 * @param suffix the suffix to look for.
 * @return @a true iff the suffix is not empty and @a str ends with it.
 */
bool has_suffix(const std::string& str, const std::string& suffix)
{
    const std::string::size_type suffix_len = suffix.size();
    if (suffix_len == 0 || str.empty() || suffix_len > str.size())
    {
        // an empty suffix never matches; neither does one longer than str
        return false;
    }
    const std::string::size_type tail_start = str.size() - suffix_len;
    return 0 == str.compare(tail_start, suffix_len, suffix);
} // eo has_suffix(const std::string&,const std::string&)
6a93d84a
TJ
115
116
/**
 * strips all leading and trailing characters contained in a given list
 * from a string (in place).
 * @param[in,out] str the string which is trimmed.
 * @param charlist the characters to remove from the beginning and the end.
 * @return a copy of the trimmed string.
 */
std::string trim_mod(std::string& str, const std::string& charlist)
{
    const std::string::size_type first_kept = str.find_first_not_of(charlist);
    if (first_kept == std::string::npos)
    {
        // nothing to keep: str is empty or consists only of charlist chars
        str.clear();
        return str;
    }
    // at least one character survives, so this search cannot fail
    const std::string::size_type last_kept = str.find_last_not_of(charlist);
    // cut the tail first; the front index stays valid that way
    str.erase(last_kept + 1);
    str.erase(0, first_kept);
    return str;
} // eo trim_mod(std::string&,const std::string&)
6a93d84a
TJ
146
147
148
/**
 * removes the last character of a string (in place) if that character is
 * contained in a given list. At most one character is removed.
 * @param[in,out] str the string.
 * @param what the list of characters which qualify for removal.
 * @return a copy of the resulting string.
 */
std::string chomp_mod(std::string& str, const std::string& what)
{
    if (!str.empty() && !what.empty())
    {
        const std::string::size_type last = str.size() - 1;
        if (what.find(str[last]) != std::string::npos)
        {
            str.erase(last);
        }
    }
    return str;
} // eo chomp_mod(std::string&,const std::string&)
6a93d84a
TJ
167
168
169/**
170 * @brief converts a string to lower case.
171 * @param[in,out] str the string to modify.
172 * @return the string
173 */
6ab3bc95 174std::string to_lower_mod(std::string& str)
6a93d84a 175{
6ab3bc95
RP
176 std::transform(str.begin(), str.end(), str.begin(), LowerFunc() );
177 return str;
178} // eo to_lower_mod(std::string&)
6a93d84a
TJ
179
180
181/**
182 * @brief converts a string to upper case.
183 * @param[in,out] str the string to modify.
184 * @return the string
185 */
6ab3bc95 186std::string to_upper_mod(std::string& str)
6a93d84a 187{
6ab3bc95
RP
188 std::transform( str.begin(), str.end(), str.begin(), UpperFunc() );
189 return str;
190} // eo to_upper_mod(std::string&)
6a93d84a
TJ
191
192
193
/**
 * returns a copy of a string with all leading and trailing characters
 * contained in a given list removed.
 * @param str the string which should be trimmed.
 * @param charlist the characters to remove from the beginning and the end.
 * @return the trimmed string.
 */
std::string trim (const std::string& str, const std::string& charlist)
{
    const std::string::size_type first_kept = str.find_first_not_of(charlist);
    if (first_kept == std::string::npos)
    {
        // str is empty or made up of charlist characters only
        return std::string();
    }
    const std::string::size_type last_kept = str.find_last_not_of(charlist);
    return std::string(str, first_kept, last_kept - first_kept + 1);
} // eo trim(const std::string&,const std::string&)
213
214
/**
 * returns a copy of a string without its last character if that character
 * is contained in a given list. At most one character is removed.
 * @param str the string.
 * @param what the list of characters which qualify for removal.
 * @return the resulting string.
 */
std::string chomp (const std::string& str, const std::string& what)
{
    if (str.empty() || what.empty() )
    {
        return str;
    }
    const bool strip_last = what.find(*str.rbegin()) != std::string::npos;
    return strip_last ? std::string(str, 0, str.size() - 1) : str;
} // eo chomp(const std::string&,const std::string&)
233
234
235/**
236 * @brief returns a lower case version of a given string.
237 * @param str the string
238 * @return the lower case version of the string
239 */
6ab3bc95 240std::string to_lower (const std::string& str)
6a93d84a 241{
6ab3bc95
RP
242 std::string result(str);
243 return to_lower_mod(result);
244} // eo to_lower(const std::string&)
6a93d84a
TJ
245
246
247/**
248 * @brief returns a upper case version of a given string.
249 * @param str the string
250 * @return the upper case version of the string
251 */
6ab3bc95 252std::string to_upper(const std::string& str)
6a93d84a 253{
6ab3bc95
RP
254 std::string result(str);
255 return to_upper_mod(result);
256} // eo to_upper(const std::string&)
6a93d84a
TJ
257
258
259
260/**
261 * @brief removes a given suffix from a string.
262 * @param str the string.
263 * @param suffix the suffix which should be removed if the string ends with it.
264 * @return the string without the suffix.
265 *
266 * If the string ends with the suffix, it is removed. If the the string doesn't end
267 * with the suffix the original string is returned.
268 */
6ab3bc95 269std::string remove_suffix(const std::string& str, const std::string& suffix)
6a93d84a 270{
6ab3bc95
RP
271 if (has_suffix(str,suffix) )
272 {
273 return str.substr(0, str.size()-suffix.size() );
274 }
275 return str;
276} // eo remove_suffix(const std::string&,const std::string&)
6a93d84a
TJ
277
278
279
280/**
281 * @brief removes a given prefix from a string.
282 * @param str the string.
283 * @param prefix the prefix which should be removed if the string begins with it.
284 * @return the string without the prefix.
285 *
286 * If the string begins with the prefix, it is removed. If the the string doesn't begin
287 * with the prefix the original string is returned.
288 */
6ab3bc95 289std::string remove_prefix(const std::string& str, const std::string& prefix)
6a93d84a 290{
6ab3bc95
RP
291 if (has_prefix(str,prefix) )
292 {
293 return str.substr( prefix.size() );
294 }
295 return str;
296} // eo remove_prefix(const std::string&,const std::string&)
6a93d84a
TJ
297
298
299/**
300 * split a string to key and value delimited by a given delimiter.
6ab3bc95 301 * The resulting key and value strings are trimmed (Whitespaces removed at beginning and end).
6a93d84a
TJ
302 * @param str the string which should be splitted.
303 * @param[out] key the resulting key
304 * @param[out] value the resulting value
305 * @param delimiter the delimiter between key and value; default is '='.
306 * @return @a true if the split was successful.
307 */
6ab3bc95
RP
308bool pair_split(
309 const std::string& str,
310 std::string& key,
311 std::string& value,
312 char delimiter)
313{
314 std::string::size_type pos = str.find (delimiter);
315 if (pos == std::string::npos) return false;
316 key= str.substr(0,pos);
317 value= str.substr(pos+1);
318 trim_mod(key);
319 trim_mod(value);
320 return true;
321} // eo pair_split(const std::string&,std::string&,std::string&,char)
6a93d84a
TJ
322
323
324/**
325 * splits a string by given delimiter
326 *
327 * @param[in] str the string which should be splitted.
328 * @param[out] result the list resulting from splitting @a str.
329 * @param[in] delimiter the delimiter (word/phrase) at which @a str should be splitted.
330 * @param[in] omit_empty should empty parts not be stored?
331 * @param[in] trim_list list of characters the parts should be trimmed by.
332 * (empty string results in no trim)
333 */
6ab3bc95
RP
334void split_string(
335 const std::string& str,
336 std::list<std::string>& result,
337 const std::string& delimiter,
338 bool omit_empty,
339 const std::string& trim_list
6a93d84a
TJ
340)
341{
6ab3bc95
RP
342 std::string::size_type pos, last_pos=0;
343 bool delimiter_found= false;
344 while ( last_pos < str.size() && last_pos != std::string::npos)
345 {
346 pos= str.find(delimiter, last_pos);
347 std::string part;
348 if (pos == std::string::npos)
349 {
350 part= str.substr(last_pos);
351 delimiter_found= false;
352 }
353 else
354 {
355 part= str.substr(last_pos, pos-last_pos);
356 delimiter_found=true;
357 }
358 if (pos != std::string::npos)
359 {
360 last_pos= pos+ delimiter.size();
361 }
362 else
363 {
364 last_pos= std::string::npos;
365 }
366 if (!trim_list.empty() ) trim_mod (part, trim_list);
367 if (omit_empty && part.empty() ) continue;
368 result.push_back( part );
369 }
370 // if the string ends with a delimiter we need to append an empty string if no omit_empty
371 // was given.
372 // (this way we keep the split result consistent to a join operation)
373 if (delimiter_found && !omit_empty)
374 {
375 result.push_back("");
376 }
377} // eo split_string(const std::string&,std::list< std::string >&,const std::string&,bool,const std::string&)
6a93d84a
TJ
378
379
380/**
381 * splits a string by a given delimiter
382 * @param str the string which should be splitted.
383 * @param delimiter delimiter the delimiter (word/phrase) at which @a str should be splitted.
384 * @param[in] omit_empty should empty parts not be stored?
385 * @param[in] trim_list list of characters the parts should be trimmed by.
386 * (empty string results in no trim)
387 * @return the list resulting from splitting @a str.
388 */
6ab3bc95
RP
389std::list<std::string> split_string(
390 const std::string& str,
391 const std::string& delimiter,
392 bool omit_empty,
393 const std::string& trim_list
6a93d84a
TJ
394)
395{
6ab3bc95
RP
396 std::list<std::string> result;
397 split_string(str, result, delimiter, omit_empty, trim_list);
398 return result;
399} // eo split_string(const std::string&,const std::string&,bool,const std::string&)
6a93d84a
TJ
400
401
/**
 * @brief joins a list of strings into a single string.
 *
 * This function is (basically) the reverse operation of @a split_string.
 *
 * @param parts the list of strings.
 * @param delimiter the delimiter which is inserted between the strings.
 * @return the joined string; empty if @a parts is empty.
 */
std::string join_string(
    const std::list< std::string >& parts,
    const std::string& delimiter
)
{
    std::string joined;
    for (std::list< std::string >::const_iterator part = parts.begin();
         part != parts.end();
         ++part)
    {
        // the delimiter goes between the parts, not in front of the first one
        if (part != parts.begin() )
        {
            joined += delimiter;
        }
        joined += *part;
    }
    return joined;
} // eo join_string(const std::list< std::string >&,const std::string&)
6a93d84a
TJ
429
430
431
432/*
433** conversions
434*/
435
436
437/**
438 * @brief returns a hex string from a binary string.
439 * @param str the (binary) string
440 * @param upper_case_digits determine whether to use upper case characters for digits A-F.
441 * @return the string in hex notation.
442 */
6ab3bc95
RP
443std::string convert_binary_to_hex(
444 const std::string& str,
445 bool upper_case_digits
6a93d84a
TJ
446)
447{
6ab3bc95
RP
448 std::string result;
449 std::string hexDigits(upper_case_digits ? hexDigitsUpper : hexDigitsLower);
450 for ( std::string::const_iterator it= str.begin();
451 it != str.end();
452 ++it)
453 {
454 result.push_back( hexDigits[ ( (*it) >> 4) & 0x0f ] );
455 result.push_back( hexDigits[ (*it) & 0x0f ] );
456 }
457 return result;
458} // eo convert_binary_to_hex(const std::string&,bool)
6a93d84a
TJ
459
460
461/**
462 * @brief converts a hex digit string to binary string.
463 * @param str hex digit string
464 * @return the binary string.
465 *
466 * The hex digit string may contains white spaces or colons which are treated
467 * as delimiters between hex digit groups.
468 *
469 * @todo rework the handling of half nibbles (consistency)!
470 */
6ab3bc95
RP
471std::string convert_hex_to_binary(
472 const std::string& str
6a93d84a 473)
6ab3bc95
RP
474throw (std::runtime_error)
475{
476 std::string result;
477 char c= 0;
478 bool hasNibble= false;
479 bool lastWasWS= true;
480 for ( std::string::const_iterator it= str.begin();
481 it != str.end();
482 ++it)
483 {
484 std::string::size_type p = hexDigitsLower.find( *it );
485 if (p== std::string::npos)
486 {
487 p= hexDigitsUpper.find( *it );
488 }
489 if (p == std::string::npos)
490 {
491 if ( ( Whitespaces.find( *it ) != std::string::npos) // is it a whitespace?
6a93d84a 492 or ( *it == ':') // or a colon?
6ab3bc95
RP
493 )
494 {
495 // we treat that as a valid delimiter:
496 if (hasNibble)
6a93d84a 497 {
6ab3bc95
RP
498 // 1 nibble before WS is treate as lower part:
499 result.push_back(c);
500 // reset state:
501 hasNibble= false;
6a93d84a 502 }
6ab3bc95
RP
503 lastWasWS= true;
504 continue;
505 }
506 }
507 if (p == std::string::npos )
508 {
509 throw runtime_error("illegal character in hex digit string: " + str);
510 }
511 lastWasWS= false;
512 if (hasNibble)
513 {
514 c<<=4;
515 }
516 else
517 {
518 c=0;
519 }
520 c+= (p & 0x0f);
521 if (hasNibble)
522 {
523 //we already had a nibble, so a char is complete now:
524 result.push_back( c );
525 hasNibble=false;
526 }
527 else
528 {
529 // this is the first nibble of a new char:
530 hasNibble=true;
531 }
532 }
533 if (hasNibble)
534 {
535 //well, there is one nibble left
536 // let's do some heuristics:
537 if (lastWasWS)
538 {
539 // if the preceeding character was a white space (or a colon)
540 // we treat the nibble as lower part:
541 //( this is consistent with shortened hex notations where leading zeros are not noted)
542 result.push_back( c );
543 }
544 else
545 {
546 // if it was part of a hex digit chain, we treat it as UPPER part (!!)
547 result.push_back( c << 4 );
548 }
549 }
550 return result;
551} // eo convert_hex_to_binary(const std::string&)
552
553
554} // eo namespace I2n
555
556
557
6a93d84a 558
e93545dd
GE
/**
 * @brief converts an ISO-8859-1 encoded string to UTF-8 using iconv.
 * @param isostring the ISO-8859-1 encoded input.
 * @return the UTF-8 encoded string.
 * @throw std::runtime_error if iconv does not offer the conversion or the
 *        output buffer cannot be allocated.
 *
 * The return value of iconv() is not checked: if the conversion stops early,
 * whatever was converted up to that point is returned. The result is read
 * from a NUL terminated buffer, so it ends at the first NUL character of the
 * converted data.
 */
std::string iso_to_utf8(const std::string& isostring)
{
    iconv_t i2utf8 = iconv_open("UTF-8", "ISO-8859-1");

    // check the descriptor returned by iconv_open() (not the function itself)
    if (i2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from ISO-8859-1 to UTF-8");

    size_t in_size = isostring.size();
    // four output bytes are reserved per input byte
    const size_t buf_size = in_size * 4;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>(malloc(buf_size + 1));
    if (buf == NULL)
    {
        // do not leak the conversion descriptor on the error path
        iconv_close(i2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const input pointer but does not modify the input
    char *in = const_cast<char *>(isostring.c_str());
    char *out = buf;
    iconv(i2utf8, &in, &in_size, &out, &out_size);
    iconv_close(i2utf8);

    // out_size now holds the unused space; terminate behind the written part
    buf[buf_size - out_size] = 0;

    std::string result;
    try
    {
        result = buf;
    }
    catch (...)
    {
        free(buf);
        throw;
    }
    free(buf);

    return result;
}
588
/**
 * @brief converts a UTF-8 encoded string to ISO-8859-1 using iconv.
 * @param utf8string the UTF-8 encoded input.
 * @return the ISO-8859-1 encoded string.
 * @throw std::runtime_error if iconv does not offer the conversion or the
 *        output buffer cannot be allocated.
 *
 * The return value of iconv() is not checked: if the conversion stops early
 * (e.g. at a character iconv cannot convert), whatever was converted up to
 * that point is returned. The result is read from a NUL terminated buffer,
 * so it ends at the first NUL character of the converted data.
 */
std::string utf8_to_iso(const std::string& utf8string)
{
    iconv_t utf82iso = iconv_open("ISO-8859-1", "UTF-8");

    if (utf82iso == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to ISO-8859-1");

    size_t in_size = utf8string.size();
    // the output buffer is as large as the input
    const size_t buf_size = in_size;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>(malloc(buf_size + 1));
    if (buf == NULL)
    {
        // do not leak the conversion descriptor on the error path
        iconv_close(utf82iso);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const input pointer but does not modify the input
    char *in = const_cast<char *>(utf8string.c_str());
    char *out = buf;
    iconv(utf82iso, &in, &in_size, &out, &out_size);
    iconv_close(utf82iso);

    // out_size now holds the unused space; terminate behind the written part
    buf[buf_size - out_size] = 0;

    std::string result;
    try
    {
        result = buf;
    }
    catch (...)
    {
        free(buf);
        throw;
    }
    free(buf);

    return result;
}
618
a5f3af6e
GE
/**
 * @brief converts a UTF-8 encoded string to a zero terminated wchar_t buffer.
 * @param utf8string the UTF-8 encoded input.
 * @return a buffer allocated with malloc(); the caller has to release it
 *         with free().
 * @throw std::runtime_error if iconv does not offer the conversion, the
 *        buffer cannot be allocated or the input cannot be converted.
 *
 * NOTE(review): the target encoding is "UCS-4LE" while the buffer is accessed
 * as wchar_t; this presumably relies on a 4 byte little endian wchar_t -
 * confirm for the supported platforms.
 */
wchar_t* utf8_to_wbuf(const std::string& utf8string)
{
    iconv_t utf82wstr = iconv_open("UCS-4LE", "UTF-8");

    if (utf82wstr == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UCS-4");

    size_t in_size = utf8string.size();
    // one wide character per input byte plus the terminator
    const size_t buf_size = (in_size + 1) * sizeof(wchar_t);
    size_t out_size = buf_size;

    wchar_t *buf = static_cast<wchar_t *>(malloc(buf_size));
    if (buf == NULL)
    {
        // do not leak the conversion descriptor on the error path
        iconv_close(utf82wstr);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const input pointer but does not modify the input
    char *in = const_cast<char *>(utf8string.c_str());
    char *out = reinterpret_cast<char *>(buf);
    const size_t rc = iconv(utf82wstr, &in, &in_size, &out, &out_size);
    iconv_close(utf82wstr);

    if (rc == (size_t)-1)
    {
        // nobody else knows about the buffer yet, release it before throwing
        free(buf);
        throw std::runtime_error("error converting char encodings");
    }

    // out_size now holds the unused bytes; terminate behind the written part
    buf[ (buf_size - out_size) / sizeof(wchar_t) ] = 0;

    return buf;
}
644
/**
 * @brief converts a string from the iconv encoding "UTF-7-IMAP" to UTF-8.
 * @param utf7imapstring the UTF-7-IMAP encoded input.
 * @return the UTF-8 encoded string.
 * @throw std::runtime_error if iconv does not offer the conversion or the
 *        output buffer cannot be allocated.
 *
 * The return value of iconv() is not checked: if the conversion stops early,
 * whatever was converted up to that point is returned. The result is read
 * from a NUL terminated buffer, so it ends at the first NUL character of the
 * converted data.
 */
std::string utf7imap_to_utf8(const std::string& utf7imapstring)
{
    iconv_t utf7imap2utf8 = iconv_open("UTF-8", "UTF-7-IMAP");

    if (utf7imap2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-7-IMAP to UTF-8");

    size_t in_size = utf7imapstring.size();
    // four output bytes are reserved per input byte
    const size_t buf_size = in_size * 4;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>(malloc(buf_size + 1));
    if (buf == NULL)
    {
        // do not leak the conversion descriptor on the error path
        iconv_close(utf7imap2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const input pointer but does not modify the input
    char *in = const_cast<char *>(utf7imapstring.c_str());
    char *out = buf;
    iconv(utf7imap2utf8, &in, &in_size, &out, &out_size);
    iconv_close(utf7imap2utf8);

    // out_size now holds the unused space; terminate behind the written part
    buf[buf_size - out_size] = 0;

    std::string result;
    try
    {
        result = buf;
    }
    catch (...)
    {
        free(buf);
        throw;
    }
    free(buf);

    return result;
}
674
6a2b6dd1
TJ
/**
 * @brief converts a UTF-8 encoded string to the iconv encoding "UTF-7-IMAP".
 * @param utf8string the UTF-8 encoded input.
 * @return the UTF-7-IMAP encoded string.
 * @throw std::runtime_error if iconv does not offer the conversion or the
 *        output buffer cannot be allocated.
 *
 * The return value of iconv() is not checked: if the conversion stops early
 * (including a full output buffer), whatever was converted up to that point
 * is returned. The result is read from a NUL terminated buffer, so it ends
 * at the first NUL character of the converted data.
 */
std::string utf8_to_utf7imap(const std::string& utf8string)
{
    iconv_t utf82utf7imap = iconv_open("UTF-7-IMAP", "UTF-8");

    if (utf82utf7imap == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UTF-7-IMAP");

    // UTF-7 is base64 encoded, a buffer 10x as large
    // as the utf-8 buffer should be enough. If not the string will be truncated.
    size_t in_size = utf8string.size();
    const size_t buf_size = in_size * 10;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>(malloc(buf_size + 1));
    if (buf == NULL)
    {
        // do not leak the conversion descriptor on the error path
        iconv_close(utf82utf7imap);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const input pointer but does not modify the input
    char *in = const_cast<char *>(utf8string.c_str());
    char *out = buf;
    iconv(utf82utf7imap, &in, &in_size, &out, &out_size);
    iconv_close(utf82utf7imap);

    // out_size now holds the unused space; terminate behind the written part
    buf[buf_size - out_size] = 0;

    std::string result;
    try
    {
        result = buf;
    }
    catch (...)
    {
        free(buf);
        throw;
    }
    free(buf);

    return result;
}
706
118e216e
TJ
/**
 * @brief splits a string into (html) tag tokens and text tokens.
 * @param[out] tokenized the list the tokens are appended to. The first member
 *     of each pair is the token text, the second is @a true if the token is
 *     a tag (a '<' up to and including the following '>').
 * @param input the string to tokenize.
 *
 * Text which starts with '<' but is not closed by a '>' before the next '<'
 * or the end of the input is stored as a non-tag token.
 */
void tokenize_by_tag(std::vector<std::pair<std::string,bool> > &tokenized, const std::string &input)
{
    std::string token;
    bool in_tag = false;

    for (std::string::const_iterator ch = input.begin(); ch != input.end(); ++ch)
    {
        if (*ch == '<')
        {
            // a new tag starts: whatever was collected so far is plain text
            if (!token.empty() )
            {
                tokenized.push_back( std::make_pair(token, false) );
                token.clear();
            }
            in_tag = true;
            token += *ch;
            continue;
        }

        token += *ch;
        if (*ch == '>' && in_tag)
        {
            // the tag is complete (including its closing bracket)
            tokenized.push_back( std::make_pair(token, true) );
            token.clear();
            in_tag = false;
        }
    }

    // String left over in buffer?
    if (!token.empty() )
        tokenized.push_back( std::make_pair(token, false) );
} // eo tokenize_by_tag
118e216e 746
118e216e
TJ
747
748std::string strip_html_tags(const std::string &input)
749{
6ab3bc95
RP
750 // Pair first: string, second: isTag
751 vector<pair<string,bool> > tokenized;
752 tokenize_by_tag (tokenized, input);
118e216e 753
6ab3bc95
RP
754 string output;
755 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
756 for (token = tokenized.begin(); token != tokens_end; token++)
757 if (!token->second)
758 output += token->first;
759
760 return output;
761} // eo strip_html_tags
118e216e 762
118e216e
TJ
763
764// Smart-encode HTML en
765string smart_html_entities(const std::string &input)
766{
6ab3bc95
RP
767 // Pair first: string, second: isTag
768 vector<pair<string,bool> > tokenized;
769 tokenize_by_tag (tokenized, input);
770
771 string output;
772 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
773 for (token = tokenized.begin(); token != tokens_end; token++)
774 {
775 // keep HTML tags as they are
776 if (token->second)
777 output += token->first;
778 else
779 output += html_entities(token->first);
780 }
781
782 return output;
118e216e
TJ
783}
784
6ab3bc95 785
a5f3af6e
GE
/**
 * @brief searches for the first byte with the highest bit set (value > 127).
 * @param str the string to search.
 * @return the position of the first such byte or std::string::npos if the
 *         string contains none.
 */
std::string::size_type find_8bit(const std::string &str)
{
    for (std::string::size_type pos = 0; pos != str.size(); ++pos)
    {
        const unsigned char byte = static_cast<unsigned char>(str[pos]);
        if ( (byte & 0x80u) != 0)
            return pos;
    }

    return std::string::npos;
}
795
118e216e
TJ
796// encoded UTF-8 chars into HTML entities
797string html_entities(std::string str)
798{
6ab3bc95
RP
799 // Normal chars
800 replace_all (str, "&", "&amp;");
6ab3bc95
RP
801 replace_all (str, "<", "&lt;");
802 replace_all (str, ">", "&gt;");
980577e1
TJ
803 replace_all (str, "\"", "&quot;");
804 replace_all (str, "'", "&#x27;");
805 replace_all (str, "/", "&#x2F;");
6ab3bc95
RP
806
807 // Umlauts
808 replace_all (str, "\xC3\xA4", "&auml;");
809 replace_all (str, "\xC3\xB6", "&ouml;");
810 replace_all (str, "\xC3\xBC", "&uuml;");
811 replace_all (str, "\xC3\x84", "&Auml;");
812 replace_all (str, "\xC3\x96", "&Ouml;");
813 replace_all (str, "\xC3\x9C", "&Uuml;");
814
815 // Misc
816 replace_all (str, "\xC3\x9F", "&szlig;");
817
818 // conversion of remaining non-ASCII chars needed?
819 // just do if needed because of performance
820 if (find_8bit(str) != string::npos)
821 {
822 // convert to fixed-size encoding UTF-32
823 wchar_t* wbuf=utf8_to_wbuf(str);
824 ostringstream target;
825
826 // replace all non-ASCII chars with HTML representation
827 for (int p=0; wbuf[p] != 0; p++)
828 {
829 unsigned int c=wbuf[p];
830
831 if (c <= 127)
832 target << static_cast<unsigned char>(c);
833 else
834 target << "&#" << c << ';';
835 }
836
837 free(wbuf);
838
839 str=target.str();
840 }
841
842 return str;
843} // eo html_entities(std::string)
844
118e216e 845
e93545dd
GE
/**
 * @brief replaces all occurrences of a search string within a string.
 * @param[in,out] base the string which is modified.
 * @param ist the string to search for.
 * @param soll the replacement.
 * @return @a true if at least one occurrence was replaced.
 * @throw std::runtime_error if the search string is empty.
 *
 * The search continues behind each inserted replacement, so occurrences of
 * @a ist inside @a soll are not replaced again.
 */
bool replace_all(std::string &base, const std::string &ist, const std::string &soll)
{
    if (ist.empty() )
        throw std::runtime_error ("replace_all called with empty search string");

    bool replaced = false;
    for (std::string::size_type at = base.find(ist);
         at != std::string::npos;
         at = base.find(ist, at + soll.size() ) )
    {
        base.replace(at, ist.size(), soll);
        replaced = true;
    }

    return replaced;
}

/// Overload taking both strings as pointers; see the reference based variant.
bool replace_all(std::string &base, const std::string *ist, const std::string *soll)
{
    return replace_all(base, *ist, *soll);
}

/// Overload taking both strings as C strings; see the reference based variant.
bool replace_all(std::string &base, const char *ist, const char *soll)
{
    const std::string search(ist);
    const std::string replacement(soll);
    return replace_all(base, search, replacement);
}

/// Overload taking the replacement as C string; see the reference based variant.
bool replace_all(std::string &base, const std::string &ist, const char *soll)
{
    const std::string replacement(soll);
    return replace_all(base, ist, replacement);
}

/// Overload taking the search string as C string and the replacement as
/// pointer; see the reference based variant.
bool replace_all(std::string &base, const char *ist, const std::string *soll)
{
    const std::string search(ist);
    return replace_all(base, search, *soll);
}
887
#if 0
// Disabled: global variants of to_lower() / to_upper(). They are excluded
// from compilation; the file defines I2n::to_lower() / I2n::to_upper() above.
std::string to_lower(const std::string &src)
{
    std::string dst(src);

    for (std::string::iterator ch = dst.begin(); ch != dst.end(); ++ch)
        *ch = tolower(*ch);

    return dst;
}

std::string to_upper(const std::string &src)
{
    std::string dst(src);

    for (std::string::iterator ch = dst.begin(); ch != dst.end(); ++ch)
        *ch = toupper(*ch);

    return dst;
}
#endif
e93545dd 911
83809f5e 912const int MAX_UNIT_FORMAT_SYMBOLS = 6;
d1ea9075 913
2cb9a9c5 914const string shortUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
d1ea9075
GMF
915 " B",
916 " KB",
917 " MB",
918 " GB",
919 " TB",
83809f5e 920 " PB"
d1ea9075
GMF
921};
922
2cb9a9c5 923const string longUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
5cb766b9
GMF
924 i18n_noop(" Bytes"),
925 i18n_noop(" KBytes"),
926 i18n_noop(" MBytes"),
927 i18n_noop(" GBytes"),
928 i18n_noop(" TBytes"),
83809f5e 929 i18n_noop(" PBytes")
d1ea9075
GMF
930};
931
72a94426
GMF
932
/**
 * @brief rounds a number "half up" to a multiple of 1/rounding_multiplier.
 * @param number the number to round.
 * @param rounding_multiplier the reciprocal of the rounding step, e.g. 10 to
 *     round to one decimal place.
 * @return the rounded number.
 *
 * The scaled value has to fit into an int64_t.
 */
long double rounding_upwards(
    long double number,
    const int rounding_multiplier
)
{
    const long double scaled = number * rounding_multiplier + 0.5;

    // The cast truncates toward zero, which is only the wanted "round down"
    // for values >= 0. For negative non-integers step one further down,
    // otherwise e.g. -1536 would come out as -1535.9.
    int64_t steps = (int64_t) (scaled);
    if (scaled < 0 && (long double) (steps) != scaled)
    {
        --steps;
    }

    return (long double) (steps) / (long double) (rounding_multiplier);
}
946
947
81267544
GMF
948string nice_unit_format(
949 const int64_t input,
70fc0674
GMF
950 const UnitFormat format,
951 const UnitBase base
81267544 952)
6ab3bc95 953{
d1ea9075 954 // select the system of units (decimal or binary)
81267544 955 int multiple = 0;
a398513a 956 if (base == UnitBase1000)
81267544
GMF
957 {
958 multiple = 1000;
959 }
960 else
961 {
962 multiple = 1024;
963 }
964
965 long double size = input;
6ab3bc95 966
d1ea9075
GMF
967 // check the size of the input number to fit in the appropriate symbol
968 int sizecount = 0;
81267544 969 while (size > multiple)
6ab3bc95 970 {
81267544
GMF
971 size = size / multiple;
972 sizecount++;
83809f5e
GMF
973
974 // rollback to the previous values and stop the loop when cannot
975 // represent the number length.
976 if (sizecount >= MAX_UNIT_FORMAT_SYMBOLS)
977 {
978 size = size * multiple;
979 sizecount--;
980 break;
981 }
6ab3bc95
RP
982 }
983
a398513a
GMF
984 // round the input number "half up" to multiples of 10
985 const int rounding_multiplier = 10;
72a94426 986 size = rounding_upwards(size, rounding_multiplier);
6ab3bc95 987
d1ea9075 988 // format the input number, placing the appropriate symbol
6ab3bc95 989 ostringstream out;
6ab3bc95 990 out.setf (ios::fixed);
a398513a 991 if (format == ShortUnitFormat)
d1ea9075
GMF
992 {
993 out.precision(1);
68d37a5c 994 out << size << i18n( shortUnitFormatSymbols[sizecount].c_str() );
d1ea9075
GMF
995 }
996 else
6ab3bc95 997 {
d1ea9075 998 out.precision (2);
68d37a5c 999 out << size << i18n( longUnitFormatSymbols[sizecount].c_str() );
6ab3bc95
RP
1000 }
1001
1002 return out.str();
1003} // eo nice_unit_format(int input)
1004
e93545dd 1005
47c07fba
GE
/**
 * @brief escapes a string and encloses it in double quotes.
 * @param s the string to escape.
 * @return the escaped string: double quotes and backslashes are prefixed
 *         with a backslash, carriage return and newline are written as
 *         "\r" and "\n" (backslash + letter).
 */
std::string escape(const std::string &s)
{
    std::string out(1, '"');

    for (std::string::size_type i = 0; i < s.size(); ++i)
    {
        const char ch = s[i];
        switch (ch)
        {
            case '"':
            case '\\':
                out += '\\';
                out += ch;
                break;
            case '\r':
                out += "\\r";
                break;
            case '\n':
                out += "\\n";
                break;
            default:
                out += ch;
        }
    }

    out += '"';
    return out;
} // eo escape(const std::string&)
47c07fba 1036
47c07fba 1037
6ab3bc95
RP
/**
 * @brief extracts and unescapes a double quoted, backslash escaped string.
 * @param s the string which contains the escaped string.
 * @param startpos the position of the opening double quote within @a s.
 * @param[out] endpos if a closing quote is found: its position within @a s.
 *     Without a closing quote the rest of @a s is taken as the content and
 *     the value stored here is not a usable position.
 * @return the unescaped content between the quotes.
 * @throw std::out_of_range if @a startpos is outside of @a s, if there is no
 *     double quote at @a startpos or if the content ends with a single
 *     backslash.
 *
 * "\r" and "\n" (backslash + letter) become carriage return and newline; for
 * every other escaped character the backslash is simply dropped.
 */
std::string descape(const std::string &s, int startpos, int &endpos)
{
    if (s.at(startpos) != '"')
        throw std::out_of_range("value not type escaped string");

    std::string out = s.substr(startpos+1);

    // search for the end of the string: the first quote which is preceded
    // by an even number of backslashes (an odd number escapes the quote)
    std::string::size_type quote = 0;
    while ( (quote = out.find('"', quote) ) != std::string::npos)
    {
        std::string::size_type backslashes = 0;
        while (backslashes < quote && out.at(quote - 1 - backslashes) == '\\')
            ++backslashes;

        if (backslashes % 2 == 0)
            break;

        ++quote;
    }

    // we now have the end of the string
    out = out.substr(0, quote);

    // tell the caller about the end position
    endpos = startpos+quote+1;

    // resolve all backslash sequences inside the string now
    for (std::string::size_type bs = out.find('\\');
         bs != std::string::npos;
         bs = out.find('\\', bs + 1) )
    {
        const char escaped = out.at(bs+1);
        if (escaped == 'r')
            out.replace(bs, 2, "\r");
        else if (escaped == 'n')
            out.replace(bs, 2, "\n");
        else
            out.erase(bs, 1);   // keep the escaped character itself
    }

    return out;
} // eo descape(const std::string&,int,int&)
47c07fba 1097
e93545dd 1098
47c07fba
GE
/**
 * @brief encloses a string in single quotes for use as a shell argument.
 * @param input the string to quote.
 * @return the quoted string. Every single quote of the input is written as
 *         '\'' (close the quoting, an escaped quote, reopen the quoting).
 */
std::string escape_shellarg(const std::string &input)
{
    std::string quoted(1, '\'');

    for (std::string::size_type i = 0; i < input.size(); ++i)
    {
        if (input[i] == '\'')
            quoted += "'\\''";
        else
            quoted += input[i];
    }

    quoted += '\'';
    return quoted;
}