Removed the exabytes, zettabytes and yottabytes representations from nice_unit_format()
[libi2ncommon] / src / stringfunc.cpp
CommitLineData
6a93d84a
TJ
1/** @file
2 *
3 * (c) Copyright 2007-2008 by Intra2net AG
6ab3bc95 4 *
6a93d84a
TJ
5 * info@intra2net.com
6 */
e93545dd
GE
7
8#include <iostream>
9#include <string>
10#include <sstream>
11#include <stdexcept>
5efd35b1 12#include <algorithm>
e93545dd 13
a5f3af6e 14#include <wchar.h>
e93545dd
GE
15#include <stdlib.h>
16#include <iconv.h>
17#include <i18n.h>
18
19#include <stringfunc.hxx>
20
21using namespace std;
22
6ab3bc95
RP
23namespace I2n
24{
6a93d84a
TJ
25
26
6ab3bc95
RP
namespace
{

/// Hex digit lookup tables; the index of a character equals its nibble value.
const std::string hexDigitsLower("0123456789abcdef");
const std::string hexDigitsUpper("0123456789ABCDEF");


/**
 * @brief functor which converts a single character to upper case.
 *
 * The character is converted to unsigned char before it is handed to
 * std::toupper(); passing a negative value (any char >= 0x80 on platforms
 * with signed char) would be undefined behaviour.
 */
struct UpperFunc
{
    char operator() (char c) const
    {
        return static_cast<char>( std::toupper( static_cast<unsigned char>(c) ) );
    }
}; // eo struct UpperFunc


/**
 * @brief functor which converts a single character to lower case.
 *
 * See UpperFunc for the reason of the unsigned char conversion.
 */
struct LowerFunc
{
    char operator() (char c) const
    {
        return static_cast<char>( std::tolower( static_cast<unsigned char>(c) ) );
    }
}; // eo struct LowerFunc


} // eo namespace <anonymous>
53
54
55
/**
 * default list of whitespace characters (" \t\r\n").
 */
const std::string Whitespaces = " \t\r\n";

/**
 * default list of line ending characters ("\r\n").
 */
const std::string LineEndings = "\r\n";
6a93d84a
TJ
65
66
67
/**
 * @brief checks if a string begins with a given prefix.
 * @param str the string which is tested.
 * @param prefix the prefix which should be tested for.
 * @return @a true iff the prefix is not empty and the string begins with that prefix.
 */
bool has_prefix(const std::string& str, const std::string& prefix)
{
    // an empty prefix never matches; a prefix longer than the string cannot match
    if (prefix.empty() || str.size() < prefix.size() )
    {
        return false;
    }
    return str.compare(0, prefix.size(), prefix) == 0;
} // eo has_prefix(const std::string&,const std::string&)
6a93d84a
TJ
82
83
/**
 * @brief checks if a string ends with a given suffix.
 * @param str the string which is tested.
 * @param suffix the suffix which should be tested for.
 * @return @a true iff the suffix is not empty and the string ends with that suffix.
 */
bool has_suffix(const std::string& str, const std::string& suffix)
{
    // an empty suffix never matches; a suffix longer than the string cannot match
    if (suffix.empty() || str.size() < suffix.size() )
    {
        return false;
    }
    return str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
} // eo has_suffix(const std::string&,const std::string&)
6a93d84a
TJ
98
99
/**
 * @brief cuts off characters from a given list from front and end of a string (in place).
 * @param[in,out] str the string which should be trimmed.
 * @param charlist the list of characters to remove from beginning and end of string.
 * @return a copy of the trimmed string.
 */
std::string trim_mod(std::string& str, const std::string& charlist)
{
    // first: trim the beginning
    const std::string::size_type first= str.find_first_not_of(charlist);
    if (first == std::string::npos)
    {
        // whole string consists of characters from charlist (or is already empty)
        str.clear();
        return str;
    }
    if (first > 0)
    {
        str.erase(0, first);
    }
    // now the tail; at least one character survives, so find_last_not_of() cannot fail
    const std::string::size_type end= str.find_last_not_of(charlist) + 1;
    if (end < str.size() )
    {
        str.erase(end);
    }
    return str;
} // eo trim_mod(std::string&,const std::string&)
6a93d84a
TJ
129
130
131
/**
 * @brief removes the last character from a string when it is in a list of chars to be removed (in place).
 * @param[in,out] str the string.
 * @param what the list of chars which will be tested for.
 * @return a copy of the resulting string with the last char removed (if applicable).
 *
 * At most one character is removed, even if the one before it is also contained in @a what.
 */
std::string chomp_mod(std::string& str, const std::string& what)
{
    if (!str.empty() && !what.empty() )
    {
        const std::string::size_type last= str.size() - 1;
        if (what.find( str[last] ) != std::string::npos)
        {
            str.erase(last);
        }
    }
    return str;
} // eo chomp_mod(std::string&,const std::string&)
6a93d84a
TJ
150
151
152/**
153 * @brief converts a string to lower case.
154 * @param[in,out] str the string to modify.
155 * @return the string
156 */
6ab3bc95 157std::string to_lower_mod(std::string& str)
6a93d84a 158{
6ab3bc95
RP
159 std::transform(str.begin(), str.end(), str.begin(), LowerFunc() );
160 return str;
161} // eo to_lower_mod(std::string&)
6a93d84a
TJ
162
163
164/**
165 * @brief converts a string to upper case.
166 * @param[in,out] str the string to modify.
167 * @return the string
168 */
6ab3bc95 169std::string to_upper_mod(std::string& str)
6a93d84a 170{
6ab3bc95
RP
171 std::transform( str.begin(), str.end(), str.begin(), UpperFunc() );
172 return str;
173} // eo to_upper_mod(std::string&)
6a93d84a
TJ
174
175
176
/**
 * @brief cuts off characters from a given list from front and end of a string.
 * @param str the string which should be trimmed.
 * @param charlist the list of characters to remove from beginning and end of string.
 * @return the trimmed copy of the string.
 */
std::string trim (const std::string& str, const std::string& charlist)
{
    const std::string::size_type first= str.find_first_not_of(charlist);
    if (first == std::string::npos)
    {
        // whole string consists of characters from charlist (or is empty)
        return std::string();
    }
    // a character outside charlist exists, so the backwards search succeeds as well
    const std::string::size_type last= str.find_last_not_of(charlist);
    return str.substr(first, last - first + 1);
} // eo trim(const std::string&,const std::string&)
196
197
/**
 * @brief removes the last character from a string when it is in a list of chars to be removed.
 * @param str the string.
 * @param what the list of chars which will be tested for.
 * @return the resulting string with the last char removed (if applicable).
 *
 * At most one character is removed.
 */
std::string chomp (const std::string& str, const std::string& what)
{
    if (!str.empty() && !what.empty() )
    {
        const std::string::size_type last= str.size() - 1;
        if (what.find( str[last] ) != std::string::npos)
        {
            return str.substr(0, last);
        }
    }
    return str;
} // eo chomp(const std::string&,const std::string&)
216
217
218/**
219 * @brief returns a lower case version of a given string.
220 * @param str the string
221 * @return the lower case version of the string
222 */
6ab3bc95 223std::string to_lower (const std::string& str)
6a93d84a 224{
6ab3bc95
RP
225 std::string result(str);
226 return to_lower_mod(result);
227} // eo to_lower(const std::string&)
6a93d84a
TJ
228
229
230/**
231 * @brief returns a upper case version of a given string.
232 * @param str the string
233 * @return the upper case version of the string
234 */
6ab3bc95 235std::string to_upper(const std::string& str)
6a93d84a 236{
6ab3bc95
RP
237 std::string result(str);
238 return to_upper_mod(result);
239} // eo to_upper(const std::string&)
6a93d84a
TJ
240
241
242
243/**
244 * @brief removes a given suffix from a string.
245 * @param str the string.
246 * @param suffix the suffix which should be removed if the string ends with it.
247 * @return the string without the suffix.
248 *
249 * If the string ends with the suffix, it is removed. If the the string doesn't end
250 * with the suffix the original string is returned.
251 */
6ab3bc95 252std::string remove_suffix(const std::string& str, const std::string& suffix)
6a93d84a 253{
6ab3bc95
RP
254 if (has_suffix(str,suffix) )
255 {
256 return str.substr(0, str.size()-suffix.size() );
257 }
258 return str;
259} // eo remove_suffix(const std::string&,const std::string&)
6a93d84a
TJ
260
261
262
263/**
264 * @brief removes a given prefix from a string.
265 * @param str the string.
266 * @param prefix the prefix which should be removed if the string begins with it.
267 * @return the string without the prefix.
268 *
269 * If the string begins with the prefix, it is removed. If the the string doesn't begin
270 * with the prefix the original string is returned.
271 */
6ab3bc95 272std::string remove_prefix(const std::string& str, const std::string& prefix)
6a93d84a 273{
6ab3bc95
RP
274 if (has_prefix(str,prefix) )
275 {
276 return str.substr( prefix.size() );
277 }
278 return str;
279} // eo remove_prefix(const std::string&,const std::string&)
6a93d84a
TJ
280
281
282/**
283 * split a string to key and value delimited by a given delimiter.
6ab3bc95 284 * The resulting key and value strings are trimmed (Whitespaces removed at beginning and end).
6a93d84a
TJ
285 * @param str the string which should be splitted.
286 * @param[out] key the resulting key
287 * @param[out] value the resulting value
288 * @param delimiter the delimiter between key and value; default is '='.
289 * @return @a true if the split was successful.
290 */
6ab3bc95
RP
291bool pair_split(
292 const std::string& str,
293 std::string& key,
294 std::string& value,
295 char delimiter)
296{
297 std::string::size_type pos = str.find (delimiter);
298 if (pos == std::string::npos) return false;
299 key= str.substr(0,pos);
300 value= str.substr(pos+1);
301 trim_mod(key);
302 trim_mod(value);
303 return true;
304} // eo pair_split(const std::string&,std::string&,std::string&,char)
6a93d84a
TJ
305
306
307/**
308 * splits a string by given delimiter
309 *
310 * @param[in] str the string which should be splitted.
311 * @param[out] result the list resulting from splitting @a str.
312 * @param[in] delimiter the delimiter (word/phrase) at which @a str should be splitted.
313 * @param[in] omit_empty should empty parts not be stored?
314 * @param[in] trim_list list of characters the parts should be trimmed by.
315 * (empty string results in no trim)
316 */
6ab3bc95
RP
317void split_string(
318 const std::string& str,
319 std::list<std::string>& result,
320 const std::string& delimiter,
321 bool omit_empty,
322 const std::string& trim_list
6a93d84a
TJ
323)
324{
6ab3bc95
RP
325 std::string::size_type pos, last_pos=0;
326 bool delimiter_found= false;
327 while ( last_pos < str.size() && last_pos != std::string::npos)
328 {
329 pos= str.find(delimiter, last_pos);
330 std::string part;
331 if (pos == std::string::npos)
332 {
333 part= str.substr(last_pos);
334 delimiter_found= false;
335 }
336 else
337 {
338 part= str.substr(last_pos, pos-last_pos);
339 delimiter_found=true;
340 }
341 if (pos != std::string::npos)
342 {
343 last_pos= pos+ delimiter.size();
344 }
345 else
346 {
347 last_pos= std::string::npos;
348 }
349 if (!trim_list.empty() ) trim_mod (part, trim_list);
350 if (omit_empty && part.empty() ) continue;
351 result.push_back( part );
352 }
353 // if the string ends with a delimiter we need to append an empty string if no omit_empty
354 // was given.
355 // (this way we keep the split result consistent to a join operation)
356 if (delimiter_found && !omit_empty)
357 {
358 result.push_back("");
359 }
360} // eo split_string(const std::string&,std::list< std::string >&,const std::string&,bool,const std::string&)
6a93d84a
TJ
361
362
363/**
364 * splits a string by a given delimiter
365 * @param str the string which should be splitted.
366 * @param delimiter delimiter the delimiter (word/phrase) at which @a str should be splitted.
367 * @param[in] omit_empty should empty parts not be stored?
368 * @param[in] trim_list list of characters the parts should be trimmed by.
369 * (empty string results in no trim)
370 * @return the list resulting from splitting @a str.
371 */
6ab3bc95
RP
372std::list<std::string> split_string(
373 const std::string& str,
374 const std::string& delimiter,
375 bool omit_empty,
376 const std::string& trim_list
6a93d84a
TJ
377)
378{
6ab3bc95
RP
379 std::list<std::string> result;
380 split_string(str, result, delimiter, omit_empty, trim_list);
381 return result;
382} // eo split_string(const std::string&,const std::string&,bool,const std::string&)
6a93d84a
TJ
383
384
/**
 * @brief joins a list of strings into a single string.
 *
 * This function is (basically) the reverse operation of @a split_string.
 *
 * @param parts the list of strings.
 * @param delimiter the delimiter which is inserted between the strings.
 * @return the joined string (empty if @a parts is empty).
 */
std::string join_string(
    const std::list< std::string >& parts,
    const std::string& delimiter
)
{
    std::string result;
    for (std::list< std::string >::const_iterator it= parts.begin();
         it != parts.end();
         ++it)
    {
        // the delimiter goes between the parts, not in front of the first one
        if (it != parts.begin() )
        {
            result+= delimiter;
        }
        result+= *it;
    }
    return result;
} // eo join_string(const std::list< std::string >&,const std::string&)
6a93d84a
TJ
412
413
414
415/*
416** conversions
417*/
418
419
420/**
421 * @brief returns a hex string from a binary string.
422 * @param str the (binary) string
423 * @param upper_case_digits determine whether to use upper case characters for digits A-F.
424 * @return the string in hex notation.
425 */
6ab3bc95
RP
426std::string convert_binary_to_hex(
427 const std::string& str,
428 bool upper_case_digits
6a93d84a
TJ
429)
430{
6ab3bc95
RP
431 std::string result;
432 std::string hexDigits(upper_case_digits ? hexDigitsUpper : hexDigitsLower);
433 for ( std::string::const_iterator it= str.begin();
434 it != str.end();
435 ++it)
436 {
437 result.push_back( hexDigits[ ( (*it) >> 4) & 0x0f ] );
438 result.push_back( hexDigits[ (*it) & 0x0f ] );
439 }
440 return result;
441} // eo convert_binary_to_hex(const std::string&,bool)
6a93d84a
TJ
442
443
444/**
445 * @brief converts a hex digit string to binary string.
446 * @param str hex digit string
447 * @return the binary string.
448 *
449 * The hex digit string may contains white spaces or colons which are treated
450 * as delimiters between hex digit groups.
451 *
452 * @todo rework the handling of half nibbles (consistency)!
453 */
6ab3bc95
RP
454std::string convert_hex_to_binary(
455 const std::string& str
6a93d84a 456)
6ab3bc95
RP
457throw (std::runtime_error)
458{
459 std::string result;
460 char c= 0;
461 bool hasNibble= false;
462 bool lastWasWS= true;
463 for ( std::string::const_iterator it= str.begin();
464 it != str.end();
465 ++it)
466 {
467 std::string::size_type p = hexDigitsLower.find( *it );
468 if (p== std::string::npos)
469 {
470 p= hexDigitsUpper.find( *it );
471 }
472 if (p == std::string::npos)
473 {
474 if ( ( Whitespaces.find( *it ) != std::string::npos) // is it a whitespace?
6a93d84a 475 or ( *it == ':') // or a colon?
6ab3bc95
RP
476 )
477 {
478 // we treat that as a valid delimiter:
479 if (hasNibble)
6a93d84a 480 {
6ab3bc95
RP
481 // 1 nibble before WS is treate as lower part:
482 result.push_back(c);
483 // reset state:
484 hasNibble= false;
6a93d84a 485 }
6ab3bc95
RP
486 lastWasWS= true;
487 continue;
488 }
489 }
490 if (p == std::string::npos )
491 {
492 throw runtime_error("illegal character in hex digit string: " + str);
493 }
494 lastWasWS= false;
495 if (hasNibble)
496 {
497 c<<=4;
498 }
499 else
500 {
501 c=0;
502 }
503 c+= (p & 0x0f);
504 if (hasNibble)
505 {
506 //we already had a nibble, so a char is complete now:
507 result.push_back( c );
508 hasNibble=false;
509 }
510 else
511 {
512 // this is the first nibble of a new char:
513 hasNibble=true;
514 }
515 }
516 if (hasNibble)
517 {
518 //well, there is one nibble left
519 // let's do some heuristics:
520 if (lastWasWS)
521 {
522 // if the preceeding character was a white space (or a colon)
523 // we treat the nibble as lower part:
524 //( this is consistent with shortened hex notations where leading zeros are not noted)
525 result.push_back( c );
526 }
527 else
528 {
529 // if it was part of a hex digit chain, we treat it as UPPER part (!!)
530 result.push_back( c << 4 );
531 }
532 }
533 return result;
534} // eo convert_hex_to_binary(const std::string&)
535
536
537} // eo namespace I2n
538
539
540
6a93d84a 541
e93545dd
GE
/**
 * @brief converts a string from ISO-8859-1 to UTF-8.
 * @param isostring the ISO-8859-1 encoded input.
 * @return the UTF-8 encoded string; embedded NUL characters are preserved.
 * @throw std::runtime_error if iconv does not support the conversion.
 *
 * A failing conversion is not reported; whatever iconv converted up to that
 * point is returned.
 */
std::string iso_to_utf8(const std::string& isostring)
{
    size_t in_size= isostring.size();
    const size_t capacity= in_size * 4;
    size_t out_size= capacity;

    // the buffer is owned by a std::string, so it is released on every exit path
    // (one extra char keeps &buf[0] valid for an empty input)
    std::string buf(capacity + 1, '\0');

    iconv_t i2utf8= iconv_open("UTF-8", "ISO-8859-1");
    if (i2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from ISO-8859-1 to UTF-8");

    // iconv() takes a non-const input pointer but does not modify the input
    char *in= const_cast<char *>( isostring.c_str() );
    char *out= &buf[0];
    iconv(i2utf8, &in, &in_size, &out, &out_size);
    iconv_close(i2utf8);

    // out_size is the unused rest of the buffer
    buf.resize(capacity - out_size);
    return buf;
}
571
/**
 * @brief converts a string from UTF-8 to ISO-8859-1.
 * @param utf8string the UTF-8 encoded input.
 * @return the ISO-8859-1 encoded string; embedded NUL characters are preserved.
 * @throw std::runtime_error if iconv does not support the conversion.
 *
 * A failing conversion (e.g. invalid input) is not reported; whatever iconv
 * converted up to that point is returned.
 */
std::string utf8_to_iso(const std::string& utf8string)
{
    size_t in_size= utf8string.size();
    // ISO-8859-1 never needs more bytes than the UTF-8 input
    const size_t capacity= in_size;
    size_t out_size= capacity;

    // the buffer is owned by a std::string, so it is released on every exit path
    // (one extra char keeps &buf[0] valid for an empty input)
    std::string buf(capacity + 1, '\0');

    iconv_t utf82iso= iconv_open("ISO-8859-1", "UTF-8");
    if (utf82iso == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to ISO-8859-1");

    // iconv() takes a non-const input pointer but does not modify the input
    char *in= const_cast<char *>( utf8string.c_str() );
    char *out= &buf[0];
    iconv(utf82iso, &in, &in_size, &out, &out_size);
    iconv_close(utf82iso);

    // out_size is the unused rest of the buffer
    buf.resize(capacity - out_size);
    return buf;
}
601
a5f3af6e
GE
/**
 * @brief converts a UTF-8 string into a zero terminated wchar_t buffer.
 * @param utf8string the UTF-8 encoded input.
 * @return a buffer allocated with malloc(); the caller has to free() it.
 * @throw std::runtime_error if iconv does not support the conversion, the
 *  buffer cannot be allocated or the input cannot be converted.
 *
 * The output is produced as "UCS-4LE" and stored in wchar_t elements.
 * NOTE(review): this presumably relies on a 4 byte little endian wchar_t - confirm
 * for the supported platforms.
 */
wchar_t* utf8_to_wbuf(const std::string& utf8string)
{
    iconv_t utf82wstr= iconv_open("UCS-4LE", "UTF-8");
    if (utf82wstr == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UCS-4");

    size_t in_size= utf8string.size();
    // one element per input byte is the worst case; plus one for the terminator
    const size_t capacity= (in_size + 1) * sizeof(wchar_t);
    size_t out_size= capacity;

    wchar_t *buf= static_cast<wchar_t *>( malloc(capacity) );
    if (buf == NULL)
    {
        iconv_close(utf82wstr);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const input pointer but does not modify the input
    char *in= const_cast<char *>( utf8string.c_str() );
    char *out= reinterpret_cast<char *>(buf);
    const size_t rc= iconv(utf82wstr, &in, &in_size, &out, &out_size);
    iconv_close(utf82wstr);

    if (rc == static_cast<size_t>(-1) )
    {
        // nobody else knows the buffer yet, so release it before reporting the error
        free(buf);
        throw std::runtime_error("error converting char encodings");
    }

    // terminate right behind the last converted character
    buf[ (capacity - out_size) / sizeof(wchar_t) ]= 0;

    return buf;
}
627
/**
 * @brief converts a string from the IMAP flavour of UTF-7 to UTF-8.
 * @param utf7imapstring the "UTF-7-IMAP" encoded input.
 * @return the UTF-8 encoded string; embedded NUL characters are preserved.
 * @throw std::runtime_error if iconv does not support the conversion.
 *
 * A failing conversion is not reported; whatever iconv converted up to that
 * point is returned.
 */
std::string utf7imap_to_utf8(const std::string& utf7imapstring)
{
    size_t in_size= utf7imapstring.size();
    const size_t capacity= in_size * 4;
    size_t out_size= capacity;

    // the buffer is owned by a std::string, so it is released on every exit path
    // (one extra char keeps &buf[0] valid for an empty input)
    std::string buf(capacity + 1, '\0');

    iconv_t utf7imap2utf8= iconv_open("UTF-8", "UTF-7-IMAP");
    if (utf7imap2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-7-IMAP to UTF-8");

    // iconv() takes a non-const input pointer but does not modify the input
    char *in= const_cast<char *>( utf7imapstring.c_str() );
    char *out= &buf[0];
    iconv(utf7imap2utf8, &in, &in_size, &out, &out_size);
    iconv_close(utf7imap2utf8);

    // out_size is the unused rest of the buffer
    buf.resize(capacity - out_size);
    return buf;
}
657
6a2b6dd1
TJ
/**
 * @brief converts a string from UTF-8 to the IMAP flavour of UTF-7.
 * @param utf8string the UTF-8 encoded input.
 * @return the "UTF-7-IMAP" encoded string.
 * @throw std::runtime_error if iconv does not support the conversion.
 *
 * A failing conversion is not reported; whatever iconv converted up to that
 * point is returned.
 */
std::string utf8_to_utf7imap(const std::string& utf8string)
{
    // UTF-7 is base64 encoded, a buffer 10x as large
    // as the utf-8 buffer should be enough. If not the string will be truncated.
    size_t in_size= utf8string.size();
    const size_t capacity= in_size * 10;
    size_t out_size= capacity;

    // the buffer is owned by a std::string, so it is released on every exit path
    // (one extra char keeps &buf[0] valid for an empty input)
    std::string buf(capacity + 1, '\0');

    iconv_t utf82utf7imap= iconv_open("UTF-7-IMAP", "UTF-8");
    if (utf82utf7imap == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UTF-7-IMAP");

    // iconv() takes a non-const input pointer but does not modify the input
    char *in= const_cast<char *>( utf8string.c_str() );
    char *out= &buf[0];
    iconv(utf82utf7imap, &in, &in_size, &out, &out_size);
    // UTF-7 is a stateful encoding: a call without input makes iconv write the
    // bytes which return to the initial shift state (ends a pending base64 section)
    iconv(utf82utf7imap, NULL, NULL, &out, &out_size);
    iconv_close(utf82utf7imap);

    // out_size is the unused rest of the buffer
    buf.resize(capacity - out_size);
    return buf;
}
689
118e216e
TJ
/**
 * @brief tokenizes a string by (html) tags.
 * @param[out] tokenized the tokens are appended here; @a first is the text of
 *  the token, @a second is @a true iff the token is a complete tag ("<...>").
 * @param input the string to tokenize.
 *
 * Every '<' starts a new token. A '<' which is never closed by a '>' ends up
 * in a token which is not marked as a tag. A '>' outside of a tag is ordinary text.
 */
void tokenize_by_tag(std::vector<std::pair<std::string,bool> > &tokenized, const std::string &input)
{
    bool inside_tag= false;
    std::string current;

    for (std::string::size_type pos= 0; pos < input.size(); ++pos)
    {
        const char ch= input[pos];
        if (ch == '<')
        {
            // text collected so far is finished (even an unclosed tag start)
            if (!current.empty() )
            {
                tokenized.push_back( std::make_pair(current, false) );
                current.clear();
            }
            inside_tag= true;
            current+= ch;
        }
        else if (ch == '>' && inside_tag)
        {
            // the tag is complete, including its closing bracket
            current+= ch;
            inside_tag= false;
            tokenized.push_back( std::make_pair(current, true) );
            current.clear();
        }
        else
        {
            current+= ch;
        }
    }

    // String left over in buffer?
    if (!current.empty() )
        tokenized.push_back( std::make_pair(current, false) );
} // eo tokenize_by_tag
118e216e 729
118e216e
TJ
730
731std::string strip_html_tags(const std::string &input)
732{
6ab3bc95
RP
733 // Pair first: string, second: isTag
734 vector<pair<string,bool> > tokenized;
735 tokenize_by_tag (tokenized, input);
118e216e 736
6ab3bc95
RP
737 string output;
738 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
739 for (token = tokenized.begin(); token != tokens_end; token++)
740 if (!token->second)
741 output += token->first;
742
743 return output;
744} // eo strip_html_tags
118e216e 745
118e216e
TJ
746
747// Smart-encode HTML en
748string smart_html_entities(const std::string &input)
749{
6ab3bc95
RP
750 // Pair first: string, second: isTag
751 vector<pair<string,bool> > tokenized;
752 tokenize_by_tag (tokenized, input);
753
754 string output;
755 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
756 for (token = tokenized.begin(); token != tokens_end; token++)
757 {
758 // keep HTML tags as they are
759 if (token->second)
760 output += token->first;
761 else
762 output += html_entities(token->first);
763 }
764
765 return output;
118e216e
TJ
766}
767
6ab3bc95 768
a5f3af6e
GE
/**
 * @brief searches the first character outside of the 7 bit ASCII range.
 * @param str the string to search.
 * @return the position of the first char with a value above 127 or
 *  std::string::npos if there is none.
 */
std::string::size_type find_8bit(const std::string &str)
{
    for (std::string::size_type pos= 0; pos < str.size(); ++pos)
    {
        // compare as unsigned; a plain char may be negative for these values
        if (static_cast<unsigned char>(str[pos]) > 127)
            return pos;
    }

    return std::string::npos;
}
778
118e216e
TJ
779// encoded UTF-8 chars into HTML entities
780string html_entities(std::string str)
781{
6ab3bc95
RP
782 // Normal chars
783 replace_all (str, "&", "&amp;");
6ab3bc95
RP
784 replace_all (str, "<", "&lt;");
785 replace_all (str, ">", "&gt;");
980577e1
TJ
786 replace_all (str, "\"", "&quot;");
787 replace_all (str, "'", "&#x27;");
788 replace_all (str, "/", "&#x2F;");
6ab3bc95
RP
789
790 // Umlauts
791 replace_all (str, "\xC3\xA4", "&auml;");
792 replace_all (str, "\xC3\xB6", "&ouml;");
793 replace_all (str, "\xC3\xBC", "&uuml;");
794 replace_all (str, "\xC3\x84", "&Auml;");
795 replace_all (str, "\xC3\x96", "&Ouml;");
796 replace_all (str, "\xC3\x9C", "&Uuml;");
797
798 // Misc
799 replace_all (str, "\xC3\x9F", "&szlig;");
800
801 // conversion of remaining non-ASCII chars needed?
802 // just do if needed because of performance
803 if (find_8bit(str) != string::npos)
804 {
805 // convert to fixed-size encoding UTF-32
806 wchar_t* wbuf=utf8_to_wbuf(str);
807 ostringstream target;
808
809 // replace all non-ASCII chars with HTML representation
810 for (int p=0; wbuf[p] != 0; p++)
811 {
812 unsigned int c=wbuf[p];
813
814 if (c <= 127)
815 target << static_cast<unsigned char>(c);
816 else
817 target << "&#" << c << ';';
818 }
819
820 free(wbuf);
821
822 str=target.str();
823 }
824
825 return str;
826} // eo html_entities(std::string)
827
118e216e 828
e93545dd
GE
829bool replace_all(string &base, const char *ist, const char *soll)
830{
6ab3bc95
RP
831 string i=ist;
832 string s=soll;
833 return replace_all(base,&i,&s);
e93545dd
GE
834}
835
836bool replace_all(string &base, const string &ist, const char *soll)
837{
6ab3bc95
RP
838 string s=soll;
839 return replace_all(base,&ist,&s);
e93545dd
GE
840}
841
842bool replace_all(string &base, const string *ist, const string *soll)
843{
6ab3bc95 844 return replace_all(base,*ist,*soll);
e93545dd
GE
845}
846
847bool replace_all(string &base, const char *ist, const string *soll)
848{
6ab3bc95
RP
849 string i=ist;
850 return replace_all(base,&i,soll);
e93545dd
GE
851}
852
/**
 * @brief replaces all occurrences of @a ist in @a base with @a soll.
 * @param[in,out] base the string to modify.
 * @param ist the text to search for; must not be empty.
 * @param soll the replacement text.
 * @return @a true iff at least one occurrence was replaced.
 * @throw std::runtime_error if @a ist is empty.
 *
 * The search continues behind the inserted text, so a replacement which
 * contains the search text is not replaced again.
 */
bool replace_all(std::string &base, const std::string &ist, const std::string &soll)
{
    if (ist.empty() )
        throw std::runtime_error ("replace_all called with empty search string");

    bool found_ist= false;
    std::string::size_type pos= base.find(ist);
    while (pos != std::string::npos)
    {
        base.replace(pos, ist.size(), soll);
        found_ist= true;
        // skip the text we just inserted
        pos= base.find(ist, pos + soll.size() );
    }

    return found_ist;
}
870
#if 0
// Disabled (never compiled) implementations of to_lower() / to_upper().
// This file defines to_lower() and to_upper() inside namespace I2n; this code
// is only kept for reference.
string to_lower(const string &src)
{
    string dst = src;

    string::size_type pos, end = dst.size();
    for (pos = 0; pos < end; pos++)
        dst[pos] = tolower(dst[pos]);

    return dst;
}

string to_upper(const string &src)
{
    string dst = src;

    string::size_type pos, end = dst.size();
    for (pos = 0; pos < end; pos++)
        dst[pos] = toupper(dst[pos]);

    return dst;
}
#endif
e93545dd 894
83809f5e 895const int MAX_UNIT_FORMAT_SYMBOLS = 6;
d1ea9075 896
2cb9a9c5 897const string shortUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
d1ea9075
GMF
898 " B",
899 " KB",
900 " MB",
901 " GB",
902 " TB",
83809f5e 903 " PB"
d1ea9075
GMF
904};
905
2cb9a9c5 906const string longUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
5cb766b9
GMF
907 i18n_noop(" Bytes"),
908 i18n_noop(" KBytes"),
909 i18n_noop(" MBytes"),
910 i18n_noop(" GBytes"),
911 i18n_noop(" TBytes"),
83809f5e 912 i18n_noop(" PBytes")
d1ea9075
GMF
913};
914
72a94426
GMF
915
/**
 * @brief rounds a number "half up" to a multiple of 1/@a rounding_multiplier.
 * @param number the number to round.
 * @param rounding_multiplier the inverse of the rounding step (10 rounds to
 *  one decimal place, 100 to two, ...).
 * @return the rounded number. Numbers whose scaled value does not fit into an
 *  int64_t (and NaN) are returned unchanged.
 *
 * "Half up" means that ties are rounded towards positive infinity, for
 * negative numbers as well (-1.25 becomes -1.2, -1.26 becomes -1.3).
 */
long double rounding_upwards(
    long double number,
    const int rounding_multiplier
)
{
    const long double scaled = number * rounding_multiplier + 0.5;

    // converting NaN or a value outside the int64_t range would be undefined
    const long double int64_limit = 9223372036854775807.0L;
    if (scaled != scaled || scaled >= int64_limit || scaled <= -int64_limit)
        return number;

    // the conversion truncates towards zero; correct it to a real floor for
    // negative values which have a fractional part
    int64_t whole = static_cast<int64_t>(scaled);
    if (scaled < static_cast<long double>(whole) )
        --whole;

    return static_cast<long double>(whole) / static_cast<long double>(rounding_multiplier);
}
929
930
81267544
GMF
931string nice_unit_format(
932 const int64_t input,
a398513a
GMF
933 const UnitBase base,
934 const UnitFormat format
81267544 935)
6ab3bc95 936{
d1ea9075 937 // select the system of units (decimal or binary)
81267544 938 int multiple = 0;
a398513a 939 if (base == UnitBase1000)
81267544
GMF
940 {
941 multiple = 1000;
942 }
943 else
944 {
945 multiple = 1024;
946 }
947
948 long double size = input;
6ab3bc95 949
d1ea9075
GMF
950 // check the size of the input number to fit in the appropriate symbol
951 int sizecount = 0;
81267544 952 while (size > multiple)
6ab3bc95 953 {
81267544
GMF
954 size = size / multiple;
955 sizecount++;
83809f5e
GMF
956
957 // rollback to the previous values and stop the loop when cannot
958 // represent the number length.
959 if (sizecount >= MAX_UNIT_FORMAT_SYMBOLS)
960 {
961 size = size * multiple;
962 sizecount--;
963 break;
964 }
6ab3bc95
RP
965 }
966
a398513a
GMF
967 // round the input number "half up" to multiples of 10
968 const int rounding_multiplier = 10;
72a94426 969 size = rounding_upwards(size, rounding_multiplier);
6ab3bc95 970
d1ea9075 971 // format the input number, placing the appropriate symbol
6ab3bc95 972 ostringstream out;
6ab3bc95 973 out.setf (ios::fixed);
a398513a 974 if (format == ShortUnitFormat)
d1ea9075
GMF
975 {
976 out.precision(1);
68d37a5c 977 out << size << i18n( shortUnitFormatSymbols[sizecount].c_str() );
d1ea9075
GMF
978 }
979 else
6ab3bc95 980 {
d1ea9075 981 out.precision (2);
68d37a5c 982 out << size << i18n( longUnitFormatSymbols[sizecount].c_str() );
6ab3bc95
RP
983 }
984
985 return out.str();
986} // eo nice_unit_format(int input)
987
e93545dd 988
47c07fba
GE
/**
 * @brief escapes a string and puts it into double quotes.
 * @param s the string to escape.
 * @return the quoted string: every double quote and backslash is preceded by a
 *  backslash, carriage return becomes "\r" and newline becomes "\n"
 *  (backslash + letter).
 */
std::string escape(const std::string &s)
{
    std::string out;
    out.reserve(s.size() + 2);

    out+= '"';
    for (std::string::const_iterator it= s.begin(); it != s.end(); ++it)
    {
        switch (*it)
        {
            case '"':
            case '\\':
                out+= '\\';
                out+= *it;
                break;
            case '\r':
                out+= "\\r";
                break;
            case '\n':
                out+= "\\n";
                break;
            default:
                out+= *it;
        }
    }
    out+= '"';

    return out;
} // eo escape(const std::string&)
47c07fba 1019
47c07fba 1020
6ab3bc95
RP
/**
 * @brief extracts and unescapes a double quoted string (reverse of escape()).
 * @param s the text which contains the quoted string.
 * @param startpos position of the opening double quote within @a s.
 * @param[out] endpos position of the closing double quote within @a s.
 * @return the unescaped content between the quotes.
 * @throw std::out_of_range if @a startpos is outside of @a s, if there is no
 *  double quote at @a startpos, or if the content ends with a single backslash.
 *
 * If there is no closing quote the rest of @a s is taken as content;
 * @a endpos is then computed from std::string::npos and is not meaningful.
 */
std::string descape(const std::string &s, int startpos, int &endpos)
{
    if (s.at(startpos) != '"')
        throw std::out_of_range("value not type escaped string");

    std::string out= s.substr(startpos + 1);
    std::string::size_type p= 0;

    // search for the end of the string
    while ( (p= out.find('"', p) ) != std::string::npos)
    {
        // the " might be escaped with a backslash: it is escaped iff the number
        // of backslashes directly in front of it is odd
        std::string::size_type backslashes= 0;
        while (backslashes < p && out[p - 1 - backslashes] == '\\')
            ++backslashes;

        if (backslashes % 2 == 0)
            break;

        ++p;
    }

    // we now have the end of the string
    out= out.substr(0, p);

    // tell calling prog about the endposition
    endpos= startpos + p + 1;

    // descape all \ stuff inside the string now
    p= 0;
    while ( (p= out.find('\\', p) ) != std::string::npos)
    {
        switch (out.at(p + 1) )
        {
            case 'r':
                out.replace(p, 2, "\r");
                break;
            case 'n':
                out.replace(p, 2, "\n");
                break;
            default:
                // drop the backslash and keep the escaped character
                out.erase(p, 1);
        }
        // continue behind the character we just produced
        ++p;
    }

    return out;
} // eo descape(const std::string&,int,int&)
47c07fba 1080
e93545dd 1081
47c07fba
GE
/**
 * @brief quotes a string so that it can be used as a single shell argument.
 * @param input the string to quote.
 * @return @a input enclosed in single quotes; every single quote inside is
 *  written as '\'' (close the quoting, escaped quote, reopen the quoting).
 */
std::string escape_shellarg(const std::string &input)
{
    std::string output= "'";
    for (std::string::const_iterator it= input.begin(); it != input.end(); ++it)
    {
        if (*it == '\'')
        {
            // leave the quoted section and add an escaped quote; the quote
            // appended below reopens the quoted section
            output+= "'\\'";
        }
        output+= *it;
    }
    output+= "'";

    return output;
}