Added function format_date which is format_full_time without time
[libi2ncommon] / src / stringfunc.cpp
CommitLineData
0e23f538
TJ
1/*
2The software in this package is distributed under the GNU General
3Public License version 2 (with a special exception described below).
4
5A copy of GNU General Public License (GPL) is included in this distribution,
6in the file COPYING.GPL.
7
8As a special exception, if other files instantiate templates or use macros
9or inline functions from this file, or you compile this file and link it
10with other works to produce a work based on this file, this file
11does not by itself cause the resulting work to be covered
12by the GNU General Public License.
13
14However the source code for this file must still be made available
15in accordance with section (3) of the GNU General Public License.
16
17This exception does not invalidate any other reasons why a work based
18on this file might be covered by the GNU General Public License.
19*/
6a93d84a
TJ
20/** @file
21 *
22 * (c) Copyright 2007-2008 by Intra2net AG
6a93d84a 23 */
e93545dd
GE
24
25#include <iostream>
26#include <string>
27#include <sstream>
28#include <stdexcept>
5efd35b1 29#include <algorithm>
5cd64148 30#include <cmath> // for round()
e93545dd 31
a5f3af6e 32#include <wchar.h>
e93545dd
GE
33#include <stdlib.h>
34#include <iconv.h>
35#include <i18n.h>
36
5cd64148
CH
37#include <boost/numeric/conversion/cast.hpp>
38
e93545dd
GE
39#include <stringfunc.hxx>
40
41using namespace std;
42
6ab3bc95
RP
43namespace I2n
44{
6a93d84a
TJ
45
46
6ab3bc95
RP
47namespace
48{
6a93d84a
TJ
49
50const std::string hexDigitsLower("0123456789abcdef");
51const std::string hexDigitsUpper("0123456789ABCDEF");
52
53
/**
 * @brief functor which converts a single character to upper case.
 *
 * Used as transformation for std::transform() in to_upper_mod().
 */
struct UpperFunc
{
    char operator() (char c)
    {
        // std::toupper() expects a value representable as unsigned char (or EOF);
        // passing a negative plain char (8 bit characters) is undefined behaviour.
        return static_cast<char>( std::toupper( static_cast<unsigned char>(c) ) );
    }
}; // eo struct UpperFunc
61
62
/**
 * @brief functor which converts a single character to lower case.
 *
 * Used as transformation for std::transform() in to_lower_mod().
 */
struct LowerFunc
{
    char operator() (char c)
    {
        // std::tolower() expects a value representable as unsigned char (or EOF);
        // passing a negative plain char (8 bit characters) is undefined behaviour.
        return static_cast<char>( std::tolower( static_cast<unsigned char>(c) ) );
    }
}; // eo struct LowerFunc
70
71
72} // eo namespace <anonymous>
73
74
75
76/**
6ab3bc95 77 * default list of Whitespaces (" \t\r\n");
6a93d84a 78 */
6ab3bc95 79const std::string Whitespaces = " \t\r\n";
6a93d84a
TJ
80
81/**
82 * default list of lineendings ("\r\n");
83 */
6ab3bc95 84const std::string LineEndings= "\r\n";
6a93d84a
TJ
85
86
87
/**
 * @brief tests whether a string starts with a given prefix.
 * @param str the string which is examined.
 * @param prefix the prefix to look for.
 * @return @a true iff @a prefix is not empty and @a str begins with it.
 */
bool has_prefix(const std::string& str, const std::string& prefix)
{
    const std::string::size_type prefix_len= prefix.size();
    // an empty prefix never matches; a prefix longer than the string can't match either
    // (the latter also covers an empty str since prefix_len is at least 1 then)
    if (prefix_len == 0 || prefix_len > str.size() )
    {
        return false;
    }
    return str.compare(0, prefix_len, prefix) == 0;
} // eo has_prefix(const std::string&,const std::string&)
6a93d84a
TJ
102
103
/**
 * @brief tests whether a string ends with a given suffix.
 * @param str the string which is examined.
 * @param suffix the suffix to look for.
 * @return @a true iff @a suffix is not empty and @a str ends with it.
 */
bool has_suffix(const std::string& str, const std::string& suffix)
{
    const std::string::size_type suffix_len= suffix.size();
    // an empty suffix never matches; a suffix longer than the string can't match either
    // (the latter also covers an empty str since suffix_len is at least 1 then)
    if (suffix_len == 0 || suffix_len > str.size() )
    {
        return false;
    }
    return str.compare(str.size() - suffix_len, suffix_len, suffix) == 0;
} // eo has_suffix(const std::string&,const std::string&)
6a93d84a
TJ
118
119
/**
 * @brief strips characters of a given list from both ends of a string (in place).
 * @param[in,out] str the string which is trimmed.
 * @param charlist the characters which are removed from the beginning and the end.
 * @return a copy of the trimmed string.
 */
std::string trim_mod(std::string& str, const std::string& charlist)
{
    const std::string::size_type first_kept= str.find_first_not_of(charlist);
    if (first_kept == std::string::npos)
    {
        // nothing to keep: the string is empty or consists of charlist characters only
        str.clear();
        return str;
    }
    // at least one character survives, so this search always succeeds:
    const std::string::size_type last_kept= str.find_last_not_of(charlist);
    // cut the tail first; that way the position of the head stays valid
    str.erase(last_kept + 1);
    str.erase(0, first_kept);
    return str;
} // eo trim_mod(std::string&,const std::string&)
6a93d84a
TJ
149
150
151
/**
 * @brief removes the last character of a string if it is contained in a given list (in place).
 * @param[in,out] str the string.
 * @param what the list of characters which may be removed.
 * @return a copy of the resulting string.
 *
 * At most one character is removed.
 */
std::string chomp_mod(std::string& str, const std::string& what)
{
    if (!str.empty() && !what.empty() )
    {
        const std::string::size_type last= str.size() - 1;
        if (what.find( str[last] ) != std::string::npos)
        {
            str.erase(last);
        }
    }
    return str;
} // eo chomp_mod(std::string&,const std::string&)
6a93d84a
TJ
170
171
172/**
173 * @brief converts a string to lower case.
174 * @param[in,out] str the string to modify.
175 * @return the string
176 */
6ab3bc95 177std::string to_lower_mod(std::string& str)
6a93d84a 178{
6ab3bc95
RP
179 std::transform(str.begin(), str.end(), str.begin(), LowerFunc() );
180 return str;
181} // eo to_lower_mod(std::string&)
6a93d84a
TJ
182
183
184/**
185 * @brief converts a string to upper case.
186 * @param[in,out] str the string to modify.
187 * @return the string
188 */
6ab3bc95 189std::string to_upper_mod(std::string& str)
6a93d84a 190{
6ab3bc95
RP
191 std::transform( str.begin(), str.end(), str.begin(), UpperFunc() );
192 return str;
193} // eo to_upper_mod(std::string&)
6a93d84a
TJ
194
195
196
/**
 * @brief returns a copy of a string with characters of a given list stripped from both ends.
 * @param str the string which should be trimmed.
 * @param charlist the characters which are removed from the beginning and the end.
 * @return the trimmed string.
 */
std::string trim (const std::string& str, const std::string& charlist)
{
    const std::string::size_type first_kept= str.find_first_not_of(charlist);
    if (first_kept == std::string::npos)
    {
        // empty string or nothing but charlist characters
        return std::string();
    }
    // there is at least one character to keep, so this search can't fail:
    const std::string::size_type last_kept= str.find_last_not_of(charlist);
    return std::string(str, first_kept, last_kept - first_kept + 1);
} // eo trim(const std::string&,const std::string&)
216
217
/**
 * @brief returns a copy of a string without its last character if that one is in a given list.
 * @param str the string.
 * @param what the list of characters which may be removed.
 * @return the resulting string (at most one character shorter than @a str).
 */
std::string chomp (const std::string& str, const std::string& what)
{
    if (str.empty() || what.empty() )
    {
        return str;
    }
    const bool strip_last= what.find( *str.rbegin() ) != std::string::npos;
    return strip_last ? std::string(str, 0, str.size() - 1) : str;
} // eo chomp(const std::string&,const std::string&)
236
237
238/**
239 * @brief returns a lower case version of a given string.
240 * @param str the string
241 * @return the lower case version of the string
242 */
6ab3bc95 243std::string to_lower (const std::string& str)
6a93d84a 244{
6ab3bc95
RP
245 std::string result(str);
246 return to_lower_mod(result);
247} // eo to_lower(const std::string&)
6a93d84a
TJ
248
249
250/**
251 * @brief returns a upper case version of a given string.
252 * @param str the string
253 * @return the upper case version of the string
254 */
6ab3bc95 255std::string to_upper(const std::string& str)
6a93d84a 256{
6ab3bc95
RP
257 std::string result(str);
258 return to_upper_mod(result);
259} // eo to_upper(const std::string&)
6a93d84a
TJ
260
261
262
263/**
264 * @brief removes a given suffix from a string.
265 * @param str the string.
266 * @param suffix the suffix which should be removed if the string ends with it.
267 * @return the string without the suffix.
268 *
269 * If the string ends with the suffix, it is removed. If the the string doesn't end
270 * with the suffix the original string is returned.
271 */
6ab3bc95 272std::string remove_suffix(const std::string& str, const std::string& suffix)
6a93d84a 273{
6ab3bc95
RP
274 if (has_suffix(str,suffix) )
275 {
276 return str.substr(0, str.size()-suffix.size() );
277 }
278 return str;
279} // eo remove_suffix(const std::string&,const std::string&)
6a93d84a
TJ
280
281
282
283/**
284 * @brief removes a given prefix from a string.
285 * @param str the string.
286 * @param prefix the prefix which should be removed if the string begins with it.
287 * @return the string without the prefix.
288 *
289 * If the string begins with the prefix, it is removed. If the the string doesn't begin
290 * with the prefix the original string is returned.
291 */
6ab3bc95 292std::string remove_prefix(const std::string& str, const std::string& prefix)
6a93d84a 293{
6ab3bc95
RP
294 if (has_prefix(str,prefix) )
295 {
296 return str.substr( prefix.size() );
297 }
298 return str;
299} // eo remove_prefix(const std::string&,const std::string&)
6a93d84a
TJ
300
301
302/**
303 * split a string to key and value delimited by a given delimiter.
6ab3bc95 304 * The resulting key and value strings are trimmed (Whitespaces removed at beginning and end).
6a93d84a
TJ
305 * @param str the string which should be splitted.
306 * @param[out] key the resulting key
307 * @param[out] value the resulting value
308 * @param delimiter the delimiter between key and value; default is '='.
309 * @return @a true if the split was successful.
310 */
6ab3bc95
RP
311bool pair_split(
312 const std::string& str,
313 std::string& key,
314 std::string& value,
315 char delimiter)
316{
317 std::string::size_type pos = str.find (delimiter);
318 if (pos == std::string::npos) return false;
319 key= str.substr(0,pos);
320 value= str.substr(pos+1);
321 trim_mod(key);
322 trim_mod(value);
323 return true;
324} // eo pair_split(const std::string&,std::string&,std::string&,char)
6a93d84a
TJ
325
326
327/**
328 * splits a string by given delimiter
329 *
330 * @param[in] str the string which should be splitted.
331 * @param[out] result the list resulting from splitting @a str.
332 * @param[in] delimiter the delimiter (word/phrase) at which @a str should be splitted.
333 * @param[in] omit_empty should empty parts not be stored?
334 * @param[in] trim_list list of characters the parts should be trimmed by.
335 * (empty string results in no trim)
336 */
6ab3bc95
RP
337void split_string(
338 const std::string& str,
339 std::list<std::string>& result,
340 const std::string& delimiter,
341 bool omit_empty,
342 const std::string& trim_list
6a93d84a
TJ
343)
344{
6ab3bc95
RP
345 std::string::size_type pos, last_pos=0;
346 bool delimiter_found= false;
347 while ( last_pos < str.size() && last_pos != std::string::npos)
348 {
349 pos= str.find(delimiter, last_pos);
350 std::string part;
351 if (pos == std::string::npos)
352 {
353 part= str.substr(last_pos);
354 delimiter_found= false;
355 }
356 else
357 {
358 part= str.substr(last_pos, pos-last_pos);
359 delimiter_found=true;
360 }
361 if (pos != std::string::npos)
362 {
363 last_pos= pos+ delimiter.size();
364 }
365 else
366 {
367 last_pos= std::string::npos;
368 }
369 if (!trim_list.empty() ) trim_mod (part, trim_list);
370 if (omit_empty && part.empty() ) continue;
371 result.push_back( part );
372 }
373 // if the string ends with a delimiter we need to append an empty string if no omit_empty
374 // was given.
375 // (this way we keep the split result consistent to a join operation)
376 if (delimiter_found && !omit_empty)
377 {
378 result.push_back("");
379 }
380} // eo split_string(const std::string&,std::list< std::string >&,const std::string&,bool,const std::string&)
6a93d84a
TJ
381
382
383/**
384 * splits a string by a given delimiter
385 * @param str the string which should be splitted.
386 * @param delimiter delimiter the delimiter (word/phrase) at which @a str should be splitted.
387 * @param[in] omit_empty should empty parts not be stored?
388 * @param[in] trim_list list of characters the parts should be trimmed by.
389 * (empty string results in no trim)
390 * @return the list resulting from splitting @a str.
391 */
6ab3bc95
RP
392std::list<std::string> split_string(
393 const std::string& str,
394 const std::string& delimiter,
395 bool omit_empty,
396 const std::string& trim_list
6a93d84a
TJ
397)
398{
6ab3bc95
RP
399 std::list<std::string> result;
400 split_string(str, result, delimiter, omit_empty, trim_list);
401 return result;
402} // eo split_string(const std::string&,const std::string&,bool,const std::string&)
6a93d84a
TJ
403
404
/**
 * @brief joins a list of strings into a single string.
 *
 * This function is (basically) the reverse operation of @a split_string.
 *
 * @param parts the list of strings.
 * @param delimiter the delimiter which is inserted between two consecutive strings.
 * @return the joined string (empty if @a parts is empty).
 */
std::string join_string(
    const std::list< std::string >& parts,
    const std::string& delimiter
)
{
    std::string joined;
    for (std::list< std::string >::const_iterator it= parts.begin();
         it != parts.end();
         ++it)
    {
        // the delimiter goes in front of every part but the first one
        if (it != parts.begin() )
        {
            joined+= delimiter;
        }
        joined+= *it;
    }
    return joined;
} // eo join_string(const std::list< std::string >&,const std::string&)
6a93d84a
TJ
432
433
376ec4fa
CH
/**
 * @brief joins a vector of strings into a single string.
 *
 * Same as join_string() for lists, except that it takes a vector.
 *
 * @param parts the vector of strings.
 * @param delimiter the delimiter which is inserted between two consecutive strings.
 * @return the joined string (empty if @a parts is empty).
 */
std::string join_string(
    const std::vector< std::string >& parts,
    const std::string& delimiter
)
{
    std::string joined;
    for (std::vector< std::string >::size_type idx= 0; idx < parts.size(); ++idx)
    {
        // the delimiter goes in front of every part but the first one
        if (idx > 0)
        {
            joined+= delimiter;
        }
        joined+= parts[idx];
    }
    return joined;
} // eo join_string(const std::vector< std::string >&,const std::string&)
453
454
6a93d84a
TJ
455
456/*
457** conversions
458*/
459
460
461/**
462 * @brief returns a hex string from a binary string.
463 * @param str the (binary) string
464 * @param upper_case_digits determine whether to use upper case characters for digits A-F.
465 * @return the string in hex notation.
466 */
6ab3bc95
RP
467std::string convert_binary_to_hex(
468 const std::string& str,
469 bool upper_case_digits
6a93d84a
TJ
470)
471{
6ab3bc95
RP
472 std::string result;
473 std::string hexDigits(upper_case_digits ? hexDigitsUpper : hexDigitsLower);
474 for ( std::string::const_iterator it= str.begin();
475 it != str.end();
476 ++it)
477 {
478 result.push_back( hexDigits[ ( (*it) >> 4) & 0x0f ] );
479 result.push_back( hexDigits[ (*it) & 0x0f ] );
480 }
481 return result;
482} // eo convert_binary_to_hex(const std::string&,bool)
6a93d84a
TJ
483
484
485/**
486 * @brief converts a hex digit string to binary string.
487 * @param str hex digit string
488 * @return the binary string.
489 *
490 * The hex digit string may contains white spaces or colons which are treated
491 * as delimiters between hex digit groups.
492 *
493 * @todo rework the handling of half nibbles (consistency)!
494 */
6ab3bc95
RP
495std::string convert_hex_to_binary(
496 const std::string& str
6a93d84a 497)
6ab3bc95
RP
498throw (std::runtime_error)
499{
500 std::string result;
501 char c= 0;
502 bool hasNibble= false;
503 bool lastWasWS= true;
504 for ( std::string::const_iterator it= str.begin();
505 it != str.end();
506 ++it)
507 {
508 std::string::size_type p = hexDigitsLower.find( *it );
509 if (p== std::string::npos)
510 {
511 p= hexDigitsUpper.find( *it );
512 }
513 if (p == std::string::npos)
514 {
515 if ( ( Whitespaces.find( *it ) != std::string::npos) // is it a whitespace?
6a93d84a 516 or ( *it == ':') // or a colon?
6ab3bc95
RP
517 )
518 {
519 // we treat that as a valid delimiter:
520 if (hasNibble)
6a93d84a 521 {
6ab3bc95
RP
522 // 1 nibble before WS is treate as lower part:
523 result.push_back(c);
524 // reset state:
525 hasNibble= false;
6a93d84a 526 }
6ab3bc95
RP
527 lastWasWS= true;
528 continue;
529 }
530 }
531 if (p == std::string::npos )
532 {
533 throw runtime_error("illegal character in hex digit string: " + str);
534 }
535 lastWasWS= false;
536 if (hasNibble)
537 {
538 c<<=4;
539 }
540 else
541 {
542 c=0;
543 }
544 c+= (p & 0x0f);
545 if (hasNibble)
546 {
547 //we already had a nibble, so a char is complete now:
548 result.push_back( c );
549 hasNibble=false;
550 }
551 else
552 {
553 // this is the first nibble of a new char:
554 hasNibble=true;
555 }
556 }
557 if (hasNibble)
558 {
559 //well, there is one nibble left
560 // let's do some heuristics:
561 if (lastWasWS)
562 {
563 // if the preceeding character was a white space (or a colon)
564 // we treat the nibble as lower part:
565 //( this is consistent with shortened hex notations where leading zeros are not noted)
566 result.push_back( c );
567 }
568 else
569 {
570 // if it was part of a hex digit chain, we treat it as UPPER part (!!)
571 result.push_back( c << 4 );
572 }
573 }
574 return result;
575} // eo convert_hex_to_binary(const std::string&)
576
577
578} // eo namespace I2n
579
580
581
6a93d84a 582
e93545dd
GE
/**
 * @brief converts a string from ISO-8859-1 to UTF-8.
 * @param isostring the ISO-8859-1 encoded string.
 * @return the UTF-8 encoded string. NUL bytes inside the input are preserved.
 * @throw std::runtime_error if the iconv conversion is not available or the
 *        conversion buffer can't be allocated.
 *
 * The result of the iconv() call itself is not checked; if the conversion stops early,
 * the part converted so far is returned.
 */
std::string iso_to_utf8(const std::string& isostring)
{
    iconv_t i2utf8 = iconv_open("UTF-8", "ISO-8859-1");

    // check the descriptor (not the address of this function!)
    if (i2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from ISO-8859-1 to UTF-8");

    size_t in_size = isostring.size();
    const size_t buf_size = in_size*4;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>( malloc(buf_size+1) );
    if (buf == NULL)
    {
        // don't leak the conversion descriptor
        iconv_close(i2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const pointer but does not modify the input
    char *in = const_cast<char *>( isostring.c_str() );
    char *out = buf;
    iconv(i2utf8, &in, &in_size, &out, &out_size);

    // out_size now holds the unused space; take the length explicitly so that
    // embedded NUL bytes don't truncate the result
    std::string result(buf, buf_size - out_size);

    free(buf);
    iconv_close(i2utf8);

    return result;
}
612
/**
 * @brief converts a string from UTF-8 to ISO-8859-1.
 * @param utf8string the UTF-8 encoded string.
 * @return the ISO-8859-1 encoded string. NUL bytes inside the input are preserved.
 * @throw std::runtime_error if the iconv conversion is not available or the
 *        conversion buffer can't be allocated.
 *
 * The result of the iconv() call itself is not checked; if the conversion stops early
 * (e.g. at a character iconv can't convert), the part converted so far is returned.
 */
std::string utf8_to_iso(const std::string& utf8string)
{
    iconv_t utf82iso = iconv_open("ISO-8859-1","UTF-8");

    if (utf82iso == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to ISO-8859-1");

    size_t in_size = utf8string.size();
    const size_t buf_size = in_size;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>( malloc(buf_size+1) );
    if (buf == NULL)
    {
        // don't leak the conversion descriptor
        iconv_close(utf82iso);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const pointer but does not modify the input
    char *in = const_cast<char *>( utf8string.c_str() );
    char *out = buf;
    iconv(utf82iso, &in, &in_size, &out, &out_size);

    // out_size now holds the unused space; take the length explicitly so that
    // embedded NUL bytes don't truncate the result
    std::string result(buf, buf_size - out_size);

    free(buf);
    iconv_close(utf82iso);

    return result;
}
642
a5f3af6e
GE
/**
 * @brief converts a UTF-8 string to a zero terminated buffer of wide characters.
 * @param utf8string the UTF-8 encoded string.
 * @return a buffer allocated with malloc(); the caller has to release it with free().
 * @throw std::runtime_error if the iconv conversion is not available, the buffer
 *        can't be allocated or the input can't be converted.
 *
 * NOTE(review): the conversion target is "UCS-4LE" written into a wchar_t buffer;
 * this presumably relies on a 4 byte little endian wchar_t - confirm for new platforms.
 */
wchar_t* utf8_to_wbuf(const std::string& utf8string)
{
    iconv_t utf82wstr = iconv_open("UCS-4LE","UTF-8");

    if (utf82wstr == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UCS-4");

    size_t in_size = utf8string.size();
    // room for one wide char per input byte plus the terminator
    const size_t buf_size = (in_size+1)*sizeof(wchar_t);
    size_t out_size = buf_size;

    wchar_t *buf = static_cast<wchar_t *>( malloc(buf_size) );
    if (buf == NULL)
    {
        // don't leak the conversion descriptor
        iconv_close(utf82wstr);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const pointer but does not modify the input
    char *in = const_cast<char *>( utf8string.c_str() );
    char *out = reinterpret_cast<char *>( buf );
    const size_t rc = iconv(utf82wstr, &in, &in_size, &out, &out_size);

    iconv_close(utf82wstr);

    if (rc == (size_t)-1)
    {
        // the caller never sees the buffer in this case, so release it here
        free(buf);
        throw std::runtime_error("error converting char encodings");
    }

    // out_size now holds the unused space (in bytes); terminate behind the last wide char
    buf[ (buf_size - out_size) / sizeof(wchar_t) ] = 0;

    return buf;
}
668
/**
 * @brief converts a string from UTF-7-IMAP (modified UTF-7 used for IMAP mailbox names) to UTF-8.
 * @param utf7imapstring the UTF-7-IMAP encoded string.
 * @return the UTF-8 encoded string. NUL bytes inside the input are preserved.
 * @throw std::runtime_error if the iconv implementation does not provide the
 *        "UTF-7-IMAP" conversion or the conversion buffer can't be allocated.
 *
 * The result of the iconv() call itself is not checked; if the conversion stops early,
 * the part converted so far is returned.
 */
std::string utf7imap_to_utf8(const std::string& utf7imapstring)
{
    iconv_t utf7imap2utf8 = iconv_open("UTF-8","UTF-7-IMAP");

    if (utf7imap2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-7-IMAP to UTF-8");

    size_t in_size = utf7imapstring.size();
    const size_t buf_size = in_size*4;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>( malloc(buf_size+1) );
    if (buf == NULL)
    {
        // don't leak the conversion descriptor
        iconv_close(utf7imap2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const pointer but does not modify the input
    char *in = const_cast<char *>( utf7imapstring.c_str() );
    char *out = buf;
    iconv(utf7imap2utf8, &in, &in_size, &out, &out_size);

    // out_size now holds the unused space; take the length explicitly so that
    // embedded NUL bytes don't truncate the result
    std::string result(buf, buf_size - out_size);

    free(buf);
    iconv_close(utf7imap2utf8);

    return result;
}
698
6a2b6dd1
TJ
/**
 * @brief converts a string from UTF-8 to UTF-7-IMAP (modified UTF-7 used for IMAP mailbox names).
 * @param utf8string the UTF-8 encoded string.
 * @return the UTF-7-IMAP encoded string.
 * @throw std::runtime_error if the iconv implementation does not provide the
 *        "UTF-7-IMAP" conversion or the conversion buffer can't be allocated.
 *
 * The result of the iconv() calls is not checked; if the conversion stops early or the
 * output buffer is too small, the part converted so far is returned.
 */
std::string utf8_to_utf7imap(const std::string& utf8string)
{
    iconv_t utf82utf7imap = iconv_open("UTF-7-IMAP", "UTF-8");

    if (utf82utf7imap == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UTF-7-IMAP");

    // UTF-7 is base64 encoded, a buffer 10x as large
    // as the utf-8 buffer should be enough. If not the string will be truncated.
    size_t in_size = utf8string.size();
    const size_t buf_size = in_size*10;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>( malloc(buf_size+1) );
    if (buf == NULL)
    {
        // don't leak the conversion descriptor
        iconv_close(utf82utf7imap);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const pointer but does not modify the input
    char *in = const_cast<char *>( utf8string.c_str() );
    char *out = buf;
    iconv(utf82utf7imap, &in, &in_size, &out, &out_size);

    // UTF-7 is a stateful encoding: a call without input makes iconv write the
    // sequence which returns to the initial shift state (i.e. closes an open base64 section).
    iconv(utf82utf7imap, NULL, NULL, &out, &out_size);

    // out_size now holds the unused space
    std::string result(buf, buf_size - out_size);

    free(buf);
    iconv_close(utf82utf7imap);

    return result;
}
730
118e216e
TJ
/**
 * @brief splits a string into (html) tag tokens and text tokens.
 * @param[out] tokenized the tokens are appended here; first: the token text,
 *             second: @a true if the token is a tag (starts with '<' and ends with '>').
 * @param input the string to tokenize.
 *
 * A '<' always starts a new token. A tag which is not closed by '>' is stored
 * as text token.
 */
void tokenize_by_tag(std::vector<std::pair<std::string,bool> > &tokenized, const std::string &input)
{
    bool in_tag = false;
    std::string pending;

    for (std::string::const_iterator ch = input.begin(); ch != input.end(); ++ch)
    {
        if (*ch == '<')
        {
            in_tag = true;

            // flush what was collected so far; it is never a complete tag
            if (!pending.empty() )
            {
                tokenized.push_back( std::make_pair(pending, false) );
                pending.clear();
            }

            pending += *ch;
            continue;
        }

        pending += *ch;

        if (*ch == '>' && in_tag)
        {
            // tag complete (pending holds at least the '>')
            in_tag = false;
            tokenized.push_back( std::make_pair(pending, true) );
            pending.clear();
        }
    }

    // String left over in buffer?
    if (!pending.empty() )
        tokenized.push_back( std::make_pair(pending, false) );
} // eo tokenize_by_tag
118e216e 770
118e216e
TJ
771
772std::string strip_html_tags(const std::string &input)
773{
6ab3bc95
RP
774 // Pair first: string, second: isTag
775 vector<pair<string,bool> > tokenized;
776 tokenize_by_tag (tokenized, input);
118e216e 777
6ab3bc95
RP
778 string output;
779 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
83d700e9 780 for (token = tokenized.begin(); token != tokens_end; ++token)
6ab3bc95
RP
781 if (!token->second)
782 output += token->first;
783
784 return output;
785} // eo strip_html_tags
118e216e 786
118e216e
TJ
787
788// Smart-encode HTML en
789string smart_html_entities(const std::string &input)
790{
6ab3bc95
RP
791 // Pair first: string, second: isTag
792 vector<pair<string,bool> > tokenized;
793 tokenize_by_tag (tokenized, input);
794
795 string output;
796 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
83d700e9 797 for (token = tokenized.begin(); token != tokens_end; ++token)
6ab3bc95
RP
798 {
799 // keep HTML tags as they are
800 if (token->second)
801 output += token->first;
802 else
803 output += html_entities(token->first);
804 }
805
806 return output;
118e216e
TJ
807}
808
6ab3bc95 809
a5f3af6e
GE
/**
 * @brief finds the first character of a string which is outside the 7 bit ASCII range.
 * @param str the string.
 * @return the position of the first character with a value above 127;
 *         std::string::npos if there is none.
 */
std::string::size_type find_8bit(const std::string &str)
{
    for (std::string::size_type pos = 0; pos != str.size(); ++pos)
    {
        // the high bit is set for exactly the values 128..255
        if ( (str[pos] & 0x80) != 0)
            return pos;
    }

    return std::string::npos;
}
819
118e216e
TJ
820// encoded UTF-8 chars into HTML entities
821string html_entities(std::string str)
822{
6ab3bc95
RP
823 // Normal chars
824 replace_all (str, "&", "&amp;");
6ab3bc95
RP
825 replace_all (str, "<", "&lt;");
826 replace_all (str, ">", "&gt;");
980577e1
TJ
827 replace_all (str, "\"", "&quot;");
828 replace_all (str, "'", "&#x27;");
829 replace_all (str, "/", "&#x2F;");
6ab3bc95
RP
830
831 // Umlauts
832 replace_all (str, "\xC3\xA4", "&auml;");
833 replace_all (str, "\xC3\xB6", "&ouml;");
834 replace_all (str, "\xC3\xBC", "&uuml;");
835 replace_all (str, "\xC3\x84", "&Auml;");
836 replace_all (str, "\xC3\x96", "&Ouml;");
837 replace_all (str, "\xC3\x9C", "&Uuml;");
838
839 // Misc
840 replace_all (str, "\xC3\x9F", "&szlig;");
841
842 // conversion of remaining non-ASCII chars needed?
843 // just do if needed because of performance
844 if (find_8bit(str) != string::npos)
845 {
846 // convert to fixed-size encoding UTF-32
847 wchar_t* wbuf=utf8_to_wbuf(str);
848 ostringstream target;
849
850 // replace all non-ASCII chars with HTML representation
851 for (int p=0; wbuf[p] != 0; p++)
852 {
853 unsigned int c=wbuf[p];
854
855 if (c <= 127)
856 target << static_cast<unsigned char>(c);
857 else
858 target << "&#" << c << ';';
859 }
860
861 free(wbuf);
862
863 str=target.str();
864 }
865
866 return str;
867} // eo html_entities(std::string)
868
554f813d
GE
869// convert HTML entities to something that can be viewed on a basic text console (restricted to ASCII-7)
870string html_entities_to_console(std::string str)
871{
872 // Normal chars
873 replace_all (str, "&amp;", "&");
874 replace_all (str, "&lt;", "<");
875 replace_all (str, "&gt;", ">");
876 replace_all (str, "&quot;", "\"");
877 replace_all (str, "&#x27;", "'");
878 replace_all (str, "&#x2F;", "/");
879
880 // Umlauts
881 replace_all (str, "&auml;", "ae");
882 replace_all (str, "&ouml;", "oe");
883 replace_all (str, "&uuml;", "ue");
884 replace_all (str, "&Auml;", "Ae");
885 replace_all (str, "&Ouml;", "Oe");
886 replace_all (str, "&Uuml;", "Ue");
887
888 // Misc
889 replace_all (str, "&szlig;", "ss");
890
891 return str;
892}
118e216e 893
e93545dd
GE
894bool replace_all(string &base, const char *ist, const char *soll)
895{
6ab3bc95
RP
896 string i=ist;
897 string s=soll;
898 return replace_all(base,&i,&s);
e93545dd
GE
899}
900
901bool replace_all(string &base, const string &ist, const char *soll)
902{
6ab3bc95
RP
903 string s=soll;
904 return replace_all(base,&ist,&s);
e93545dd
GE
905}
906
907bool replace_all(string &base, const string *ist, const string *soll)
908{
6ab3bc95 909 return replace_all(base,*ist,*soll);
e93545dd
GE
910}
911
912bool replace_all(string &base, const char *ist, const string *soll)
913{
6ab3bc95
RP
914 string i=ist;
915 return replace_all(base,&i,soll);
e93545dd
GE
916}
917
/**
 * @brief replaces all occurrences of a search string within a string.
 * @param[in,out] base the string which is modified.
 * @param ist the string to search for; must not be empty.
 * @param soll the replacement.
 * @return @a true if at least one occurrence was replaced.
 * @throw std::runtime_error if @a ist is empty.
 *
 * The search continues behind the inserted replacement, i.e. replacements
 * are never scanned again.
 */
bool replace_all(std::string &base, const std::string &ist, const std::string &soll)
{
    if (ist.empty() )
        throw std::runtime_error ("replace_all called with empty search string");

    bool replaced_any = false;

    for (std::string::size_type pos = base.find(ist);
         pos != std::string::npos;
         pos = base.find(ist, pos + soll.size() ) )
    {
        base.replace(pos, ist.size(), soll);
        replaced_any = true;
    }

    return replaced_any;
}
935
b953bf36
GE
/**
 * @brief replaces all characters that could be problematic or impose a security risk when being logged
 * @param str the original string
 * @param replace_with the character to replace the unsafe chars with
 * @return a string that is safe to send to syslog or other logfiles
 *
 * Only chars from 0x20 (space) up to and including 0x7E (~) are considered safe for logging
 * (see e.g. RFC 5424, section 8.2 or the posix character class "printable").
 * Everything else - NUL, control characters, 8 bit chars (and thus UTF8 sequences) -
 * is replaced.
 */
std::string sanitize_for_logging(const std::string &str, const char replace_with)
{
    std::string output(str);

    for (std::string::iterator it = output.begin(); it != output.end(); ++it)
    {
        const bool is_safe = (*it >= 0x20 && *it <= 0x7E);
        if (!is_safe)
            *it = replace_with;
    }

    return output;
}
958
e5b21dbb 959#if 0
e93545dd
GE
960string to_lower(const string &src)
961{
6ab3bc95 962 string dst = src;
e93545dd 963
6ab3bc95
RP
964 string::size_type pos, end = dst.size();
965 for (pos = 0; pos < end; pos++)
966 dst[pos] = tolower(dst[pos]);
e93545dd 967
6ab3bc95 968 return dst;
e93545dd
GE
969}
970
971string to_upper(const string &src)
972{
6ab3bc95 973 string dst = src;
e93545dd 974
6ab3bc95
RP
975 string::size_type pos, end = dst.size();
976 for (pos = 0; pos < end; pos++)
977 dst[pos] = toupper(dst[pos]);
e93545dd 978
6ab3bc95 979 return dst;
e93545dd 980}
e5b21dbb 981#endif
e93545dd 982
83809f5e 983const int MAX_UNIT_FORMAT_SYMBOLS = 6;
d1ea9075 984
2cb9a9c5 985const string shortUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
d1ea9075
GMF
986 " B",
987 " KB",
988 " MB",
989 " GB",
990 " TB",
83809f5e 991 " PB"
d1ea9075
GMF
992};
993
2cb9a9c5 994const string longUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
5cb766b9
GMF
995 i18n_noop(" Bytes"),
996 i18n_noop(" KBytes"),
997 i18n_noop(" MBytes"),
998 i18n_noop(" GBytes"),
999 i18n_noop(" TBytes"),
83809f5e 1000 i18n_noop(" PBytes")
d1ea9075
GMF
1001};
1002
72a94426
GMF
1003
/**
 * @brief rounds a number "half up" to a fraction given by a multiplier.
 * @param number the number to round.
 * @param rounding_multiplier the reciprocal of the rounding step
 *        (e.g. 10 rounds to one decimal place).
 * @return the rounded number.
 *
 * The scaled value is truncated towards zero after adding 0.5.
 */
long double rounding_upwards(
    const long double number,
    const int rounding_multiplier
)
{
    const long double scaled = number * rounding_multiplier + 0.5;
    // the cast drops the fractional part
    const int64_t truncated = static_cast<int64_t>(scaled);

    return static_cast<long double>(truncated) / static_cast<long double>(rounding_multiplier);
}
1017
1018
81267544
GMF
1019string nice_unit_format(
1020 const int64_t input,
70fc0674
GMF
1021 const UnitFormat format,
1022 const UnitBase base
81267544 1023)
6ab3bc95 1024{
d1ea9075 1025 // select the system of units (decimal or binary)
81267544 1026 int multiple = 0;
a398513a 1027 if (base == UnitBase1000)
81267544
GMF
1028 {
1029 multiple = 1000;
1030 }
1031 else
1032 {
1033 multiple = 1024;
1034 }
1035
1036 long double size = input;
6ab3bc95 1037
d1ea9075
GMF
1038 // check the size of the input number to fit in the appropriate symbol
1039 int sizecount = 0;
81267544 1040 while (size > multiple)
6ab3bc95 1041 {
81267544
GMF
1042 size = size / multiple;
1043 sizecount++;
83809f5e
GMF
1044
1045 // rollback to the previous values and stop the loop when cannot
1046 // represent the number length.
1047 if (sizecount >= MAX_UNIT_FORMAT_SYMBOLS)
1048 {
1049 size = size * multiple;
1050 sizecount--;
1051 break;
1052 }
6ab3bc95
RP
1053 }
1054
a398513a
GMF
1055 // round the input number "half up" to multiples of 10
1056 const int rounding_multiplier = 10;
72a94426 1057 size = rounding_upwards(size, rounding_multiplier);
6ab3bc95 1058
d1ea9075 1059 // format the input number, placing the appropriate symbol
6ab3bc95 1060 ostringstream out;
6ab3bc95 1061 out.setf (ios::fixed);
a398513a 1062 if (format == ShortUnitFormat)
d1ea9075
GMF
1063 {
1064 out.precision(1);
68d37a5c 1065 out << size << i18n( shortUnitFormatSymbols[sizecount].c_str() );
d1ea9075
GMF
1066 }
1067 else
6ab3bc95 1068 {
d1ea9075 1069 out.precision (2);
68d37a5c 1070 out << size << i18n( longUnitFormatSymbols[sizecount].c_str() );
6ab3bc95
RP
1071 }
1072
1073 return out.str();
1074} // eo nice_unit_format(int input)
1075
e93545dd 1076
5cd64148
CH
1077string nice_unit_format(
1078 const double input,
1079 const UnitFormat format,
1080 const UnitBase base
1081)
1082{
1083 // round as double and cast to int64_t
1084 // cast raised overflow error near max val of int64_t (~9.2e18, see unittest)
1085 int64_t input_casted_and_rounded =
1086 boost::numeric_cast<int64_t>( round(input) );
1087
1088 // now call other
1089 return nice_unit_format( input_casted_and_rounded, format, base );
1090} // eo nice_unit_format(double input)
1091
1092
47c07fba
GE
/**
 * @brief returns a quoted and escaped version of a string.
 * @param s the string.
 * @return the string enclosed in double quotes; double quotes and backslashes are
 *         prefixed with a backslash, carriage return and newline are written
 *         as "\r" and "\n" (backslash + letter).
 *
 * descape() reverses this operation.
 */
std::string escape(const std::string &s)
{
    std::string out(1, '"');

    for (std::string::const_iterator ch = s.begin(); ch != s.end(); ++ch)
    {
        switch (*ch)
        {
            case '"':
            case '\\':
                out += '\\';
                out += *ch;
                break;
            case '\r':
                out += "\\r";
                break;
            case '\n':
                out += "\\n";
                break;
            default:
                out += *ch;
        }
    }

    out += '"';

    return out;
} // eo escape(const std::string&)
47c07fba 1123
47c07fba 1124
6ab3bc95
RP
/**
 * @brief extracts and unescapes a quoted string as written by escape().
 * @param s the string which contains the escaped string.
 * @param startpos position of the opening double quote within @a s.
 * @param[out] endpos position of the closing double quote within @a s.
 * @return the unescaped content between the quotes.
 * @throw std::out_of_range if @a startpos is outside of @a s or there is no double
 *        quote at @a startpos.
 *
 * If there is no closing quote, everything behind the opening quote is taken as
 * content; the position search ends with npos in that case and the computation of
 * @a endpos wraps around.
 */
std::string descape(const std::string &s, int startpos, int &endpos)
{
    if (s.at(startpos) != '"')
        throw std::out_of_range("value not type escaped string");

    std::string out = s.substr(startpos+1);
    std::string::size_type p = 0;

    // search for the end of the string: the first " which is not escaped
    while ( (p = out.find('"', p) ) != std::string::npos)
    {
        // count the backslashes directly in front of the quote;
        // an odd number means that the quote itself is escaped
        std::string::size_type backslashes = 0;
        while (backslashes < p && out[p - 1 - backslashes] == '\\')
            ++backslashes;

        if (backslashes % 2 == 0)
            break;

        ++p;
    }

    // we now have the end of the string
    out = out.substr(0, p);

    // tell calling prog about the endposition
    endpos = startpos+p+1;

    // descape all \ stuff inside the string now
    p = 0;
    while ( (p = out.find('\\', p) ) != std::string::npos)
    {
        // at() throws std::out_of_range if the backslash is the last character
        const char code = out.at(p+1);
        if (code == 'r')
            out.replace(p, 2, "\r");
        else if (code == 'n')
            out.replace(p, 2, "\n");
        else
            out.erase(p, 1);     // keep the escaped character, drop the backslash

        // skip the character which was just produced; it must not be unescaped again
        ++p;
    }

    return out;
} // eo descape(const std::string&,int,int&)
47c07fba 1184
e93545dd 1185
47c07fba
GE
/**
 * @brief quotes a string so that it can be used as a single shell argument.
 * @param input the string.
 * @return the string enclosed in single quotes; each single quote inside
 *         the string is written as '\'' (close quote, escaped quote, reopen quote).
 */
std::string escape_shellarg(const std::string &input)
{
    std::string output(1, '\'');

    for (std::string::size_type idx = 0; idx < input.size(); ++idx)
    {
        const char ch = input[idx];
        // a single quote can't be escaped inside single quotes: leave the quoted
        // section and add an escaped quote; the quote appended below reopens it.
        if (ch == '\'')
            output.append("'\\'");

        output.push_back(ch);
    }

    output.push_back('\'');
    return output;
}