Commit | Line | Data |
---|---|---|
6a93d84a TJ |
1 | /** @file |
2 | * | |
3 | * (c) Copyright 2007-2008 by Intra2net AG | |
4 | * | |
5 | * info@intra2net.com | |
6 | */ | |
e93545dd GE |
7 | |
8 | #include <iostream> | |
9 | #include <string> | |
10 | #include <sstream> | |
11 | #include <stdexcept> | |
12 | ||
a5f3af6e | 13 | #include <wchar.h> |
e93545dd GE |
14 | #include <stdlib.h> |
15 | #include <iconv.h> | |
16 | #include <i18n.h> | |
17 | ||
18 | #include <stringfunc.hxx> | |
19 | ||
20 | using namespace std; | |
21 | ||
6a93d84a TJ |
22 | namespace i2n { |
23 | ||
24 | ||
namespace {

// Lookup tables mapping a nibble value (0..15) to its hex digit character.
const std::string hexDigitsLower("0123456789abcdef");
const std::string hexDigitsUpper("0123456789ABCDEF");


/**
 * @brief functor converting a single character to upper case.
 *
 * The character is converted to unsigned char before it is handed to std::toupper;
 * passing a negative plain char (any byte >= 0x80 where char is signed) is
 * undefined behaviour.
 */
struct UpperFunc
{
    char operator() (char c) const
    {
        return static_cast<char>( std::toupper( static_cast<unsigned char>(c) ) );
    }
}; // eo struct UpperFunc


/**
 * @brief functor converting a single character to lower case.
 *
 * See UpperFunc for the reason behind the unsigned char conversion.
 */
struct LowerFunc
{
    char operator() (char c) const
    {
        return static_cast<char>( std::tolower( static_cast<unsigned char>(c) ) );
    }
}; // eo struct LowerFunc


} // eo namespace <anonymous>
50 | ||
51 | ||
52 | ||
/**
 * characters treated as whitespace by default: blank, tab, carriage return, line feed.
 */
const std::string whitespaces(" \t\r\n");

/**
 * characters treated as line endings by default: carriage return, line feed.
 */
const std::string lineends("\r\n");
62 | ||
63 | ||
64 | ||
/**
 * @brief tests whether a string starts with a given prefix.
 * @param str the string which is examined (not modified).
 * @param prefix the prefix to look for.
 * @return @a true iff @a prefix is not empty and @a str begins with it.
 */
bool hasPrefix(const std::string& str, const std::string& prefix)
{
    // an empty prefix never matches; a prefix longer than the string cannot match
    // (the latter also covers an empty string, as the prefix is non-empty at that point).
    const bool can_match = !prefix.empty() && prefix.size() <= str.size();
    return can_match && str.compare(0, prefix.size(), prefix) == 0;
} // eo hasPrefix(const std::string&,const std::string&)
79 | ||
80 | ||
/**
 * @brief tests whether a string ends with a given suffix.
 * @param str the string which is examined (not modified).
 * @param suffix the suffix to look for.
 * @return @a true iff @a suffix is not empty and @a str ends with it.
 */
bool hasSuffix(const std::string& str, const std::string& suffix)
{
    // an empty suffix never matches; a suffix longer than the string cannot match.
    if (suffix.empty() || suffix.size() > str.size())
    {
        return false;
    }
    const std::string::size_type tail_start = str.size() - suffix.size();
    return str.compare(tail_start, suffix.size(), suffix) == 0;
} // eo hasSuffix(const std::string&,const std::string&)
95 | ||
96 | ||
/**
 * strips all characters contained in a given list from both ends of a string (in place).
 * @param[in,out] str the string which gets trimmed.
 * @param charlist the characters to remove from the beginning and the end of the string.
 * @return a copy of the trimmed string.
 */
std::string trimMod(std::string& str, const std::string& charlist)
{
    const std::string::size_type first_kept = str.find_first_not_of(charlist);
    if (first_kept == std::string::npos)
    {
        // nothing survives: the string is empty or consists of charlist characters only
        str.clear();
        return str;
    }
    // at least one character is kept, so the backward search must succeed as well.
    const std::string::size_type last_kept = str.find_last_not_of(charlist);
    // cut the tail first; that way the index of the first kept character stays valid.
    str.erase(last_kept + 1);
    str.erase(0, first_kept);
    return str;
} // eo trimMod(std::string&,const std::string&)
126 | ||
127 | ||
128 | ||
/**
 * removes the last character of a string (in place) if it is contained in a given list.
 * @param[in,out] str the string.
 * @param what the list of characters the last character is checked against.
 * @return a copy of the resulting string (last character removed if applicable).
 */
std::string chompMod(std::string& str, const std::string& what)
{
    const bool strip_last = !str.empty()
                            && !what.empty()
                            && what.find( str[str.size() - 1] ) != std::string::npos;
    if (strip_last)
    {
        str.erase( str.size() - 1 );
    }
    return str;
} // eo chompMod(std::string&,const std::string&)
147 | ||
148 | ||
149 | /** | |
150 | * @brief converts a string to lower case. | |
151 | * @param[in,out] str the string to modify. | |
152 | * @return the string | |
153 | */ | |
154 | std::string lowerMod(std::string& str) | |
155 | { | |
156 | std::transform( str.begin(), str.end(), str.begin(), LowerFunc() ); | |
157 | return str; | |
158 | } // eo lowerMod(std::string&) | |
159 | ||
160 | ||
161 | /** | |
162 | * @brief converts a string to upper case. | |
163 | * @param[in,out] str the string to modify. | |
164 | * @return the string | |
165 | */ | |
166 | std::string upperMod(std::string& str) | |
167 | { | |
168 | std::transform( str.begin(), str.end(), str.begin(), UpperFunc() ); | |
169 | return str; | |
170 | } // eo upperMod(std::string&) | |
171 | ||
172 | ||
173 | ||
/**
 * returns a copy of a string with all characters from a given list stripped from both ends.
 * @param str the string which should be trimmed (not modified).
 * @param charlist the characters to remove from the beginning and the end of the string.
 * @return the trimmed string.
 */
std::string trim(const std::string& str, const std::string& charlist)
{
    const std::string::size_type first_kept = str.find_first_not_of(charlist);
    if (first_kept == std::string::npos)
    {
        // empty input, or nothing but charlist characters
        return std::string();
    }
    // a kept character exists, so this search cannot fail
    const std::string::size_type last_kept = str.find_last_not_of(charlist);
    return std::string(str, first_kept, last_kept - first_kept + 1);
} // eo trim(const std::string&,const std::string&)
193 | ||
194 | ||
/**
 * returns a copy of a string whose last character is removed if it is contained in a given list.
 * @param str the string (not modified).
 * @param what the list of characters the last character is checked against.
 * @return the resulting string (last character removed if applicable).
 */
std::string chomp(const std::string& str, const std::string& what)
{
    std::string result(str);
    // searching in an empty list never succeeds, so no separate check for it is needed
    if (!result.empty() && what.find( result[result.size() - 1] ) != std::string::npos)
    {
        result.erase( result.size() - 1 );
    }
    return result;
} // eo chomp(const std::string&,const std::string&)
213 | ||
214 | ||
215 | /** | |
216 | * @brief returns a lower case version of a given string. | |
217 | * @param str the string | |
218 | * @return the lower case version of the string | |
219 | */ | |
220 | std::string lower(const std::string& str) | |
221 | { | |
222 | std::string result(str); | |
223 | return lowerMod(result); | |
224 | } // eo lower(const std::string&) | |
225 | ||
226 | ||
227 | /** | |
228 | * @brief returns a upper case version of a given string. | |
229 | * @param str the string | |
230 | * @return the upper case version of the string | |
231 | */ | |
232 | std::string upper(const std::string& str) | |
233 | { | |
234 | std::string result(str); | |
235 | return upperMod(result); | |
236 | } // eo upper(const std::string&) | |
237 | ||
238 | ||
239 | ||
240 | /** | |
241 | * @brief removes a given suffix from a string. | |
242 | * @param str the string. | |
243 | * @param suffix the suffix which should be removed if the string ends with it. | |
244 | * @return the string without the suffix. | |
245 | * | |
246 | * If the string ends with the suffix, it is removed. If the the string doesn't end | |
247 | * with the suffix the original string is returned. | |
248 | */ | |
249 | std::string removeSuffix(const std::string& str, const std::string& suffix) | |
250 | { | |
251 | if (hasSuffix(str,suffix)) | |
252 | { | |
253 | return str.substr(0, str.size()-suffix.size() ); | |
254 | } | |
255 | return str; | |
256 | } // eo removeSuffix(const std::string&,const std::string&) | |
257 | ||
258 | ||
259 | ||
260 | /** | |
261 | * @brief removes a given prefix from a string. | |
262 | * @param str the string. | |
263 | * @param prefix the prefix which should be removed if the string begins with it. | |
264 | * @return the string without the prefix. | |
265 | * | |
266 | * If the string begins with the prefix, it is removed. If the the string doesn't begin | |
267 | * with the prefix the original string is returned. | |
268 | */ | |
269 | std::string removePrefix(const std::string& str, const std::string& prefix) | |
270 | { | |
271 | if (hasPrefix(str,prefix)) | |
272 | { | |
273 | return str.substr( prefix.size() ); | |
274 | } | |
275 | return str; | |
276 | } // eo removePrefix(const std::string&,const std::string&) | |
277 | ||
278 | ||
279 | /** | |
280 | * split a string to key and value delimited by a given delimiter. | |
281 | * The resulting key and value strings are trimmed (whitespaces removed at beginning and end). | |
282 | * @param str the string which should be splitted. | |
283 | * @param[out] key the resulting key | |
284 | * @param[out] value the resulting value | |
285 | * @param delimiter the delimiter between key and value; default is '='. | |
286 | * @return @a true if the split was successful. | |
287 | */ | |
288 | bool pairSplit( | |
289 | const std::string& str, | |
290 | std::string& key, | |
291 | std::string& value, | |
292 | char delimiter) | |
293 | { | |
294 | std::string::size_type pos = str.find(delimiter); | |
295 | if (pos == std::string::npos) return false; | |
296 | key= str.substr(0,pos); | |
297 | value= str.substr(pos+1); | |
298 | trimMod(key); | |
299 | trimMod(value); | |
300 | return true; | |
301 | } // eo pairSplit(const std::string&,std::string&,std::string&,char) | |
302 | ||
303 | ||
304 | /** | |
305 | * splits a string by given delimiter | |
306 | * | |
307 | * @param[in] str the string which should be splitted. | |
308 | * @param[out] result the list resulting from splitting @a str. | |
309 | * @param[in] delimiter the delimiter (word/phrase) at which @a str should be splitted. | |
310 | * @param[in] omit_empty should empty parts not be stored? | |
311 | * @param[in] trim_list list of characters the parts should be trimmed by. | |
312 | * (empty string results in no trim) | |
313 | */ | |
314 | void splitString( | |
315 | const std::string& str, | |
316 | std::list<std::string>& result, | |
317 | const std::string& delimiter, | |
318 | bool omit_empty, | |
319 | const std::string& trim_list | |
320 | ) | |
321 | { | |
322 | std::string::size_type pos, last_pos=0; | |
323 | bool delimiter_found= false; | |
324 | while ( last_pos < str.size() && last_pos != std::string::npos) | |
325 | { | |
326 | pos= str.find(delimiter, last_pos); | |
327 | std::string part; | |
328 | if (pos == std::string::npos) | |
329 | { | |
330 | part= str.substr(last_pos); | |
331 | delimiter_found= false; | |
332 | } | |
333 | else | |
334 | { | |
335 | part= str.substr(last_pos, pos-last_pos); | |
336 | delimiter_found=true; | |
337 | } | |
338 | if (pos != std::string::npos) | |
339 | { | |
340 | last_pos= pos+ delimiter.size(); | |
341 | } | |
342 | else | |
343 | { | |
344 | last_pos= std::string::npos; | |
345 | } | |
346 | if (!trim_list.empty()) trimMod(part, trim_list); | |
347 | if (omit_empty && part.empty()) continue; | |
348 | result.push_back( part ); | |
349 | } | |
350 | // if the string ends with a delimiter we need to append an empty string if no omit_empty | |
351 | // was given. | |
352 | // (this way we keep the split result consistent to a join operation) | |
353 | if (delimiter_found && !omit_empty) | |
354 | { | |
355 | result.push_back(""); | |
356 | } | |
357 | } // eo splitString(const std::string&,std::list< std::string >&,const std::string&,bool,const std::string&) | |
358 | ||
359 | ||
360 | /** | |
361 | * splits a string by a given delimiter | |
362 | * @param str the string which should be splitted. | |
363 | * @param delimiter delimiter the delimiter (word/phrase) at which @a str should be splitted. | |
364 | * @param[in] omit_empty should empty parts not be stored? | |
365 | * @param[in] trim_list list of characters the parts should be trimmed by. | |
366 | * (empty string results in no trim) | |
367 | * @return the list resulting from splitting @a str. | |
368 | */ | |
369 | std::list<std::string> splitString( | |
370 | const std::string& str, | |
371 | const std::string& delimiter, | |
372 | bool omit_empty, | |
373 | const std::string& trim_list | |
374 | ) | |
375 | { | |
376 | std::list<std::string> result; | |
377 | splitString(str, result, delimiter, omit_empty, trim_list); | |
378 | return result; | |
379 | } // eo splitString(const std::string&,const std::string&,bool,const std::string&) | |
380 | ||
381 | ||
/**
 * @brief joins a list of strings into a single string.
 *
 * This function is (basically) the reverse operation of @a splitString.
 *
 * @param parts the list of strings.
 * @param delimiter the delimiter which is inserted between two consecutive strings.
 * @return the joined string (empty if @a parts is empty).
 */
std::string joinString(
    const std::list< std::string >& parts,
    const std::string& delimiter
)
{
    std::string joined;
    for (std::list< std::string >::const_iterator part = parts.begin();
         part != parts.end();
         ++part)
    {
        // the delimiter goes between the parts only, never in front of the first one
        if (part != parts.begin())
        {
            joined += delimiter;
        }
        joined += *part;
    }
    return joined;
} // eo joinString(const std::list< std::string >&,const std::string&)
409 | ||
410 | ||
411 | ||
412 | /* | |
413 | ** conversions | |
414 | */ | |
415 | ||
416 | ||
417 | /** | |
418 | * @brief returns a hex string from a binary string. | |
419 | * @param str the (binary) string | |
420 | * @param upper_case_digits determine whether to use upper case characters for digits A-F. | |
421 | * @return the string in hex notation. | |
422 | */ | |
423 | std::string binaryToHex( | |
424 | const std::string& str, | |
425 | bool upper_case_digits | |
426 | ) | |
427 | { | |
428 | std::string result; | |
429 | std::string hexDigits( upper_case_digits ? hexDigitsUpper : hexDigitsLower); | |
430 | for(std::string::const_iterator it= str.begin(); | |
431 | it != str.end(); | |
432 | ++it) | |
433 | { | |
434 | result.push_back( hexDigits[ ((*it) >> 4) & 0x0f ] ); | |
435 | result.push_back( hexDigits[ (*it) & 0x0f ] ); | |
436 | } | |
437 | return result; | |
438 | } // eo binaryToHex(const std::string&,bool) | |
439 | ||
440 | ||
441 | /** | |
442 | * @brief converts a hex digit string to binary string. | |
443 | * @param str hex digit string | |
444 | * @return the binary string. | |
445 | * | |
446 | * The hex digit string may contains white spaces or colons which are treated | |
447 | * as delimiters between hex digit groups. | |
448 | * | |
449 | * @todo rework the handling of half nibbles (consistency)! | |
450 | */ | |
451 | std::string hexToBinary( | |
452 | const std::string& str | |
453 | ) | |
454 | throw(std::runtime_error) | |
455 | { | |
456 | std::string result; | |
457 | char c= 0; | |
458 | bool hasNibble= false; | |
459 | bool lastWasWS= true; | |
460 | for(std::string::const_iterator it= str.begin(); | |
461 | it != str.end(); | |
462 | ++it) | |
463 | { | |
464 | std::string::size_type p = hexDigitsLower.find( *it ); | |
465 | if (p== std::string::npos) | |
466 | { | |
467 | p= hexDigitsUpper.find( *it ); | |
468 | } | |
469 | if (p == std::string::npos) | |
470 | { | |
471 | if ( ( whitespaces.find( *it ) != std::string::npos) // is it a whitespace? | |
472 | or ( *it == ':') // or a colon? | |
473 | ) | |
474 | { | |
475 | // we treat that as a valid delimiter: | |
476 | if (hasNibble) | |
477 | { | |
478 | // 1 nibble before WS is treate as lower part: | |
479 | result.push_back(c); | |
480 | // reset state: | |
481 | hasNibble= false; | |
482 | } | |
483 | lastWasWS= true; | |
484 | continue; | |
485 | } | |
486 | } | |
487 | if (p == std::string::npos ) | |
488 | { | |
489 | throw runtime_error("illegal character in hex digit string: " + str); | |
490 | } | |
491 | lastWasWS= false; | |
492 | if (hasNibble) | |
493 | { | |
494 | c<<=4; | |
495 | } | |
496 | else | |
497 | { | |
498 | c=0; | |
499 | } | |
500 | c+= (p & 0x0f); | |
501 | if (hasNibble) | |
502 | { | |
503 | //we already had a nibble, so a char is complete now: | |
504 | result.push_back( c ); | |
505 | hasNibble=false; | |
506 | } | |
507 | else | |
508 | { | |
509 | // this is the first nibble of a new char: | |
510 | hasNibble=true; | |
511 | } | |
512 | } | |
513 | if (hasNibble) | |
514 | { | |
515 | //well, there is one nibble left | |
516 | // let's do some heuristics: | |
517 | if (lastWasWS) | |
518 | { | |
519 | // if the preceeding character was a white space (or a colon) | |
520 | // we treat the nibble as lower part: | |
521 | //( this is consistent with shortened hex notations where leading zeros are not noted) | |
522 | result.push_back( c ); | |
523 | } | |
524 | else | |
525 | { | |
526 | // if it was part of a hex digit chain, we treat it as UPPER part (!!) | |
527 | result.push_back( c << 4 ); | |
528 | } | |
529 | } | |
530 | return result; | |
531 | } // eo hexToBinary(const std::string&) | |
532 | ||
533 | ||
534 | } // eo namespace i2n | |
535 | ||
e93545dd GE |
namespace {

/**
 * @brief owns an iconv conversion descriptor and closes it on destruction.
 */
class IconvHandle
{
public:
    IconvHandle(const char *to_code, const char *from_code)
        : cd_( iconv_open(to_code, from_code) )
    {
    }

    ~IconvHandle()
    {
        if (valid())
            iconv_close(cd_);
    }

    /// @return @a false if iconv_open() failed, i.e. the conversion is not supported.
    bool valid() const
    {
        // iconv_t is an opaque type; iconv_open() reports failure as (iconv_t)-1
        return cd_ != (iconv_t)-1;
    }

    iconv_t get() const
    {
        return cd_;
    }

private:
    // not copyable: the descriptor must be closed exactly once
    IconvHandle(const IconvHandle&);
    IconvHandle& operator=(const IconvHandle&);

    iconv_t cd_;
};

/**
 * @brief adapts the input pointer to the inbuf type of the platform's iconv().
 *
 * POSIX (and glibc) declare the parameter as char**, some libiconv releases as
 * const char**. The matching conversion operator is selected by the compiler.
 * iconv() only reads through this pointer.
 */
class IconvInput
{
public:
    explicit IconvInput(const char **in) : in_(in) {}
    operator char**() const { return const_cast<char**>(in_); }
    operator const char**() const { return in_; }

private:
    const char **in_;
};

/**
 * @brief converts a string from one character encoding to another one.
 * @param to_code iconv name of the target encoding.
 * @param from_code iconv name of the source encoding.
 * @param input the string to convert.
 * @param max_growth upper bound for "output bytes per input byte"; determines the
 *        size of the output buffer.
 * @param open_error message of the exception thrown if the conversion is not available.
 * @return the converted string. The conversion is best effort: if it stops early
 *         (invalid input, character not representable, buffer exhausted) the part
 *         converted up to that point is returned.
 * @throw std::runtime_error if iconv does not support the conversion.
 */
std::string convert_encoding(
    const char *to_code,
    const char *from_code,
    const std::string &input,
    size_t max_growth,
    const char *open_error)
{
    IconvHandle cd(to_code, from_code);
    if (!cd.valid())
        throw std::runtime_error(open_error);

    const size_t capacity = input.size() * max_growth;
    // one spare byte keeps the buffer non-empty for empty input
    std::vector<char> buf(capacity + 1);

    const char *in = input.data();
    size_t in_left = input.size();
    char *out = &buf[0];
    size_t out_left = capacity;

    // errors are deliberately not reported here, see @return above
    iconv(cd.get(), IconvInput(&in), &in_left, &out, &out_left);
    // let stateful encodings (UTF-7) write the sequence which returns to the
    // initial shift state; a no-op for stateless targets
    iconv(cd.get(), IconvInput(NULL), NULL, &out, &out_left);

    // use the written length instead of a terminating NUL: the output may contain NUL bytes
    return std::string(&buf[0], capacity - out_left);
}

} // eo namespace <anonymous>

/**
 * @brief converts an ISO-8859-1 encoded string to UTF-8.
 * @param isostring the ISO-8859-1 encoded string.
 * @return the UTF-8 encoded string.
 * @throw std::runtime_error if iconv does not support the conversion.
 */
std::string iso_to_utf8(const std::string& isostring)
{
    return convert_encoding("UTF-8", "ISO-8859-1", isostring, 4,
                            "iconv can't convert from ISO-8859-1 to UTF-8");
}

/**
 * @brief converts a UTF-8 encoded string to ISO-8859-1.
 * @param utf8string the UTF-8 encoded string.
 * @return the ISO-8859-1 encoded string; the result ends in front of the first
 *         character which cannot be converted.
 * @throw std::runtime_error if iconv does not support the conversion.
 */
std::string utf8_to_iso(const std::string& utf8string)
{
    return convert_encoding("ISO-8859-1", "UTF-8", utf8string, 1,
                            "iconv can't convert from UTF-8 to ISO-8859-1");
}

/**
 * @brief converts a UTF-8 encoded string to a zero terminated wide character buffer.
 * @param utf8string the UTF-8 encoded string.
 * @return buffer allocated with malloc(); the caller has to release it with free().
 * @throw std::runtime_error if iconv does not support the conversion, memory is
 *        exhausted or the input is not valid UTF-8.
 *
 * The buffer is filled with UCS-4LE code units, i.e. this assumes a 4 byte,
 * little endian wchar_t.
 */
wchar_t* utf8_to_wbuf(const std::string& utf8string)
{
    IconvHandle cd("UCS-4LE", "UTF-8");
    if (!cd.valid())
        throw std::runtime_error("iconv can't convert from UTF-8 to UCS-4");

    // one code unit per input byte is the worst case; plus one for the terminator
    const size_t capacity = (utf8string.size() + 1) * sizeof(wchar_t);

    wchar_t *buf = static_cast<wchar_t*>( malloc(capacity) );
    if (buf == NULL)
        throw std::runtime_error("out of memory for iconv buffer");

    const char *in = utf8string.c_str();
    size_t in_left = utf8string.size();
    char *out = reinterpret_cast<char*>(buf);
    size_t out_left = capacity;

    if (iconv(cd.get(), IconvInput(&in), &in_left, &out, &out_left) == static_cast<size_t>(-1))
    {
        // do not leak the buffer; the descriptor is closed by IconvHandle
        free(buf);
        throw std::runtime_error("error converting char encodings");
    }

    buf[(capacity - out_left) / sizeof(wchar_t)] = 0;

    return buf;
}

/**
 * @brief converts a string from modified UTF-7 (as used for IMAP mailbox names) to UTF-8.
 * @param utf7imapstring the UTF-7-IMAP encoded string.
 * @return the UTF-8 encoded string.
 * @throw std::runtime_error if iconv does not support the conversion.
 */
std::string utf7imap_to_utf8(const std::string& utf7imapstring)
{
    return convert_encoding("UTF-8", "UTF-7-IMAP", utf7imapstring, 4,
                            "iconv can't convert from UTF-7-IMAP to UTF-8");
}

/**
 * @brief converts a UTF-8 encoded string to modified UTF-7 (as used for IMAP mailbox names).
 * @param utf8string the UTF-8 encoded string.
 * @return the UTF-7-IMAP encoded string.
 * @throw std::runtime_error if iconv does not support the conversion.
 */
std::string utf8_to_utf7imap(const std::string& utf8string)
{
    // UTF-7 is base64 encoded, a buffer 10x as large
    // as the utf-8 buffer should be enough. If not the string will be truncated.
    return convert_encoding("UTF-7-IMAP", "UTF-8", utf8string, 10,
                            "iconv can't convert from UTF-8 to UTF-7-IMAP");
}
683 | ||
118e216e TJ |
/**
 * @brief splits a string into (html) tags and the text between them.
 * @param[out] tokenized the tokens are appended here; first: the token text,
 *             second: @a true if the token is a tag ("<" up to and including ">").
 * @param input the string to tokenize.
 *
 * Every '<' starts a new token; text collected up to that point is stored as
 * non-tag token. An unterminated tag at the end is stored as non-tag token as well.
 */
void tokenize_by_tag(std::vector<std::pair<std::string,bool> > &tokenized, const std::string &input)
{
    std::string current;
    bool inside_tag = false;

    for (std::string::const_iterator ch = input.begin(); ch != input.end(); ++ch)
    {
        if (*ch == '<')
        {
            // whatever was collected so far ends here and counts as plain text
            if (!current.empty())
            {
                tokenized.push_back(std::make_pair(current, false));
                current.clear();
            }
            inside_tag = true;
            current += *ch;
            continue;
        }

        current += *ch;
        if (*ch == '>' && inside_tag)
        {
            // tag complete
            tokenized.push_back(std::make_pair(current, true));
            current.clear();
            inside_tag = false;
        }
    }

    // String left over in buffer?
    if (!current.empty())
        tokenized.push_back(std::make_pair(current, false));
}
716 | ||
717 | std::string strip_html_tags(const std::string &input) | |
718 | { | |
719 | // Pair first: string, second: isTag | |
720 | vector<pair<string,bool> > tokenized; | |
721 | tokenize_by_tag(tokenized, input); | |
722 | ||
723 | string output; | |
724 | vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end(); | |
725 | for (token = tokenized.begin(); token != tokens_end; token++) | |
726 | if (!token->second) | |
727 | output += token->first; | |
728 | ||
729 | return output; | |
730 | } | |
731 | ||
732 | // Smart-encode HTML en | |
733 | string smart_html_entities(const std::string &input) | |
734 | { | |
735 | // Pair first: string, second: isTag | |
736 | vector<pair<string,bool> > tokenized; | |
737 | tokenize_by_tag(tokenized, input); | |
738 | ||
739 | string output; | |
740 | vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end(); | |
741 | for (token = tokenized.begin(); token != tokens_end; token++) { | |
742 | // keep HTML tags as they are | |
743 | if (token->second) | |
744 | output += token->first; | |
745 | else | |
746 | output += html_entities(token->first); | |
747 | } | |
748 | ||
749 | return output; | |
750 | } | |
751 | ||
a5f3af6e GE |
/**
 * @brief looks for the first character outside the 7 bit ASCII range.
 * @param str the string to examine.
 * @return position of the first byte with a value above 127, std::string::npos if
 *         there is none.
 */
std::string::size_type find_8bit(const std::string &str)
{
    for (std::string::const_iterator ch = str.begin(); ch != str.end(); ++ch)
    {
        // values above 127 are exactly those with the highest bit set
        if ((static_cast<unsigned char>(*ch) & 0x80) != 0)
            return static_cast<std::string::size_type>(ch - str.begin());
    }

    return std::string::npos;
}
761 | ||
118e216e TJ |
762 | // encoded UTF-8 chars into HTML entities |
763 | string html_entities(std::string str) | |
764 | { | |
765 | // Normal chars | |
766 | replace_all (str, "&", "&"); | |
767 | replace_all (str, "\"", """); | |
768 | replace_all (str, "<", "<"); | |
769 | replace_all (str, ">", ">"); | |
770 | ||
771 | // Umlauts | |
a5f3af6e GE |
772 | replace_all (str, "\xC3\xA4", "ä"); |
773 | replace_all (str, "\xC3\xB6", "ö"); | |
774 | replace_all (str, "\xC3\xBC", "ü"); | |
775 | replace_all (str, "\xC3\x84", "Ä"); | |
776 | replace_all (str, "\xC3\x96", "Ö"); | |
777 | replace_all (str, "\xC3\x9C", "Ü"); | |
118e216e TJ |
778 | |
779 | // Misc | |
a5f3af6e GE |
780 | replace_all (str, "\xC3\x9F", "ß"); |
781 | ||
782 | // conversion of remaining non-ASCII chars needed? | |
783 | // just do if needed because of performance | |
784 | if (find_8bit(str) != string::npos) | |
785 | { | |
786 | // convert to fixed-size encoding UTF-32 | |
787 | wchar_t* wbuf=utf8_to_wbuf(str); | |
788 | ostringstream target; | |
789 | ||
790 | // replace all non-ASCII chars with HTML representation | |
791 | for (int p=0; wbuf[p] != 0; p++) | |
792 | { | |
793 | unsigned int c=wbuf[p]; | |
794 | ||
795 | if (c <= 127) | |
796 | target << static_cast<unsigned char>(c); | |
797 | else | |
798 | target << "&#" << c << ';'; | |
799 | } | |
800 | ||
801 | free(wbuf); | |
802 | ||
803 | str=target.str(); | |
804 | } | |
118e216e TJ |
805 | |
806 | return str; | |
807 | } | |
808 | ||
e93545dd GE |
809 | bool replace_all(string &base, const char *ist, const char *soll) |
810 | { | |
811 | string i=ist; | |
812 | string s=soll; | |
813 | return replace_all(base,&i,&s); | |
814 | } | |
815 | ||
816 | bool replace_all(string &base, const string &ist, const char *soll) | |
817 | { | |
818 | string s=soll; | |
819 | return replace_all(base,&ist,&s); | |
820 | } | |
821 | ||
822 | bool replace_all(string &base, const string *ist, const string *soll) | |
823 | { | |
824 | return replace_all(base,*ist,*soll); | |
825 | } | |
826 | ||
827 | bool replace_all(string &base, const char *ist, const string *soll) | |
828 | { | |
829 | string i=ist; | |
830 | return replace_all(base,&i,soll); | |
831 | } | |
832 | ||
/**
 * @brief replaces all occurrences of a string within another string.
 * @param[in,out] base the string which is modified.
 * @param ist the string to search for ("is"); must not be empty.
 * @param soll the replacement ("should be").
 * @return @a true if at least one occurrence was replaced.
 * @throw std::runtime_error if @a ist is empty.
 *
 * The search continues behind the inserted replacement, i.e. the replacement
 * itself is never searched.
 */
bool replace_all(std::string &base, const std::string &ist, const std::string &soll)
{
    if (ist.empty())
        throw std::runtime_error("replace_all called with empty search string");

    bool replaced_any = false;
    for (std::string::size_type hit = base.find(ist);
         hit != std::string::npos;
         hit = base.find(ist, hit + soll.size()))
    {
        base.replace(hit, ist.size(), soll);
        replaced_any = true;
    }

    return replaced_any;
}
850 | ||
/**
 * @brief returns a lower case copy of a string.
 * @param src the string.
 * @return copy of @a src with every character converted by tolower (current C locale).
 */
std::string to_lower(const std::string &src)
{
    std::string dst = src;

    for (std::string::iterator ch = dst.begin(); ch != dst.end(); ++ch)
    {
        // tolower must not be called with a negative value (bytes >= 0x80 where char
        // is signed), hence the conversion to unsigned char
        *ch = static_cast<char>( std::tolower( static_cast<unsigned char>(*ch) ) );
    }

    return dst;
}
861 | ||
/**
 * @brief returns an upper case copy of a string.
 * @param src the string.
 * @return copy of @a src with every character converted by toupper (current C locale).
 */
std::string to_upper(const std::string &src)
{
    std::string dst = src;

    for (std::string::iterator ch = dst.begin(); ch != dst.end(); ++ch)
    {
        // toupper must not be called with a negative value (bytes >= 0x80 where char
        // is signed), hence the conversion to unsigned char
        *ch = static_cast<char>( std::toupper( static_cast<unsigned char>(*ch) ) );
    }

    return dst;
}
872 | ||
873 | string nice_unit_format (int input) { | |
874 | float size = input; | |
875 | int sizecount = 0; | |
876 | ||
877 | while (size > 1000) { | |
878 | size = size / 1000; | |
879 | sizecount++; | |
880 | } | |
881 | ||
882 | float tmp; // round | |
883 | tmp = size*10; | |
884 | tmp += 0.5; | |
885 | tmp = int (tmp); | |
886 | tmp = float(tmp)/float(10); | |
887 | size = tmp; | |
888 | ||
889 | ostringstream out; | |
890 | ||
891 | out.setf (ios::fixed); | |
892 | out.precision(2); | |
893 | switch (sizecount) { | |
894 | case 1: | |
895 | out << size << i18n(" KBytes"); | |
896 | break; | |
897 | case 2: | |
898 | out << size << i18n(" MBytes"); | |
899 | break; | |
900 | case 3: | |
901 | out << size << i18n(" Gbytes"); | |
902 | break; | |
903 | default: | |
904 | out << size << i18n(" Bytes"); | |
905 | break; | |
906 | } | |
907 | ||
908 | return out.str(); | |
909 | } | |
910 | ||
47c07fba GE |
/**
 * @brief puts a string into double quotes and escapes its content.
 * @param s the string.
 * @return the quoted string; inside the quotes a backslash is put in front of every
 *         double quote and backslash, carriage return becomes \\r and line feed \\n
 *         (each written as backslash plus letter).
 */
std::string escape(const std::string &s)
{
    std::string out;
    out.reserve(s.size() + 2);

    out += '"';
    for (std::string::const_iterator ch = s.begin(); ch != s.end(); ++ch)
    {
        if (*ch == '"' || *ch == '\\')
        {
            out += '\\';
            out += *ch;
        }
        else if (*ch == '\r')
            out += "\\r";
        else if (*ch == '\n')
            out += "\\n";
        else
            out += *ch;
    }
    out += '"';

    return out;
}
941 | ||
/**
 * @brief extracts and unescapes a double quoted string (reverse operation of escape()).
 * @param s the string containing the quoted string.
 * @param startpos position of the opening double quote within @a s.
 * @param[out] endpos position of the closing double quote within @a s.
 * @return the unescaped content between the quotes: \\r and \\n (backslash plus letter)
 *         become carriage return and line feed, any other backslash is dropped and the
 *         character behind it is kept.
 * @throw std::out_of_range if @a startpos is not inside @a s, if there is no double
 *        quote at @a startpos or if the content ends with a single backslash.
 */
std::string descape(const std::string &s, int startpos, int &endpos)
{
    if (s.at(startpos) != '"')
        throw std::out_of_range("value not type escaped string");

    std::string out(s, startpos + 1);

    // search for the end of the string: the first double quote which is not escaped
    std::string::size_type p = 0;
    while ((p = out.find('"', p)) != std::string::npos)
    {
        // a quote is escaped if an odd number of backslashes is directly in front of it
        std::string::size_type backslashes = 0;
        while (backslashes < p && out[p - 1 - backslashes] == '\\')
            ++backslashes;

        if (backslashes % 2 == 0)
            break;

        ++p;
    }

    // we now have the end of the string
    out = out.substr(0, p);

    // tell calling prog about the endposition
    endpos = startpos + p + 1;

    // descape all \ stuff inside the string now
    for (p = 0; (p = out.find('\\', p)) != std::string::npos; ++p)
    {
        const char escaped = out.at(p + 1);
        if (escaped == 'r')
            out.replace(p, 2, "\r");
        else if (escaped == 'n')
            out.replace(p, 2, "\n");
        else
            out.erase(p, 1);    // keeps the escaped character, which is skipped by ++p
    }

    return out;
}
e93545dd | 1001 | |
47c07fba GE |
/**
 * @brief quotes a string so that it can be used as a single shell argument.
 * @param input the string.
 * @return @a input enclosed in single quotes; every single quote inside is written
 *         as '\'' (close the quoting, escaped quote, reopen the quoting).
 */
std::string escape_shellarg(const std::string &input)
{
    std::string quoted("'");

    for (std::string::size_type i = 0; i < input.size(); ++i)
    {
        if (input[i] == '\'')
            quoted.append("'\\''");
        else
            quoted.push_back(input[i]);
    }

    quoted.push_back('\'');
    return quoted;
}