Commit | Line | Data |
---|---|---|
6a93d84a TJ |
1 | /** @file |
2 | * | |
3 | * (c) Copyright 2007-2008 by Intra2net AG | |
6ab3bc95 | 4 | * |
6a93d84a TJ |
5 | * info@intra2net.com |
6 | */ | |
e93545dd GE |
7 | |
8 | #include <iostream> | |
9 | #include <string> | |
10 | #include <sstream> | |
11 | #include <stdexcept> | |
12 | ||
a5f3af6e | 13 | #include <wchar.h> |
e93545dd GE |
14 | #include <stdlib.h> |
15 | #include <iconv.h> | |
16 | #include <i18n.h> | |
17 | ||
18 | #include <stringfunc.hxx> | |
19 | ||
20 | using namespace std; | |
21 | ||
6ab3bc95 RP |
22 | namespace I2n |
23 | { | |
6a93d84a TJ |
24 | |
25 | ||
6ab3bc95 RP |
namespace
{

/// Maps a nibble value (0..15) to its lower case hex digit.
const std::string hexDigitsLower("0123456789abcdef");
/// Maps a nibble value (0..15) to its upper case hex digit.
const std::string hexDigitsUpper("0123456789ABCDEF");


/**
 * @brief functor which converts a single character to upper case.
 *
 * The character is converted to unsigned char before it is passed to
 * std::toupper; handing a negative char value to std::toupper is undefined
 * behavior.
 */
struct UpperFunc
{
    char operator() (char c) const
    {
        return static_cast<char>( std::toupper( static_cast<unsigned char>(c) ) );
    }
}; // eo struct UpperFunc


/**
 * @brief functor which converts a single character to lower case.
 *
 * The character is converted to unsigned char before it is passed to
 * std::tolower; handing a negative char value to std::tolower is undefined
 * behavior.
 */
struct LowerFunc
{
    char operator() (char c) const
    {
        return static_cast<char>( std::tolower( static_cast<unsigned char>(c) ) );
    }
}; // eo struct LowerFunc


} // eo namespace <anonymous>
52 | ||
53 | ||
54 | ||
/**
 * Characters treated as whitespace by default:
 * blank, tab, carriage return and line feed (" \t\r\n").
 */
const std::string Whitespaces(" \t\r\n");

/**
 * Characters treated as line endings by default:
 * carriage return and line feed ("\r\n").
 */
const std::string LineEndings("\r\n");
6a93d84a TJ |
64 | |
65 | ||
66 | ||
/**
 * @brief tests whether a string starts with a given prefix.
 * @param[in] str the string to examine.
 * @param[in] prefix the prefix to look for.
 * @return @a true iff @a prefix is not empty and @a str begins with it.
 */
bool has_prefix(const std::string& str, const std::string& prefix)
{
    const std::string::size_type prefix_len = prefix.size();
    if (prefix_len == 0 || prefix_len > str.size())
    {
        // an empty prefix never matches; a prefix longer than the string cannot match
        return false;
    }
    return str.compare(0, prefix_len, prefix) == 0;
} // eo has_prefix(const std::string&,const std::string&)
6a93d84a TJ |
81 | |
82 | ||
/**
 * @brief tests whether a string ends with a given suffix.
 * @param[in] str the string to examine.
 * @param[in] suffix the suffix to look for.
 * @return @a true iff @a suffix is not empty and @a str ends with it.
 */
bool has_suffix(const std::string& str, const std::string& suffix)
{
    const std::string::size_type suffix_len = suffix.size();
    if (suffix_len == 0 || suffix_len > str.size())
    {
        // an empty suffix never matches; a suffix longer than the string cannot match
        return false;
    }
    return str.compare(str.size() - suffix_len, suffix_len, suffix) == 0;
} // eo has_suffix(const std::string&,const std::string&)
6a93d84a TJ |
97 | |
98 | ||
/**
 * strips all leading and trailing characters contained in a given list
 * from a string (in place).
 * @param[in,out] str the string which is trimmed.
 * @param charlist the characters which are removed from both ends of @a str.
 * @return a copy of the trimmed string.
 */
std::string trim_mod(std::string& str, const std::string& charlist)
{
    const std::string::size_type first_kept = str.find_first_not_of(charlist);
    if (first_kept == std::string::npos)
    {
        // nothing to keep: the string is empty or consists of charlist characters only
        str.clear();
        return str;
    }
    // a character to keep exists, so the backward search succeeds as well
    const std::string::size_type last_kept = str.find_last_not_of(charlist);
    // cut the tail first; the head position is not affected by that
    str.erase(last_kept + 1);
    str.erase(0, first_kept);
    return str;
} // eo trim_mod(std::string&,const std::string&)
6a93d84a TJ |
128 | |
129 | ||
130 | ||
/**
 * removes the last character of a string (in place) if it is contained in a
 * list of characters.
 * @param[in,out] str the string.
 * @param what the characters which qualify the last character for removal.
 * @return a copy of the resulting string (at most one character shorter).
 */
std::string chomp_mod(std::string& str, const std::string& what)
{
    if (!str.empty() && !what.empty())
    {
        const std::string::size_type last = str.size() - 1;
        if (what.find(str[last]) != std::string::npos)
        {
            str.resize(last);
        }
    }
    return str;
} // eo chomp_mod(std::string&,const std::string&)
6a93d84a TJ |
149 | |
150 | ||
151 | /** | |
152 | * @brief converts a string to lower case. | |
153 | * @param[in,out] str the string to modify. | |
154 | * @return the string | |
155 | */ | |
6ab3bc95 | 156 | std::string to_lower_mod(std::string& str) |
6a93d84a | 157 | { |
6ab3bc95 RP |
158 | std::transform(str.begin(), str.end(), str.begin(), LowerFunc() ); |
159 | return str; | |
160 | } // eo to_lower_mod(std::string&) | |
6a93d84a TJ |
161 | |
162 | ||
163 | /** | |
164 | * @brief converts a string to upper case. | |
165 | * @param[in,out] str the string to modify. | |
166 | * @return the string | |
167 | */ | |
6ab3bc95 | 168 | std::string to_upper_mod(std::string& str) |
6a93d84a | 169 | { |
6ab3bc95 RP |
170 | std::transform( str.begin(), str.end(), str.begin(), UpperFunc() ); |
171 | return str; | |
172 | } // eo to_upper_mod(std::string&) | |
6a93d84a TJ |
173 | |
174 | ||
175 | ||
/**
 * returns a copy of a string with all leading and trailing characters
 * contained in a given list removed.
 * @param str the string which should be trimmed (left untouched).
 * @param charlist the characters which are removed from both ends.
 * @return the trimmed copy.
 */
std::string trim (const std::string& str, const std::string& charlist)
{
    const std::string::size_type first_kept = str.find_first_not_of(charlist);
    if (first_kept == std::string::npos)
    {
        // empty input, or nothing but charlist characters
        return std::string();
    }
    // at least one character survives, so the backward search cannot fail
    const std::string::size_type last_kept = str.find_last_not_of(charlist);
    return std::string(str, first_kept, last_kept - first_kept + 1);
} // eo trim(const std::string&,const std::string&)
195 | ||
196 | ||
/**
 * returns a copy of a string whose last character is removed if that
 * character is contained in a list of characters.
 * @param str the string (left untouched).
 * @param what the characters which qualify the last character for removal.
 * @return the resulting string (at most one character shorter than @a str).
 */
std::string chomp (const std::string& str, const std::string& what)
{
    std::string result(str);
    if (!result.empty()
        && !what.empty()
        && what.find( *result.rbegin() ) != std::string::npos)
    {
        result.erase(result.size() - 1);
    }
    return result;
} // eo chomp(const std::string&,const std::string&)
215 | ||
216 | ||
217 | /** | |
218 | * @brief returns a lower case version of a given string. | |
219 | * @param str the string | |
220 | * @return the lower case version of the string | |
221 | */ | |
6ab3bc95 | 222 | std::string to_lower (const std::string& str) |
6a93d84a | 223 | { |
6ab3bc95 RP |
224 | std::string result(str); |
225 | return to_lower_mod(result); | |
226 | } // eo to_lower(const std::string&) | |
6a93d84a TJ |
227 | |
228 | ||
229 | /** | |
230 | * @brief returns a upper case version of a given string. | |
231 | * @param str the string | |
232 | * @return the upper case version of the string | |
233 | */ | |
6ab3bc95 | 234 | std::string to_upper(const std::string& str) |
6a93d84a | 235 | { |
6ab3bc95 RP |
236 | std::string result(str); |
237 | return to_upper_mod(result); | |
238 | } // eo to_upper(const std::string&) | |
6a93d84a TJ |
239 | |
240 | ||
241 | ||
242 | /** | |
243 | * @brief removes a given suffix from a string. | |
244 | * @param str the string. | |
245 | * @param suffix the suffix which should be removed if the string ends with it. | |
246 | * @return the string without the suffix. | |
247 | * | |
248 | * If the string ends with the suffix, it is removed. If the the string doesn't end | |
249 | * with the suffix the original string is returned. | |
250 | */ | |
6ab3bc95 | 251 | std::string remove_suffix(const std::string& str, const std::string& suffix) |
6a93d84a | 252 | { |
6ab3bc95 RP |
253 | if (has_suffix(str,suffix) ) |
254 | { | |
255 | return str.substr(0, str.size()-suffix.size() ); | |
256 | } | |
257 | return str; | |
258 | } // eo remove_suffix(const std::string&,const std::string&) | |
6a93d84a TJ |
259 | |
260 | ||
261 | ||
262 | /** | |
263 | * @brief removes a given prefix from a string. | |
264 | * @param str the string. | |
265 | * @param prefix the prefix which should be removed if the string begins with it. | |
266 | * @return the string without the prefix. | |
267 | * | |
268 | * If the string begins with the prefix, it is removed. If the the string doesn't begin | |
269 | * with the prefix the original string is returned. | |
270 | */ | |
6ab3bc95 | 271 | std::string remove_prefix(const std::string& str, const std::string& prefix) |
6a93d84a | 272 | { |
6ab3bc95 RP |
273 | if (has_prefix(str,prefix) ) |
274 | { | |
275 | return str.substr( prefix.size() ); | |
276 | } | |
277 | return str; | |
278 | } // eo remove_prefix(const std::string&,const std::string&) | |
6a93d84a TJ |
279 | |
280 | ||
281 | /** | |
282 | * split a string to key and value delimited by a given delimiter. | |
6ab3bc95 | 283 | * The resulting key and value strings are trimmed (Whitespaces removed at beginning and end). |
6a93d84a TJ |
284 | * @param str the string which should be splitted. |
285 | * @param[out] key the resulting key | |
286 | * @param[out] value the resulting value | |
287 | * @param delimiter the delimiter between key and value; default is '='. | |
288 | * @return @a true if the split was successful. | |
289 | */ | |
6ab3bc95 RP |
290 | bool pair_split( |
291 | const std::string& str, | |
292 | std::string& key, | |
293 | std::string& value, | |
294 | char delimiter) | |
295 | { | |
296 | std::string::size_type pos = str.find (delimiter); | |
297 | if (pos == std::string::npos) return false; | |
298 | key= str.substr(0,pos); | |
299 | value= str.substr(pos+1); | |
300 | trim_mod(key); | |
301 | trim_mod(value); | |
302 | return true; | |
303 | } // eo pair_split(const std::string&,std::string&,std::string&,char) | |
6a93d84a TJ |
304 | |
305 | ||
306 | /** | |
307 | * splits a string by given delimiter | |
308 | * | |
309 | * @param[in] str the string which should be splitted. | |
310 | * @param[out] result the list resulting from splitting @a str. | |
311 | * @param[in] delimiter the delimiter (word/phrase) at which @a str should be splitted. | |
312 | * @param[in] omit_empty should empty parts not be stored? | |
313 | * @param[in] trim_list list of characters the parts should be trimmed by. | |
314 | * (empty string results in no trim) | |
315 | */ | |
6ab3bc95 RP |
316 | void split_string( |
317 | const std::string& str, | |
318 | std::list<std::string>& result, | |
319 | const std::string& delimiter, | |
320 | bool omit_empty, | |
321 | const std::string& trim_list | |
6a93d84a TJ |
322 | ) |
323 | { | |
6ab3bc95 RP |
324 | std::string::size_type pos, last_pos=0; |
325 | bool delimiter_found= false; | |
326 | while ( last_pos < str.size() && last_pos != std::string::npos) | |
327 | { | |
328 | pos= str.find(delimiter, last_pos); | |
329 | std::string part; | |
330 | if (pos == std::string::npos) | |
331 | { | |
332 | part= str.substr(last_pos); | |
333 | delimiter_found= false; | |
334 | } | |
335 | else | |
336 | { | |
337 | part= str.substr(last_pos, pos-last_pos); | |
338 | delimiter_found=true; | |
339 | } | |
340 | if (pos != std::string::npos) | |
341 | { | |
342 | last_pos= pos+ delimiter.size(); | |
343 | } | |
344 | else | |
345 | { | |
346 | last_pos= std::string::npos; | |
347 | } | |
348 | if (!trim_list.empty() ) trim_mod (part, trim_list); | |
349 | if (omit_empty && part.empty() ) continue; | |
350 | result.push_back( part ); | |
351 | } | |
352 | // if the string ends with a delimiter we need to append an empty string if no omit_empty | |
353 | // was given. | |
354 | // (this way we keep the split result consistent to a join operation) | |
355 | if (delimiter_found && !omit_empty) | |
356 | { | |
357 | result.push_back(""); | |
358 | } | |
359 | } // eo split_string(const std::string&,std::list< std::string >&,const std::string&,bool,const std::string&) | |
6a93d84a TJ |
360 | |
361 | ||
362 | /** | |
363 | * splits a string by a given delimiter | |
364 | * @param str the string which should be splitted. | |
365 | * @param delimiter delimiter the delimiter (word/phrase) at which @a str should be splitted. | |
366 | * @param[in] omit_empty should empty parts not be stored? | |
367 | * @param[in] trim_list list of characters the parts should be trimmed by. | |
368 | * (empty string results in no trim) | |
369 | * @return the list resulting from splitting @a str. | |
370 | */ | |
6ab3bc95 RP |
371 | std::list<std::string> split_string( |
372 | const std::string& str, | |
373 | const std::string& delimiter, | |
374 | bool omit_empty, | |
375 | const std::string& trim_list | |
6a93d84a TJ |
376 | ) |
377 | { | |
6ab3bc95 RP |
378 | std::list<std::string> result; |
379 | split_string(str, result, delimiter, omit_empty, trim_list); | |
380 | return result; | |
381 | } // eo split_string(const std::string&,const std::string&,bool,const std::string&) | |
6a93d84a TJ |
382 | |
383 | ||
/**
 * @brief concatenates a list of strings, separated by a delimiter.
 *
 * This function is (basically) the reverse operation of @a split_string.
 *
 * @param parts the strings to join.
 * @param delimiter the text inserted between two consecutive strings.
 * @return the joined string (empty if @a parts is empty).
 */
std::string join_string(
    const std::list< std::string >& parts,
    const std::string& delimiter
)
{
    std::string joined;
    for (std::list< std::string >::const_iterator it= parts.begin();
         it != parts.end();
         ++it)
    {
        if (it != parts.begin() )
        {
            // every element but the first one is preceded by the delimiter
            joined+= delimiter;
        }
        joined+= *it;
    }
    return joined;
} // eo join_string(const std::list< std::string >&,const std::string&)
6a93d84a TJ |
411 | |
412 | ||
413 | ||
414 | /* | |
415 | ** conversions | |
416 | */ | |
417 | ||
418 | ||
419 | /** | |
420 | * @brief returns a hex string from a binary string. | |
421 | * @param str the (binary) string | |
422 | * @param upper_case_digits determine whether to use upper case characters for digits A-F. | |
423 | * @return the string in hex notation. | |
424 | */ | |
6ab3bc95 RP |
425 | std::string convert_binary_to_hex( |
426 | const std::string& str, | |
427 | bool upper_case_digits | |
6a93d84a TJ |
428 | ) |
429 | { | |
6ab3bc95 RP |
430 | std::string result; |
431 | std::string hexDigits(upper_case_digits ? hexDigitsUpper : hexDigitsLower); | |
432 | for ( std::string::const_iterator it= str.begin(); | |
433 | it != str.end(); | |
434 | ++it) | |
435 | { | |
436 | result.push_back( hexDigits[ ( (*it) >> 4) & 0x0f ] ); | |
437 | result.push_back( hexDigits[ (*it) & 0x0f ] ); | |
438 | } | |
439 | return result; | |
440 | } // eo convert_binary_to_hex(const std::string&,bool) | |
6a93d84a TJ |
441 | |
442 | ||
443 | /** | |
444 | * @brief converts a hex digit string to binary string. | |
445 | * @param str hex digit string | |
446 | * @return the binary string. | |
447 | * | |
448 | * The hex digit string may contains white spaces or colons which are treated | |
449 | * as delimiters between hex digit groups. | |
450 | * | |
451 | * @todo rework the handling of half nibbles (consistency)! | |
452 | */ | |
6ab3bc95 RP |
453 | std::string convert_hex_to_binary( |
454 | const std::string& str | |
6a93d84a | 455 | ) |
6ab3bc95 RP |
456 | throw (std::runtime_error) |
457 | { | |
458 | std::string result; | |
459 | char c= 0; | |
460 | bool hasNibble= false; | |
461 | bool lastWasWS= true; | |
462 | for ( std::string::const_iterator it= str.begin(); | |
463 | it != str.end(); | |
464 | ++it) | |
465 | { | |
466 | std::string::size_type p = hexDigitsLower.find( *it ); | |
467 | if (p== std::string::npos) | |
468 | { | |
469 | p= hexDigitsUpper.find( *it ); | |
470 | } | |
471 | if (p == std::string::npos) | |
472 | { | |
473 | if ( ( Whitespaces.find( *it ) != std::string::npos) // is it a whitespace? | |
6a93d84a | 474 | or ( *it == ':') // or a colon? |
6ab3bc95 RP |
475 | ) |
476 | { | |
477 | // we treat that as a valid delimiter: | |
478 | if (hasNibble) | |
6a93d84a | 479 | { |
6ab3bc95 RP |
480 | // 1 nibble before WS is treate as lower part: |
481 | result.push_back(c); | |
482 | // reset state: | |
483 | hasNibble= false; | |
6a93d84a | 484 | } |
6ab3bc95 RP |
485 | lastWasWS= true; |
486 | continue; | |
487 | } | |
488 | } | |
489 | if (p == std::string::npos ) | |
490 | { | |
491 | throw runtime_error("illegal character in hex digit string: " + str); | |
492 | } | |
493 | lastWasWS= false; | |
494 | if (hasNibble) | |
495 | { | |
496 | c<<=4; | |
497 | } | |
498 | else | |
499 | { | |
500 | c=0; | |
501 | } | |
502 | c+= (p & 0x0f); | |
503 | if (hasNibble) | |
504 | { | |
505 | //we already had a nibble, so a char is complete now: | |
506 | result.push_back( c ); | |
507 | hasNibble=false; | |
508 | } | |
509 | else | |
510 | { | |
511 | // this is the first nibble of a new char: | |
512 | hasNibble=true; | |
513 | } | |
514 | } | |
515 | if (hasNibble) | |
516 | { | |
517 | //well, there is one nibble left | |
518 | // let's do some heuristics: | |
519 | if (lastWasWS) | |
520 | { | |
521 | // if the preceeding character was a white space (or a colon) | |
522 | // we treat the nibble as lower part: | |
523 | //( this is consistent with shortened hex notations where leading zeros are not noted) | |
524 | result.push_back( c ); | |
525 | } | |
526 | else | |
527 | { | |
528 | // if it was part of a hex digit chain, we treat it as UPPER part (!!) | |
529 | result.push_back( c << 4 ); | |
530 | } | |
531 | } | |
532 | return result; | |
533 | } // eo convert_hex_to_binary(const std::string&) | |
534 | ||
535 | ||
536 | } // eo namespace I2n | |
537 | ||
538 | ||
539 | ||
6a93d84a | 540 | |
e93545dd GE |
/**
 * @brief converts an ISO-8859-1 encoded string to UTF-8 using iconv.
 * @param isostring the ISO-8859-1 encoded input.
 * @return the UTF-8 encoded string. The result of the iconv() call is not
 *         checked; if the conversion stops early, only the part converted so
 *         far is returned.
 * @throw std::runtime_error if iconv does not offer the conversion or the
 *        output buffer cannot be allocated.
 */
std::string iso_to_utf8(const std::string& isostring)
{
    iconv_t i2utf8 = iconv_open("UTF-8", "ISO-8859-1");

    // test the descriptor returned by iconv_open
    if (i2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from ISO-8859-1 to UTF-8");

    size_t in_size = isostring.size();
    const size_t buf_size = in_size*4;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>(malloc(buf_size+1));
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(i2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a pointer to non-const input although it only reads it
    char *in = const_cast<char *>(isostring.c_str());
    char *out = buf;
    iconv(i2utf8, &in, &in_size, &out, &out_size);
    iconv_close(i2utf8);

    std::string result;
    try
    {
        // buf_size-out_size bytes were written; copying by length keeps
        // embedded NUL bytes
        result.assign(buf, buf_size-out_size);
    }
    catch (...)
    {
        free(buf);
        throw;
    }
    free(buf);

    return result;
}
570 | ||
/**
 * @brief converts a UTF-8 encoded string to ISO-8859-1 using iconv.
 * @param utf8string the UTF-8 encoded input.
 * @return the ISO-8859-1 encoded string. The result of the iconv() call is
 *         not checked; if the conversion stops early, only the part
 *         converted so far is returned.
 * @throw std::runtime_error if iconv does not offer the conversion or the
 *        output buffer cannot be allocated.
 */
std::string utf8_to_iso(const std::string& utf8string)
{
    iconv_t utf82iso = iconv_open("ISO-8859-1","UTF-8");

    if (utf82iso == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to ISO-8859-1");

    size_t in_size = utf8string.size();
    // the output buffer is sized like the input
    const size_t buf_size = in_size;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>(malloc(buf_size+1));
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(utf82iso);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a pointer to non-const input although it only reads it
    char *in = const_cast<char *>(utf8string.c_str());
    char *out = buf;
    iconv(utf82iso, &in, &in_size, &out, &out_size);
    iconv_close(utf82iso);

    std::string result;
    try
    {
        // copy by length so embedded NUL bytes survive
        result.assign(buf, buf_size-out_size);
    }
    catch (...)
    {
        free(buf);
        throw;
    }
    free(buf);

    return result;
}
600 | ||
a5f3af6e GE |
/**
 * @brief converts a UTF-8 encoded string to a zero terminated wide
 *        character buffer using iconv (target encoding "UCS-4LE").
 * @param utf8string the UTF-8 encoded input.
 * @return a buffer allocated with malloc(); the caller has to release it
 *         with free().
 * @throw std::runtime_error if iconv does not offer the conversion, the
 *        buffer cannot be allocated or the conversion fails.
 *
 * NOTE(review): the UCS-4LE output is stored directly into wchar_t elements,
 * which presumably assumes a 4 byte little endian wchar_t - confirm for the
 * supported platforms.
 */
wchar_t* utf8_to_wbuf(const std::string& utf8string)
{
    iconv_t utf82wstr = iconv_open("UCS-4LE","UTF-8");

    if (utf82wstr == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UCS-4");

    size_t in_size = utf8string.size();
    // one wide character per input byte plus the terminator
    const size_t buf_size = (in_size+1)*sizeof(wchar_t);
    size_t out_size = buf_size;

    wchar_t *buf = static_cast<wchar_t *>(malloc(buf_size));
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(utf82wstr);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a pointer to non-const input although it only reads it
    char *in = const_cast<char *>(utf8string.c_str());
    char *out = reinterpret_cast<char *>(buf);
    const size_t rc = iconv(utf82wstr, &in, &in_size, &out, &out_size);
    iconv_close(utf82wstr);

    if (rc == static_cast<size_t>(-1))
    {
        // release the buffer, nobody else knows about it
        free(buf);
        throw std::runtime_error("error converting char encodings");
    }

    // terminate directly behind the last wide character written
    buf[ (buf_size-out_size) / sizeof(wchar_t) ] = 0;

    return buf;
}
626 | ||
/**
 * @brief converts a string from the iconv encoding "UTF-7-IMAP" to UTF-8.
 * @param utf7imapstring the UTF-7-IMAP encoded input.
 * @return the UTF-8 encoded string. The result of the iconv() call is not
 *         checked; if the conversion stops early, only the part converted so
 *         far is returned.
 * @throw std::runtime_error if iconv does not offer the conversion or the
 *        output buffer cannot be allocated.
 */
std::string utf7imap_to_utf8(const std::string& utf7imapstring)
{
    iconv_t utf7imap2utf8 = iconv_open("UTF-8","UTF-7-IMAP");

    if (utf7imap2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-7-IMAP to UTF-8");

    size_t in_size = utf7imapstring.size();
    const size_t buf_size = in_size*4;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>(malloc(buf_size+1));
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(utf7imap2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a pointer to non-const input although it only reads it
    char *in = const_cast<char *>(utf7imapstring.c_str());
    char *out = buf;
    iconv(utf7imap2utf8, &in, &in_size, &out, &out_size);
    iconv_close(utf7imap2utf8);

    std::string result;
    try
    {
        // copy by length so embedded NUL bytes survive
        result.assign(buf, buf_size-out_size);
    }
    catch (...)
    {
        free(buf);
        throw;
    }
    free(buf);

    return result;
}
656 | ||
6a2b6dd1 TJ |
/**
 * @brief converts a UTF-8 encoded string to the iconv encoding "UTF-7-IMAP".
 * @param utf8string the UTF-8 encoded input.
 * @return the UTF-7-IMAP encoded string. The results of the iconv() calls
 *         are not checked; if the output buffer turns out to be too small
 *         the string is truncated.
 * @throw std::runtime_error if iconv does not offer the conversion or the
 *        output buffer cannot be allocated.
 */
std::string utf8_to_utf7imap(const std::string& utf8string)
{
    iconv_t utf82utf7imap = iconv_open("UTF-7-IMAP", "UTF-8");

    if (utf82utf7imap == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UTF-7-IMAP");

    // UTF-7 is base64 encoded, a buffer 10x as large
    // as the utf-8 buffer should be enough. If not the string will be truncated.
    size_t in_size = utf8string.size();
    const size_t buf_size = in_size*10;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>(malloc(buf_size+1));
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(utf82utf7imap);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a pointer to non-const input although it only reads it
    char *in = const_cast<char *>(utf8string.c_str());
    char *out = buf;
    iconv(utf82utf7imap, &in, &in_size, &out, &out_size);
    // UTF-7 is a stateful encoding: a call without input makes iconv write
    // the bytes which return the output to its initial shift state (this
    // closes a base64 section which is still open at the end of the input).
    iconv(utf82utf7imap, NULL, NULL, &out, &out_size);
    iconv_close(utf82utf7imap);

    std::string result;
    try
    {
        // copy by length so embedded NUL bytes survive
        result.assign(buf, buf_size-out_size);
    }
    catch (...)
    {
        free(buf);
        throw;
    }
    free(buf);

    return result;
}
688 | ||
118e216e TJ |
/**
 * Tokenizes a string by (html) tags.
 *
 * The tokens are appended to @a tokenized as pairs: first is the text of the
 * token, second tells whether the token is a tag (@a true) or plain text
 * (@a false). A tag starts at a '<' and ends at the next '>'. Every '<'
 * starts a new token; text collected up to that point (including an
 * unfinished tag) is stored as plain text, as is whatever is left at the end
 * of the input.
 *
 * @param[out] tokenized the list the tokens are appended to.
 * @param input the string to tokenize.
 */
void tokenize_by_tag(std::vector<std::pair<std::string,bool> > &tokenized, const std::string &input)
{
    bool in_tag = false;
    std::string pending;

    for (std::string::const_iterator it = input.begin(); it != input.end(); ++it)
    {
        const char ch = *it;

        if (ch == '<')
        {
            in_tag = true;

            // flush whatever was collected in front of the tag
            if (!pending.empty() )
            {
                tokenized.push_back( std::make_pair(pending, false) );
                pending.clear();
            }

            pending += ch;
            continue;
        }

        pending += ch;

        if (ch == '>' && in_tag)
        {
            // the tag is complete
            in_tag = false;
            tokenized.push_back( std::make_pair(pending, true) );
            pending.clear();
        }
    }

    // String left over in buffer?
    if (!pending.empty() )
        tokenized.push_back( std::make_pair(pending, false) );
} // eo tokenize_by_tag
118e216e | 728 | |
118e216e TJ |
729 | |
730 | std::string strip_html_tags(const std::string &input) | |
731 | { | |
6ab3bc95 RP |
732 | // Pair first: string, second: isTag |
733 | vector<pair<string,bool> > tokenized; | |
734 | tokenize_by_tag (tokenized, input); | |
118e216e | 735 | |
6ab3bc95 RP |
736 | string output; |
737 | vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end(); | |
738 | for (token = tokenized.begin(); token != tokens_end; token++) | |
739 | if (!token->second) | |
740 | output += token->first; | |
741 | ||
742 | return output; | |
743 | } // eo strip_html_tags | |
118e216e | 744 | |
118e216e TJ |
745 | |
746 | // Smart-encode HTML en | |
747 | string smart_html_entities(const std::string &input) | |
748 | { | |
6ab3bc95 RP |
749 | // Pair first: string, second: isTag |
750 | vector<pair<string,bool> > tokenized; | |
751 | tokenize_by_tag (tokenized, input); | |
752 | ||
753 | string output; | |
754 | vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end(); | |
755 | for (token = tokenized.begin(); token != tokens_end; token++) | |
756 | { | |
757 | // keep HTML tags as they are | |
758 | if (token->second) | |
759 | output += token->first; | |
760 | else | |
761 | output += html_entities(token->first); | |
762 | } | |
763 | ||
764 | return output; | |
118e216e TJ |
765 | } |
766 | ||
6ab3bc95 | 767 | |
a5f3af6e GE |
/**
 * Searches a string for the first character outside the 7 bit (ASCII) range.
 *
 * @param str the string to search.
 * @return the position of the first character whose value (taken as
 *         unsigned char) is greater than 127; std::string::npos if there is none.
 */
std::string::size_type find_8bit(const std::string &str)
{
    for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        if (static_cast<unsigned char>(*it) > 127)
            return static_cast<std::string::size_type>(it - str.begin());
    }

    return std::string::npos;
}
777 | ||
118e216e TJ |
778 | // encoded UTF-8 chars into HTML entities |
779 | string html_entities(std::string str) | |
780 | { | |
6ab3bc95 RP |
781 | // Normal chars |
782 | replace_all (str, "&", "&"); | |
783 | replace_all (str, "\"", """); | |
784 | replace_all (str, "<", "<"); | |
785 | replace_all (str, ">", ">"); | |
786 | ||
787 | // Umlauts | |
788 | replace_all (str, "\xC3\xA4", "ä"); | |
789 | replace_all (str, "\xC3\xB6", "ö"); | |
790 | replace_all (str, "\xC3\xBC", "ü"); | |
791 | replace_all (str, "\xC3\x84", "Ä"); | |
792 | replace_all (str, "\xC3\x96", "Ö"); | |
793 | replace_all (str, "\xC3\x9C", "Ü"); | |
794 | ||
795 | // Misc | |
796 | replace_all (str, "\xC3\x9F", "ß"); | |
797 | ||
798 | // conversion of remaining non-ASCII chars needed? | |
799 | // just do if needed because of performance | |
800 | if (find_8bit(str) != string::npos) | |
801 | { | |
802 | // convert to fixed-size encoding UTF-32 | |
803 | wchar_t* wbuf=utf8_to_wbuf(str); | |
804 | ostringstream target; | |
805 | ||
806 | // replace all non-ASCII chars with HTML representation | |
807 | for (int p=0; wbuf[p] != 0; p++) | |
808 | { | |
809 | unsigned int c=wbuf[p]; | |
810 | ||
811 | if (c <= 127) | |
812 | target << static_cast<unsigned char>(c); | |
813 | else | |
814 | target << "&#" << c << ';'; | |
815 | } | |
816 | ||
817 | free(wbuf); | |
818 | ||
819 | str=target.str(); | |
820 | } | |
821 | ||
822 | return str; | |
823 | } // eo html_entities(std::string) | |
824 | ||
118e216e | 825 | |
e93545dd GE |
826 | bool replace_all(string &base, const char *ist, const char *soll) |
827 | { | |
6ab3bc95 RP |
828 | string i=ist; |
829 | string s=soll; | |
830 | return replace_all(base,&i,&s); | |
e93545dd GE |
831 | } |
832 | ||
833 | bool replace_all(string &base, const string &ist, const char *soll) | |
834 | { | |
6ab3bc95 RP |
835 | string s=soll; |
836 | return replace_all(base,&ist,&s); | |
e93545dd GE |
837 | } |
838 | ||
839 | bool replace_all(string &base, const string *ist, const string *soll) | |
840 | { | |
6ab3bc95 | 841 | return replace_all(base,*ist,*soll); |
e93545dd GE |
842 | } |
843 | ||
844 | bool replace_all(string &base, const char *ist, const string *soll) | |
845 | { | |
6ab3bc95 RP |
846 | string i=ist; |
847 | return replace_all(base,&i,soll); | |
e93545dd GE |
848 | } |
849 | ||
/**
 * Replaces all occurrences of a text within a string.
 *
 * The search continues behind each inserted replacement, so occurrences of
 * the search text inside the replacement are not replaced again.
 *
 * @param[in,out] base the string which is modified.
 * @param ist the text to search for; must not be empty.
 * @param soll the replacement text.
 * @return @a true if at least one occurrence was replaced.
 * @throw std::runtime_error if @a ist is empty.
 */
bool replace_all(std::string &base, const std::string &ist, const std::string &soll)
{
    if (ist.empty() )
        throw std::runtime_error ("replace_all called with empty search string");

    bool replaced = false;

    for (std::string::size_type at = base.find(ist);
         at != std::string::npos;
         at = base.find(ist, at + soll.size() ) )
    {
        base.replace(at, ist.size(), soll);
        replaced = true;
    }

    return replaced;
}
867 | ||
/**
 * Returns a lower case copy of a string.
 *
 * Each character is converted with tolower(). The character is passed as
 * unsigned char; handing a negative char value to tolower() is undefined
 * behavior.
 *
 * @param src the string to convert (left untouched).
 * @return the lower case version of @a src.
 */
std::string to_lower(const std::string &src)
{
    std::string dst(src);

    for (std::string::iterator it = dst.begin(); it != dst.end(); ++it)
        *it = static_cast<char>( std::tolower( static_cast<unsigned char>(*it) ) );

    return dst;
}
878 | ||
/**
 * Returns an upper case copy of a string.
 *
 * Each character is converted with toupper(). The character is passed as
 * unsigned char; handing a negative char value to toupper() is undefined
 * behavior.
 *
 * @param src the string to convert (left untouched).
 * @return the upper case version of @a src.
 */
std::string to_upper(const std::string &src)
{
    std::string dst(src);

    for (std::string::iterator it = dst.begin(); it != dst.end(); ++it)
        *it = static_cast<char>( std::toupper( static_cast<unsigned char>(*it) ) );

    return dst;
}
889 | ||
6ab3bc95 RP |
890 | string nice_unit_format(int input) |
891 | { | |
892 | float size = input; | |
893 | int sizecount = 0; | |
894 | ||
895 | while (size > 1000) | |
896 | { | |
897 | size = size / 1000; | |
898 | sizecount++; | |
899 | } | |
900 | ||
901 | float tmp; // round | |
902 | tmp = size*10; | |
903 | tmp += 0.5; | |
904 | tmp = int (tmp); | |
905 | tmp = float (tmp) /float (10); | |
906 | size = tmp; | |
907 | ||
908 | ostringstream out; | |
909 | ||
910 | out.setf (ios::fixed); | |
911 | out.precision (2); | |
912 | switch (sizecount) | |
913 | { | |
914 | case 0: | |
915 | out << size << i18n (" Bytes"); | |
916 | break; | |
917 | case 1: | |
918 | out << size << i18n (" KBytes"); | |
919 | break; | |
920 | case 2: | |
921 | out << size << i18n (" MBytes"); | |
922 | break; | |
923 | case 3: | |
924 | out << size << i18n (" GBytes"); | |
925 | break; | |
926 | case 4: | |
927 | out << size << i18n (" TBytes"); | |
928 | break; | |
929 | case 5: | |
930 | out << size << i18n (" PBytes"); | |
931 | break; | |
932 | case 6: | |
933 | out << size << i18n (" EBytes"); | |
934 | break; | |
935 | default: | |
936 | out << size << "*10^" << (sizecount*3)<< i18n (" Bytes"); | |
937 | break; | |
938 | } | |
939 | ||
940 | return out.str(); | |
941 | } // eo nice_unit_format(int input) | |
942 | ||
e93545dd | 943 | |
47c07fba GE |
/**
 * Converts a string into its escaped, double quoted representation.
 *
 * Double quotes and backslashes are prefixed with a backslash, carriage
 * return and line feed are written as \r and \n, and the result is enclosed
 * in double quotes.
 *
 * @param s the string to escape.
 * @return the escaped and quoted string.
 */
std::string escape(const std::string &s)
{
    std::string out;

    out += '"';
    for (std::string::const_iterator it = s.begin(); it != s.end(); ++it)
    {
        switch (*it)
        {
            case '"':
            case '\\':
                // keep the character, but protect it with a backslash
                out += '\\';
                out += *it;
                break;
            case '\r':
                out += "\\r";
                break;
            case '\n':
                out += "\\n";
                break;
            default:
                out += *it;
        }
    }
    out += '"';

    return out;
} // eo escape(const std::string&)
47c07fba | 974 | |
47c07fba | 975 | |
6ab3bc95 RP |
976 | string descape(const string &s, int startpos, int &endpos) |
977 | { | |
978 | string out; | |
979 | ||
980 | if (s.at(startpos) != '"') | |
981 | throw out_of_range("value not type escaped string"); | |
982 | ||
983 | out=s.substr(startpos+1); | |
984 | string::size_type p=0; | |
985 | ||
986 | // search for the end of the string | |
987 | while ( (p=out.find("\"",p) ) !=out.npos) | |
988 | { | |
989 | int e=p-1; | |
990 | bool escaped=false; | |
991 | ||
992 | // the " might be escaped with a backslash | |
993 | while (e>=0 && out.at (e) =='\\') | |
994 | { | |
995 | if (escaped == false) | |
996 | escaped=true; | |
997 | else | |
998 | escaped=false; | |
999 | ||
1000 | e--; | |
1001 | } | |
1002 | ||
1003 | if (escaped==false) | |
1004 | break; | |
1005 | else | |
1006 | p++; | |
1007 | } | |
1008 | ||
1009 | // we now have the end of the string | |
1010 | out=out.substr(0,p); | |
1011 | ||
1012 | // tell calling prog about the endposition | |
1013 | endpos=startpos+p+1; | |
1014 | ||
1015 | // descape all \ stuff inside the string now | |
1016 | p=0; | |
1017 | while ( (p=out.find_first_of("\\",p) ) !=out.npos) | |
1018 | { | |
1019 | switch (out.at(p+1) ) | |
1020 | { | |
1021 | case 'r': | |
47c07fba GE |
1022 | out.replace(p,2,"\r"); |
1023 | break; | |
6ab3bc95 | 1024 | case 'n': |
47c07fba GE |
1025 | out.replace(p,2,"\n"); |
1026 | break; | |
6ab3bc95 | 1027 | default: |
47c07fba | 1028 | out.erase(p,1); |
6ab3bc95 RP |
1029 | } |
1030 | p++; | |
1031 | } | |
1032 | ||
1033 | return out; | |
1034 | } // eo descape(const std::string&,int,int&) | |
47c07fba | 1035 | |
e93545dd | 1036 | |
47c07fba GE |
/**
 * Encloses a string in single quotes for use as a shell argument.
 *
 * Each single quote inside the string is written as '\'' (close the quotes,
 * escaped quote, reopen the quotes).
 *
 * @param input the string to quote.
 * @return the quoted string.
 */
std::string escape_shellarg(const std::string &input)
{
    std::string quoted(1, '\'');

    for (std::string::size_type i = 0; i < input.size(); ++i)
    {
        const char ch = input[i];

        // emit '\' in front of the quote; together with the quote itself
        // (appended below) this gives '\''
        if (ch == '\'')
            quoted.append("'\\'");

        quoted.push_back(ch);
    }

    quoted.push_back('\'');
    return quoted;
}