Commit | Line | Data |
---|---|---|
6a93d84a TJ |
1 | /** @file |
2 | * | |
3 | * (c) Copyright 2007-2008 by Intra2net AG | |
6ab3bc95 | 4 | * |
6a93d84a TJ |
5 | * info@intra2net.com |
6 | */ | |
e93545dd GE |
7 | |
8 | #include <iostream> | |
9 | #include <string> | |
10 | #include <sstream> | |
11 | #include <stdexcept> | |
5efd35b1 | 12 | #include <algorithm> |
e93545dd | 13 | |
a5f3af6e | 14 | #include <wchar.h> |
e93545dd GE |
15 | #include <stdlib.h> |
16 | #include <iconv.h> | |
17 | #include <i18n.h> | |
18 | ||
19 | #include <stringfunc.hxx> | |
20 | ||
21 | using namespace std; | |
22 | ||
6ab3bc95 RP |
23 | namespace I2n |
24 | { | |
6a93d84a TJ |
25 | |
26 | ||
6ab3bc95 RP |
namespace
{

/// Digit tables used by the hex conversion routines (index == nibble value).
const std::string hexDigitsLower("0123456789abcdef");
const std::string hexDigitsUpper("0123456789ABCDEF");


/**
 * @brief function object which maps a single character to upper case.
 *
 * The character is converted to unsigned char before it is handed to
 * std::toupper: passing a negative value (any char above 0x7f where char
 * is a signed type) to that function is undefined behaviour.
 */
struct UpperFunc
{
    char operator() (char c) const
    {
        return static_cast<char>( std::toupper( static_cast<unsigned char>(c) ) );
    }
}; // eo struct UpperFunc


/**
 * @brief function object which maps a single character to lower case.
 *
 * See UpperFunc for the reason of the unsigned char conversion.
 */
struct LowerFunc
{
    char operator() (char c) const
    {
        return static_cast<char>( std::tolower( static_cast<unsigned char>(c) ) );
    }
}; // eo struct LowerFunc


} // eo namespace <anonymous>
53 | ||
54 | ||
55 | ||
/**
 * Default set of characters treated as whitespace:
 * blank, tab, carriage return and line feed.
 */
const std::string Whitespaces(" \t\r\n");

/**
 * Default set of characters treated as line endings:
 * carriage return and line feed.
 */
const std::string LineEndings("\r\n");
6a93d84a TJ |
65 | |
66 | ||
67 | ||
/**
 * @brief tests whether a string starts with a given prefix.
 * @param str the string to examine.
 * @param prefix the prefix to look for.
 * @return @a true iff @a prefix is not empty and @a str begins with it.
 */
bool has_prefix(const std::string& str, const std::string& prefix)
{
    const std::string::size_type prefix_len = prefix.size();
    // an empty prefix never matches; a prefix longer than the string cannot match
    if (prefix_len == 0 || prefix_len > str.size() )
    {
        return false;
    }
    return std::equal(prefix.begin(), prefix.end(), str.begin() );
} // eo has_prefix(const std::string&,const std::string&)
6a93d84a TJ |
82 | |
83 | ||
/**
 * @brief tests whether a string ends with a given suffix.
 * @param str the string to examine.
 * @param suffix the suffix to look for.
 * @return @a true iff @a suffix is not empty and @a str ends with it.
 */
bool has_suffix(const std::string& str, const std::string& suffix)
{
    const std::string::size_type suffix_len = suffix.size();
    // an empty suffix never matches; a suffix longer than the string cannot match
    if (suffix_len == 0 || suffix_len > str.size() )
    {
        return false;
    }
    // compare back to front
    return std::equal(suffix.rbegin(), suffix.rend(), str.rbegin() );
} // eo has_suffix(const std::string&,const std::string&)
6a93d84a TJ |
98 | |
99 | ||
/**
 * @brief strips characters of a given set from both ends of a string (in place).
 * @param[in,out] str the string which is trimmed.
 * @param charlist the characters to remove from the beginning and the end.
 * @return a copy of the trimmed string.
 */
std::string trim_mod(std::string& str, const std::string& charlist)
{
    const std::string::size_type first_keep = str.find_first_not_of(charlist);
    if (first_keep == std::string::npos)
    {
        // nothing but characters from charlist (or nothing at all)
        str.clear();
        return str;
    }
    // at least one character survives, so this search cannot fail
    const std::string::size_type last_keep = str.find_last_not_of(charlist);
    // cut the tail first; the head positions stay valid that way
    str.erase(last_keep + 1);
    str.erase(0, first_keep);
    return str;
} // eo trim_mod(std::string&,const std::string&)
6a93d84a TJ |
129 | |
130 | ||
131 | ||
/**
 * @brief drops the last character of a string if it belongs to a given set (in place).
 * @param[in,out] str the string.
 * @param what the set of characters which are removed when found at the end.
 * @return a copy of the resulting string.
 *
 * At most one character is removed.
 */
std::string chomp_mod(std::string& str, const std::string& what)
{
    // an empty set never matches, so no separate check for it is needed
    if (!str.empty() && what.find( *str.rbegin() ) != std::string::npos)
    {
        str.erase(str.size() - 1);
    }
    return str;
} // eo chomp_mod(std::string&,const std::string&)
6a93d84a TJ |
150 | |
151 | ||
152 | /** | |
153 | * @brief converts a string to lower case. | |
154 | * @param[in,out] str the string to modify. | |
155 | * @return the string | |
156 | */ | |
6ab3bc95 | 157 | std::string to_lower_mod(std::string& str) |
6a93d84a | 158 | { |
6ab3bc95 RP |
159 | std::transform(str.begin(), str.end(), str.begin(), LowerFunc() ); |
160 | return str; | |
161 | } // eo to_lower_mod(std::string&) | |
6a93d84a TJ |
162 | |
163 | ||
164 | /** | |
165 | * @brief converts a string to upper case. | |
166 | * @param[in,out] str the string to modify. | |
167 | * @return the string | |
168 | */ | |
6ab3bc95 | 169 | std::string to_upper_mod(std::string& str) |
6a93d84a | 170 | { |
6ab3bc95 RP |
171 | std::transform( str.begin(), str.end(), str.begin(), UpperFunc() ); |
172 | return str; | |
173 | } // eo to_upper_mod(std::string&) | |
6a93d84a TJ |
174 | |
175 | ||
176 | ||
/**
 * @brief returns a copy of a string with characters of a given set stripped from both ends.
 * @param str the string which should be trimmed.
 * @param charlist the characters to remove from the beginning and the end.
 * @return the trimmed string.
 */
std::string trim (const std::string& str, const std::string& charlist)
{
    const std::string::size_type first_keep = str.find_first_not_of(charlist);
    if (first_keep == std::string::npos)
    {
        // nothing but characters from charlist (or nothing at all)
        return std::string();
    }
    // at least one character survives, so this search cannot fail
    const std::string::size_type last_keep = str.find_last_not_of(charlist);
    return std::string(str.begin() + first_keep, str.begin() + last_keep + 1);
} // eo trim(const std::string&,const std::string&)
196 | ||
197 | ||
/**
 * @brief returns a copy of a string without its last character if that character belongs to a given set.
 * @param str the string.
 * @param what the set of characters which are removed when found at the end.
 * @return the resulting string (at most one character shorter than @a str).
 */
std::string chomp (const std::string& str, const std::string& what)
{
    // an empty set never matches, so no separate check for it is needed
    const bool drop_last =
        !str.empty() && what.find( *str.rbegin() ) != std::string::npos;
    if (!drop_last)
    {
        return str;
    }
    return std::string(str, 0, str.size() - 1);
} // eo chomp(const std::string&,const std::string&)
216 | ||
217 | ||
218 | /** | |
219 | * @brief returns a lower case version of a given string. | |
220 | * @param str the string | |
221 | * @return the lower case version of the string | |
222 | */ | |
6ab3bc95 | 223 | std::string to_lower (const std::string& str) |
6a93d84a | 224 | { |
6ab3bc95 RP |
225 | std::string result(str); |
226 | return to_lower_mod(result); | |
227 | } // eo to_lower(const std::string&) | |
6a93d84a TJ |
228 | |
229 | ||
230 | /** | |
231 | * @brief returns a upper case version of a given string. | |
232 | * @param str the string | |
233 | * @return the upper case version of the string | |
234 | */ | |
6ab3bc95 | 235 | std::string to_upper(const std::string& str) |
6a93d84a | 236 | { |
6ab3bc95 RP |
237 | std::string result(str); |
238 | return to_upper_mod(result); | |
239 | } // eo to_upper(const std::string&) | |
6a93d84a TJ |
240 | |
241 | ||
242 | ||
243 | /** | |
244 | * @brief removes a given suffix from a string. | |
245 | * @param str the string. | |
246 | * @param suffix the suffix which should be removed if the string ends with it. | |
247 | * @return the string without the suffix. | |
248 | * | |
249 | * If the string ends with the suffix, it is removed. If the the string doesn't end | |
250 | * with the suffix the original string is returned. | |
251 | */ | |
6ab3bc95 | 252 | std::string remove_suffix(const std::string& str, const std::string& suffix) |
6a93d84a | 253 | { |
6ab3bc95 RP |
254 | if (has_suffix(str,suffix) ) |
255 | { | |
256 | return str.substr(0, str.size()-suffix.size() ); | |
257 | } | |
258 | return str; | |
259 | } // eo remove_suffix(const std::string&,const std::string&) | |
6a93d84a TJ |
260 | |
261 | ||
262 | ||
263 | /** | |
264 | * @brief removes a given prefix from a string. | |
265 | * @param str the string. | |
266 | * @param prefix the prefix which should be removed if the string begins with it. | |
267 | * @return the string without the prefix. | |
268 | * | |
269 | * If the string begins with the prefix, it is removed. If the the string doesn't begin | |
270 | * with the prefix the original string is returned. | |
271 | */ | |
6ab3bc95 | 272 | std::string remove_prefix(const std::string& str, const std::string& prefix) |
6a93d84a | 273 | { |
6ab3bc95 RP |
274 | if (has_prefix(str,prefix) ) |
275 | { | |
276 | return str.substr( prefix.size() ); | |
277 | } | |
278 | return str; | |
279 | } // eo remove_prefix(const std::string&,const std::string&) | |
6a93d84a TJ |
280 | |
281 | ||
282 | /** | |
283 | * split a string to key and value delimited by a given delimiter. | |
6ab3bc95 | 284 | * The resulting key and value strings are trimmed (Whitespaces removed at beginning and end). |
6a93d84a TJ |
285 | * @param str the string which should be splitted. |
286 | * @param[out] key the resulting key | |
287 | * @param[out] value the resulting value | |
288 | * @param delimiter the delimiter between key and value; default is '='. | |
289 | * @return @a true if the split was successful. | |
290 | */ | |
6ab3bc95 RP |
291 | bool pair_split( |
292 | const std::string& str, | |
293 | std::string& key, | |
294 | std::string& value, | |
295 | char delimiter) | |
296 | { | |
297 | std::string::size_type pos = str.find (delimiter); | |
298 | if (pos == std::string::npos) return false; | |
299 | key= str.substr(0,pos); | |
300 | value= str.substr(pos+1); | |
301 | trim_mod(key); | |
302 | trim_mod(value); | |
303 | return true; | |
304 | } // eo pair_split(const std::string&,std::string&,std::string&,char) | |
6a93d84a TJ |
305 | |
306 | ||
307 | /** | |
308 | * splits a string by given delimiter | |
309 | * | |
310 | * @param[in] str the string which should be splitted. | |
311 | * @param[out] result the list resulting from splitting @a str. | |
312 | * @param[in] delimiter the delimiter (word/phrase) at which @a str should be splitted. | |
313 | * @param[in] omit_empty should empty parts not be stored? | |
314 | * @param[in] trim_list list of characters the parts should be trimmed by. | |
315 | * (empty string results in no trim) | |
316 | */ | |
6ab3bc95 RP |
317 | void split_string( |
318 | const std::string& str, | |
319 | std::list<std::string>& result, | |
320 | const std::string& delimiter, | |
321 | bool omit_empty, | |
322 | const std::string& trim_list | |
6a93d84a TJ |
323 | ) |
324 | { | |
6ab3bc95 RP |
325 | std::string::size_type pos, last_pos=0; |
326 | bool delimiter_found= false; | |
327 | while ( last_pos < str.size() && last_pos != std::string::npos) | |
328 | { | |
329 | pos= str.find(delimiter, last_pos); | |
330 | std::string part; | |
331 | if (pos == std::string::npos) | |
332 | { | |
333 | part= str.substr(last_pos); | |
334 | delimiter_found= false; | |
335 | } | |
336 | else | |
337 | { | |
338 | part= str.substr(last_pos, pos-last_pos); | |
339 | delimiter_found=true; | |
340 | } | |
341 | if (pos != std::string::npos) | |
342 | { | |
343 | last_pos= pos+ delimiter.size(); | |
344 | } | |
345 | else | |
346 | { | |
347 | last_pos= std::string::npos; | |
348 | } | |
349 | if (!trim_list.empty() ) trim_mod (part, trim_list); | |
350 | if (omit_empty && part.empty() ) continue; | |
351 | result.push_back( part ); | |
352 | } | |
353 | // if the string ends with a delimiter we need to append an empty string if no omit_empty | |
354 | // was given. | |
355 | // (this way we keep the split result consistent to a join operation) | |
356 | if (delimiter_found && !omit_empty) | |
357 | { | |
358 | result.push_back(""); | |
359 | } | |
360 | } // eo split_string(const std::string&,std::list< std::string >&,const std::string&,bool,const std::string&) | |
6a93d84a TJ |
361 | |
362 | ||
363 | /** | |
364 | * splits a string by a given delimiter | |
365 | * @param str the string which should be splitted. | |
366 | * @param delimiter delimiter the delimiter (word/phrase) at which @a str should be splitted. | |
367 | * @param[in] omit_empty should empty parts not be stored? | |
368 | * @param[in] trim_list list of characters the parts should be trimmed by. | |
369 | * (empty string results in no trim) | |
370 | * @return the list resulting from splitting @a str. | |
371 | */ | |
6ab3bc95 RP |
372 | std::list<std::string> split_string( |
373 | const std::string& str, | |
374 | const std::string& delimiter, | |
375 | bool omit_empty, | |
376 | const std::string& trim_list | |
6a93d84a TJ |
377 | ) |
378 | { | |
6ab3bc95 RP |
379 | std::list<std::string> result; |
380 | split_string(str, result, delimiter, omit_empty, trim_list); | |
381 | return result; | |
382 | } // eo split_string(const std::string&,const std::string&,bool,const std::string&) | |
6a93d84a TJ |
383 | |
384 | ||
/**
 * @brief joins a list of strings into a single string.
 *
 * This function is (basically) the reverse operation of @a split_string.
 *
 * @param parts the list of strings.
 * @param delimiter the delimiter which is inserted between two neighbouring strings.
 * @return the joined string (empty for an empty list).
 */
std::string join_string(
    const std::list< std::string >& parts,
    const std::string& delimiter
)
{
    std::string joined;
    for (std::list< std::string >::const_iterator part = parts.begin();
         part != parts.end();
         ++part)
    {
        // every part except the first one is preceded by the delimiter
        if (part != parts.begin() )
        {
            joined += delimiter;
        }
        joined += *part;
    }
    return joined;
} // eo join_string(const std::list< std::string >&,const std::string&)
6a93d84a TJ |
412 | |
413 | ||
414 | ||
415 | /* | |
416 | ** conversions | |
417 | */ | |
418 | ||
419 | ||
420 | /** | |
421 | * @brief returns a hex string from a binary string. | |
422 | * @param str the (binary) string | |
423 | * @param upper_case_digits determine whether to use upper case characters for digits A-F. | |
424 | * @return the string in hex notation. | |
425 | */ | |
6ab3bc95 RP |
426 | std::string convert_binary_to_hex( |
427 | const std::string& str, | |
428 | bool upper_case_digits | |
6a93d84a TJ |
429 | ) |
430 | { | |
6ab3bc95 RP |
431 | std::string result; |
432 | std::string hexDigits(upper_case_digits ? hexDigitsUpper : hexDigitsLower); | |
433 | for ( std::string::const_iterator it= str.begin(); | |
434 | it != str.end(); | |
435 | ++it) | |
436 | { | |
437 | result.push_back( hexDigits[ ( (*it) >> 4) & 0x0f ] ); | |
438 | result.push_back( hexDigits[ (*it) & 0x0f ] ); | |
439 | } | |
440 | return result; | |
441 | } // eo convert_binary_to_hex(const std::string&,bool) | |
6a93d84a TJ |
442 | |
443 | ||
444 | /** | |
445 | * @brief converts a hex digit string to binary string. | |
446 | * @param str hex digit string | |
447 | * @return the binary string. | |
448 | * | |
449 | * The hex digit string may contains white spaces or colons which are treated | |
450 | * as delimiters between hex digit groups. | |
451 | * | |
452 | * @todo rework the handling of half nibbles (consistency)! | |
453 | */ | |
6ab3bc95 RP |
454 | std::string convert_hex_to_binary( |
455 | const std::string& str | |
6a93d84a | 456 | ) |
6ab3bc95 RP |
457 | throw (std::runtime_error) |
458 | { | |
459 | std::string result; | |
460 | char c= 0; | |
461 | bool hasNibble= false; | |
462 | bool lastWasWS= true; | |
463 | for ( std::string::const_iterator it= str.begin(); | |
464 | it != str.end(); | |
465 | ++it) | |
466 | { | |
467 | std::string::size_type p = hexDigitsLower.find( *it ); | |
468 | if (p== std::string::npos) | |
469 | { | |
470 | p= hexDigitsUpper.find( *it ); | |
471 | } | |
472 | if (p == std::string::npos) | |
473 | { | |
474 | if ( ( Whitespaces.find( *it ) != std::string::npos) // is it a whitespace? | |
6a93d84a | 475 | or ( *it == ':') // or a colon? |
6ab3bc95 RP |
476 | ) |
477 | { | |
478 | // we treat that as a valid delimiter: | |
479 | if (hasNibble) | |
6a93d84a | 480 | { |
6ab3bc95 RP |
481 | // 1 nibble before WS is treate as lower part: |
482 | result.push_back(c); | |
483 | // reset state: | |
484 | hasNibble= false; | |
6a93d84a | 485 | } |
6ab3bc95 RP |
486 | lastWasWS= true; |
487 | continue; | |
488 | } | |
489 | } | |
490 | if (p == std::string::npos ) | |
491 | { | |
492 | throw runtime_error("illegal character in hex digit string: " + str); | |
493 | } | |
494 | lastWasWS= false; | |
495 | if (hasNibble) | |
496 | { | |
497 | c<<=4; | |
498 | } | |
499 | else | |
500 | { | |
501 | c=0; | |
502 | } | |
503 | c+= (p & 0x0f); | |
504 | if (hasNibble) | |
505 | { | |
506 | //we already had a nibble, so a char is complete now: | |
507 | result.push_back( c ); | |
508 | hasNibble=false; | |
509 | } | |
510 | else | |
511 | { | |
512 | // this is the first nibble of a new char: | |
513 | hasNibble=true; | |
514 | } | |
515 | } | |
516 | if (hasNibble) | |
517 | { | |
518 | //well, there is one nibble left | |
519 | // let's do some heuristics: | |
520 | if (lastWasWS) | |
521 | { | |
522 | // if the preceeding character was a white space (or a colon) | |
523 | // we treat the nibble as lower part: | |
524 | //( this is consistent with shortened hex notations where leading zeros are not noted) | |
525 | result.push_back( c ); | |
526 | } | |
527 | else | |
528 | { | |
529 | // if it was part of a hex digit chain, we treat it as UPPER part (!!) | |
530 | result.push_back( c << 4 ); | |
531 | } | |
532 | } | |
533 | return result; | |
534 | } // eo convert_hex_to_binary(const std::string&) | |
535 | ||
536 | ||
537 | } // eo namespace I2n | |
538 | ||
539 | ||
540 | ||
6a93d84a | 541 | |
e93545dd GE |
/**
 * @brief converts an ISO-8859-1 encoded string to UTF-8.
 * @param isostring the ISO-8859-1 encoded input.
 * @return the UTF-8 encoded string.
 * @throw std::runtime_error if iconv does not offer the conversion or the
 *        conversion buffer cannot be allocated.
 *
 * The conversion is best effort: if iconv stops early, whatever was
 * converted up to that point is returned.
 */
std::string iso_to_utf8(const std::string& isostring)
{
    iconv_t i2utf8 = iconv_open("UTF-8", "ISO-8859-1");

    // check the descriptor which was just opened
    if (i2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from ISO-8859-1 to UTF-8");

    size_t in_size = isostring.size();
    const size_t buf_size = in_size * 4;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>(malloc(buf_size + 1));
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(i2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv wants a non-const input pointer but does not modify the input
    char *in = const_cast<char *>(isostring.c_str());
    char *out = buf;
    iconv(i2utf8, &in, &in_size, &out, &out_size);
    iconv_close(i2utf8);

    // out_size is what is left of the buffer; copy by length so that NUL
    // bytes inside the data do not cut the result short
    const std::string result(buf, buf_size - out_size);
    free(buf);

    return result;
}
571 | ||
/**
 * @brief converts a UTF-8 encoded string to ISO-8859-1.
 * @param utf8string the UTF-8 encoded input.
 * @return the ISO-8859-1 encoded string.
 * @throw std::runtime_error if iconv does not offer the conversion or the
 *        conversion buffer cannot be allocated.
 *
 * The conversion is best effort: if iconv stops early, whatever was
 * converted up to that point is returned.
 */
std::string utf8_to_iso(const std::string& utf8string)
{
    iconv_t utf82iso = iconv_open("ISO-8859-1","UTF-8");

    if (utf82iso == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to ISO-8859-1");

    size_t in_size = utf8string.size();
    // the ISO-8859-1 form is never longer than the UTF-8 form
    const size_t buf_size = in_size;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>(malloc(buf_size + 1));
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(utf82iso);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv wants a non-const input pointer but does not modify the input
    char *in = const_cast<char *>(utf8string.c_str());
    char *out = buf;
    iconv(utf82iso, &in, &in_size, &out, &out_size);
    iconv_close(utf82iso);

    // out_size is what is left of the buffer; copy by length so that NUL
    // bytes inside the data do not cut the result short
    const std::string result(buf, buf_size - out_size);
    free(buf);

    return result;
}
601 | ||
a5f3af6e GE |
/**
 * @brief converts a UTF-8 encoded string to a zero terminated wide character buffer.
 * @param utf8string the UTF-8 encoded input.
 * @return a buffer allocated with malloc; the caller has to release it with free().
 * @throw std::runtime_error if iconv does not offer the conversion, the
 *        buffer cannot be allocated or the input cannot be converted.
 *
 * NOTE(review): the target encoding is UCS-4LE, so this presumably relies on
 * wchar_t being a 4 byte little endian type - confirm for new platforms.
 */
wchar_t* utf8_to_wbuf(const std::string& utf8string)
{
    iconv_t utf82wstr = iconv_open("UCS-4LE","UTF-8");

    if (utf82wstr == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UCS-4");

    size_t in_size = utf8string.size();
    // one wide character per input byte is the worst case, plus the terminator
    const size_t buf_size = (in_size + 1) * sizeof(wchar_t);
    size_t out_size = buf_size;

    wchar_t *buf = static_cast<wchar_t *>(malloc(buf_size));
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(utf82wstr);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv wants a non-const input pointer but does not modify the input
    char *in = const_cast<char *>(utf8string.c_str());
    char *out = reinterpret_cast<char *>(buf);
    const size_t rc = iconv(utf82wstr, &in, &in_size, &out, &out_size);
    iconv_close(utf82wstr);

    if (rc == static_cast<size_t>(-1))
    {
        // release the buffer before reporting the failure
        free(buf);
        throw std::runtime_error("error converting char encodings");
    }

    // terminate behind the last converted character
    buf[ (buf_size - out_size) / sizeof(wchar_t) ] = 0;

    return buf;
}
627 | ||
/**
 * @brief converts a string in the modified UTF-7 encoding used by IMAP to UTF-8.
 * @param utf7imapstring the UTF-7-IMAP encoded input.
 * @return the UTF-8 encoded string.
 * @throw std::runtime_error if iconv does not offer the conversion or the
 *        conversion buffer cannot be allocated.
 *
 * The conversion is best effort: if iconv stops early, whatever was
 * converted up to that point is returned.
 */
std::string utf7imap_to_utf8(const std::string& utf7imapstring)
{
    iconv_t utf7imap2utf8 = iconv_open("UTF-8","UTF-7-IMAP");

    if (utf7imap2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-7-IMAP to UTF-8");

    size_t in_size = utf7imapstring.size();
    const size_t buf_size = in_size * 4;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>(malloc(buf_size + 1));
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(utf7imap2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv wants a non-const input pointer but does not modify the input
    char *in = const_cast<char *>(utf7imapstring.c_str());
    char *out = buf;
    iconv(utf7imap2utf8, &in, &in_size, &out, &out_size);
    iconv_close(utf7imap2utf8);

    // out_size is what is left of the buffer; copy by length so that NUL
    // bytes inside the data do not cut the result short
    const std::string result(buf, buf_size - out_size);
    free(buf);

    return result;
}
657 | ||
6a2b6dd1 TJ |
/**
 * @brief converts a UTF-8 encoded string to the modified UTF-7 encoding used by IMAP.
 * @param utf8string the UTF-8 encoded input.
 * @return the UTF-7-IMAP encoded string.
 * @throw std::runtime_error if iconv does not offer the conversion or the
 *        conversion buffer cannot be allocated.
 *
 * The conversion is best effort: if iconv stops early, whatever was
 * converted up to that point is returned.
 */
std::string utf8_to_utf7imap(const std::string& utf8string)
{
    iconv_t utf82utf7imap = iconv_open("UTF-7-IMAP", "UTF-8");

    if (utf82utf7imap == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UTF-7-IMAP");

    // UTF-7 is base64 encoded, a buffer 10x as large
    // as the utf-8 buffer should be enough. If not the string will be truncated.
    size_t in_size = utf8string.size();
    const size_t buf_size = in_size * 10;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>(malloc(buf_size + 1));
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(utf82utf7imap);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv wants a non-const input pointer but does not modify the input
    char *in = const_cast<char *>(utf8string.c_str());
    char *out = buf;
    iconv(utf82utf7imap, &in, &in_size, &out, &out_size);

    // UTF-7 is a stateful encoding: a base64 run which is still open at the
    // end of the input is only terminated when the converter is asked to
    // return to its initial state (input pointer NULL, output pointer set).
    iconv(utf82utf7imap, NULL, NULL, &out, &out_size);
    iconv_close(utf82utf7imap);

    // out_size is what is left of the buffer
    const std::string result(buf, buf_size - out_size);
    free(buf);

    return result;
}
689 | ||
118e216e TJ |
/**
 * @brief cuts a string into (html) tags and the text between them.
 * @param[out] tokenized the list the tokens are appended to; the second
 *             member of each pair is @a true for a tag and @a false for text.
 * @param input the string which is tokenized.
 *
 * A tag starts at a '<' and ends at the next '>'. Every '<' starts a new
 * token; whatever was collected before it is stored as text, even if it
 * started with an unclosed '<'. A '>' outside of a tag is ordinary text.
 */
void tokenize_by_tag(std::vector<std::pair<std::string,bool> > &tokenized, const std::string &input)
{
    std::string pending;
    bool in_tag = false;

    for (std::string::const_iterator ch = input.begin(); ch != input.end(); ++ch)
    {
        if (*ch == '<')
        {
            // flush what was collected so far as text
            if (!pending.empty() )
            {
                tokenized.push_back( std::make_pair(pending, false) );
                pending.clear();
            }
            in_tag = true;
            pending.push_back(*ch);
            continue;
        }

        pending.push_back(*ch);
        if (*ch == '>' && in_tag)
        {
            // the tag is complete
            in_tag = false;
            tokenized.push_back( std::make_pair(pending, true) );
            pending.clear();
        }
    }

    // whatever is left over counts as text
    if (!pending.empty() )
        tokenized.push_back( std::make_pair(pending, false) );
} // eo tokenize_by_tag
118e216e | 729 | |
118e216e TJ |
730 | |
731 | std::string strip_html_tags(const std::string &input) | |
732 | { | |
6ab3bc95 RP |
733 | // Pair first: string, second: isTag |
734 | vector<pair<string,bool> > tokenized; | |
735 | tokenize_by_tag (tokenized, input); | |
118e216e | 736 | |
6ab3bc95 RP |
737 | string output; |
738 | vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end(); | |
739 | for (token = tokenized.begin(); token != tokens_end; token++) | |
740 | if (!token->second) | |
741 | output += token->first; | |
742 | ||
743 | return output; | |
744 | } // eo strip_html_tags | |
118e216e | 745 | |
118e216e TJ |
746 | |
747 | // Smart-encode HTML en | |
748 | string smart_html_entities(const std::string &input) | |
749 | { | |
6ab3bc95 RP |
750 | // Pair first: string, second: isTag |
751 | vector<pair<string,bool> > tokenized; | |
752 | tokenize_by_tag (tokenized, input); | |
753 | ||
754 | string output; | |
755 | vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end(); | |
756 | for (token = tokenized.begin(); token != tokens_end; token++) | |
757 | { | |
758 | // keep HTML tags as they are | |
759 | if (token->second) | |
760 | output += token->first; | |
761 | else | |
762 | output += html_entities(token->first); | |
763 | } | |
764 | ||
765 | return output; | |
118e216e TJ |
766 | } |
767 | ||
6ab3bc95 | 768 | |
a5f3af6e GE |
/**
 * @brief finds the first byte of a string which is not 7 bit ASCII.
 * @param str the string to scan.
 * @return the position of the first byte with a value above 127,
 *         or std::string::npos if there is none.
 */
std::string::size_type find_8bit(const std::string &str)
{
    for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        // a byte above 127 has its highest bit set
        if ( (static_cast<unsigned char>(*it) & 0x80) != 0)
            return static_cast<std::string::size_type>(it - str.begin() );
    }

    return std::string::npos;
}
778 | ||
118e216e TJ |
779 | // encoded UTF-8 chars into HTML entities |
780 | string html_entities(std::string str) | |
781 | { | |
6ab3bc95 RP |
782 | // Normal chars |
783 | replace_all (str, "&", "&"); | |
6ab3bc95 RP |
784 | replace_all (str, "<", "<"); |
785 | replace_all (str, ">", ">"); | |
980577e1 TJ |
786 | replace_all (str, "\"", """); |
787 | replace_all (str, "'", "'"); | |
788 | replace_all (str, "/", "/"); | |
6ab3bc95 RP |
789 | |
790 | // Umlauts | |
791 | replace_all (str, "\xC3\xA4", "ä"); | |
792 | replace_all (str, "\xC3\xB6", "ö"); | |
793 | replace_all (str, "\xC3\xBC", "ü"); | |
794 | replace_all (str, "\xC3\x84", "Ä"); | |
795 | replace_all (str, "\xC3\x96", "Ö"); | |
796 | replace_all (str, "\xC3\x9C", "Ü"); | |
797 | ||
798 | // Misc | |
799 | replace_all (str, "\xC3\x9F", "ß"); | |
800 | ||
801 | // conversion of remaining non-ASCII chars needed? | |
802 | // just do if needed because of performance | |
803 | if (find_8bit(str) != string::npos) | |
804 | { | |
805 | // convert to fixed-size encoding UTF-32 | |
806 | wchar_t* wbuf=utf8_to_wbuf(str); | |
807 | ostringstream target; | |
808 | ||
809 | // replace all non-ASCII chars with HTML representation | |
810 | for (int p=0; wbuf[p] != 0; p++) | |
811 | { | |
812 | unsigned int c=wbuf[p]; | |
813 | ||
814 | if (c <= 127) | |
815 | target << static_cast<unsigned char>(c); | |
816 | else | |
817 | target << "&#" << c << ';'; | |
818 | } | |
819 | ||
820 | free(wbuf); | |
821 | ||
822 | str=target.str(); | |
823 | } | |
824 | ||
825 | return str; | |
826 | } // eo html_entities(std::string) | |
827 | ||
118e216e | 828 | |
e93545dd GE |
/**
 * @brief replaces every occurrence of a search string by another string (in place).
 * @param[in,out] base the string which is modified.
 * @param ist the string to search for; must not be empty.
 * @param soll the replacement.
 * @return @a true if at least one occurrence was replaced.
 * @throw std::runtime_error if @a ist is empty.
 *
 * The search continues behind the inserted replacement, so replacements
 * are never searched again.
 */
bool replace_all(std::string &base, const std::string &ist, const std::string &soll)
{
    if (ist.empty() )
        throw std::runtime_error ("replace_all called with empty search string");

    bool replaced_any = false;
    for (std::string::size_type hit = base.find(ist);
         hit != std::string::npos;
         hit = base.find(ist, hit + soll.size() ) )
    {
        base.replace(hit, ist.size(), soll);
        replaced_any = true;
    }

    return replaced_any;
}

/// Variant of replace_all which takes search string and replacement as pointers to strings.
bool replace_all(std::string &base, const std::string *ist, const std::string *soll)
{
    return replace_all(base, *ist, *soll);
}

/// Variant of replace_all which takes search string and replacement as C strings.
bool replace_all(std::string &base, const char *ist, const char *soll)
{
    const std::string needle(ist);
    const std::string replacement(soll);
    return replace_all(base, needle, replacement);
}

/// Variant of replace_all which takes the replacement as C string.
bool replace_all(std::string &base, const std::string &ist, const char *soll)
{
    const std::string replacement(soll);
    return replace_all(base, ist, replacement);
}

/// Variant of replace_all which takes the search string as C string and the replacement as pointer to a string.
bool replace_all(std::string &base, const char *ist, const std::string *soll)
{
    const std::string needle(ist);
    return replace_all(base, needle, *soll);
}
870 | ||
e5b21dbb | 871 | #if 0 |
e93545dd GE |
872 | string to_lower(const string &src) |
873 | { | |
6ab3bc95 | 874 | string dst = src; |
e93545dd | 875 | |
6ab3bc95 RP |
876 | string::size_type pos, end = dst.size(); |
877 | for (pos = 0; pos < end; pos++) | |
878 | dst[pos] = tolower(dst[pos]); | |
e93545dd | 879 | |
6ab3bc95 | 880 | return dst; |
e93545dd GE |
881 | } |
882 | ||
883 | string to_upper(const string &src) | |
884 | { | |
6ab3bc95 | 885 | string dst = src; |
e93545dd | 886 | |
6ab3bc95 RP |
887 | string::size_type pos, end = dst.size(); |
888 | for (pos = 0; pos < end; pos++) | |
889 | dst[pos] = toupper(dst[pos]); | |
e93545dd | 890 | |
6ab3bc95 | 891 | return dst; |
e93545dd | 892 | } |
e5b21dbb | 893 | #endif |
e93545dd | 894 | |
83809f5e | 895 | const int MAX_UNIT_FORMAT_SYMBOLS = 6; |
d1ea9075 | 896 | |
2cb9a9c5 | 897 | const string shortUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = { |
d1ea9075 GMF |
898 | " B", |
899 | " KB", | |
900 | " MB", | |
901 | " GB", | |
902 | " TB", | |
83809f5e | 903 | " PB" |
d1ea9075 GMF |
904 | }; |
905 | ||
2cb9a9c5 | 906 | const string longUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = { |
5cb766b9 GMF |
907 | i18n_noop(" Bytes"), |
908 | i18n_noop(" KBytes"), | |
909 | i18n_noop(" MBytes"), | |
910 | i18n_noop(" GBytes"), | |
911 | i18n_noop(" TBytes"), | |
83809f5e | 912 | i18n_noop(" PBytes") |
d1ea9075 GMF |
913 | }; |
914 | ||
72a94426 GMF |
915 | |
/**
 * @brief rounds a number "half up" to a multiple of 1/rounding_multiplier.
 *
 * Example: with a multiplier of 10 the result has one decimal place,
 * 1.25 becomes 1.3 and -1234 stays -1234.
 *
 * @param number the value which should be rounded.
 * @param rounding_multiplier reciprocal of the rounding step (10 -> steps of 0.1);
 *        must not be 0.
 * @return the rounded value. If the scaled value does not fit into an int64_t
 *         (or is NaN), @a number is returned unchanged.
 */
long double rounding_upwards(
    long double number,
    const int rounding_multiplier
)
{
    long double scaled = number * rounding_multiplier;
    scaled += 0.5;

    // Converting a value outside of the int64_t range is undefined, so leave
    // such numbers alone. The negated test also catches NaN.
    const long double int64_limit = 9223372036854775808.0L; // 2^63
    if (!(scaled > -int64_limit && scaled < int64_limit))
        return number;

    // The cast truncates towards zero. "Half up" needs a floor, so step one
    // down for negative values which lost a fractional part.
    int64_t whole = static_cast<int64_t>(scaled);
    if (scaled < 0 && static_cast<long double>(whole) != scaled)
        --whole;

    return static_cast<long double>(whole) / static_cast<long double>(rounding_multiplier);
}
929 | ||
930 | ||
81267544 GMF |
931 | string nice_unit_format( |
932 | const int64_t input, | |
70fc0674 GMF |
933 | const UnitFormat format, |
934 | const UnitBase base | |
81267544 | 935 | ) |
6ab3bc95 | 936 | { |
d1ea9075 | 937 | // select the system of units (decimal or binary) |
81267544 | 938 | int multiple = 0; |
a398513a | 939 | if (base == UnitBase1000) |
81267544 GMF |
940 | { |
941 | multiple = 1000; | |
942 | } | |
943 | else | |
944 | { | |
945 | multiple = 1024; | |
946 | } | |
947 | ||
948 | long double size = input; | |
6ab3bc95 | 949 | |
d1ea9075 GMF |
950 | // check the size of the input number to fit in the appropriate symbol |
951 | int sizecount = 0; | |
81267544 | 952 | while (size > multiple) |
6ab3bc95 | 953 | { |
81267544 GMF |
954 | size = size / multiple; |
955 | sizecount++; | |
83809f5e GMF |
956 | |
957 | // rollback to the previous values and stop the loop when cannot | |
958 | // represent the number length. | |
959 | if (sizecount >= MAX_UNIT_FORMAT_SYMBOLS) | |
960 | { | |
961 | size = size * multiple; | |
962 | sizecount--; | |
963 | break; | |
964 | } | |
6ab3bc95 RP |
965 | } |
966 | ||
a398513a GMF |
967 | // round the input number "half up" to multiples of 10 |
968 | const int rounding_multiplier = 10; | |
72a94426 | 969 | size = rounding_upwards(size, rounding_multiplier); |
6ab3bc95 | 970 | |
d1ea9075 | 971 | // format the input number, placing the appropriate symbol |
6ab3bc95 | 972 | ostringstream out; |
6ab3bc95 | 973 | out.setf (ios::fixed); |
a398513a | 974 | if (format == ShortUnitFormat) |
d1ea9075 GMF |
975 | { |
976 | out.precision(1); | |
68d37a5c | 977 | out << size << i18n( shortUnitFormatSymbols[sizecount].c_str() ); |
d1ea9075 GMF |
978 | } |
979 | else | |
6ab3bc95 | 980 | { |
d1ea9075 | 981 | out.precision (2); |
68d37a5c | 982 | out << size << i18n( longUnitFormatSymbols[sizecount].c_str() ); |
6ab3bc95 RP |
983 | } |
984 | ||
985 | return out.str(); | |
986 | } // eo nice_unit_format(int input) | |
987 | ||
e93545dd | 988 | |
47c07fba GE |
/**
 * @brief converts a string into its escaped, double quoted representation.
 *
 * Double quotes and backslashes get a backslash prepended, carriage return
 * and line feed are written as the two character sequences \\r and \\n.
 * The result is enclosed in double quotes.
 *
 * @param s the string which should be escaped.
 * @return the escaped and quoted string.
 */
std::string escape(const std::string &s)
{
    std::string out;
    // at least the input plus the two surrounding quotes
    out.reserve(s.size() + 2);

    out += '"';

    // single pass over the input; appending avoids shifting the tail of the
    // string for every escaped character.
    const std::string::size_type len = s.size();
    for (std::string::size_type i = 0; i < len; ++i)
    {
        const char c = s[i];
        switch (c)
        {
            case '"':
            case '\\':
                out += '\\';
                out += c;
                break;
            case '\r':
                out += "\\r";
                break;
            case '\n':
                out += "\\n";
                break;
            default:
                out += c;
        }
    }

    out += '"';

    return out;
} // eo escape(const std::string&)
47c07fba | 1019 | |
47c07fba | 1020 | |
6ab3bc95 RP |
/**
 * @brief extracts and decodes an escaped, double quoted string.
 *
 * Decoding rules: \\r becomes carriage return, \\n becomes line feed, for every
 * other backslash sequence the backslash is dropped and the following
 * character is taken literally. The first double quote which is not part of
 * a backslash sequence ends the string.
 *
 * @param s the text which contains the escaped string.
 * @param startpos position of the opening double quote within @a s.
 * @param[out] endpos position of the closing double quote within @a s. If the
 *        closing quote is missing, it is set to @a startpos and the decoded
 *        remainder of @a s is returned.
 * @return the decoded string without the surrounding quotes.
 * @throw std::out_of_range if @a startpos is not a valid position, if there is
 *        no double quote at @a startpos or if the text ends with a single
 *        backslash.
 */
std::string descape(const std::string &s, int startpos, int &endpos)
{
    if (s.at(startpos) != '"')
        throw std::out_of_range("value not type escaped string");

    const std::string::size_type len = s.size();
    std::string out;

    // Scan the input once, directly behind the opening quote. Backslash
    // sequences are consumed as pairs, so a quote we see here as a single
    // character is always the unescaped, closing one.
    std::string::size_type pos = static_cast<std::string::size_type>(startpos) + 1;
    while (pos < len)
    {
        const char c = s[pos];

        if (c == '"')
        {
            // tell the caller where the string ends
            endpos = static_cast<int>(pos);
            return out;
        }

        if (c != '\\')
        {
            out += c;
            ++pos;
            continue;
        }

        if (pos + 1 >= len)
        {
            // only possible for an unterminated string
            endpos = startpos;
            throw std::out_of_range("escaped string ends with a single backslash");
        }

        const char escaped = s[pos + 1];
        switch (escaped)
        {
            case 'r':
                out += '\r';
                break;
            case 'n':
                out += '\n';
                break;
            default:
                out += escaped;
        }
        pos += 2;
    }

    // no closing quote found: report the start position as end position
    endpos = startpos;

    return out;
} // eo descape(const std::string&,int,int&)
47c07fba | 1080 | |
e93545dd | 1081 | |
47c07fba GE |
/**
 * @brief wraps a string in single quotes for use as a shell argument.
 *
 * Every single quote inside the input is written as the sequence '\'' (close
 * the quoted part, emit an escaped quote, open a new quoted part); all other
 * characters are copied unchanged.
 *
 * @param input the string which should be quoted.
 * @return the quoted string.
 */
std::string escape_shellarg(const std::string &input)
{
    std::string quoted(1, '\'');

    const std::string::size_type len = input.size();
    for (std::string::size_type idx = 0; idx < len; ++idx)
    {
        const char ch = input[idx];

        // leave the quoted part and escape the quote; the quote itself,
        // appended below, opens the next quoted part.
        if (ch == '\'')
            quoted.append("'\\'");

        quoted.push_back(ch);
    }

    quoted.push_back('\'');
    return quoted;
}