Commit | Line | Data |
---|---|---|
0e23f538 TJ |
1 | /* |
2 | The software in this package is distributed under the GNU General | |
3 | Public License version 2 (with a special exception described below). | |
4 | ||
5 | A copy of GNU General Public License (GPL) is included in this distribution, | |
6 | in the file COPYING.GPL. | |
7 | ||
8 | As a special exception, if other files instantiate templates or use macros | |
9 | or inline functions from this file, or you compile this file and link it | |
10 | with other works to produce a work based on this file, this file | |
11 | does not by itself cause the resulting work to be covered | |
12 | by the GNU General Public License. | |
13 | ||
14 | However the source code for this file must still be made available | |
15 | in accordance with section (3) of the GNU General Public License. | |
16 | ||
17 | This exception does not invalidate any other reasons why a work based | |
18 | on this file might be covered by the GNU General Public License. | |
19 | */ | |
6a93d84a TJ |
20 | /** @file |
21 | * | |
22 | * (c) Copyright 2007-2008 by Intra2net AG | |
6a93d84a | 23 | */ |
e93545dd GE |
24 | |
25 | #include <iostream> | |
26 | #include <string> | |
27 | #include <sstream> | |
28 | #include <stdexcept> | |
5efd35b1 | 29 | #include <algorithm> |
e93545dd | 30 | |
a5f3af6e | 31 | #include <wchar.h> |
e93545dd GE |
32 | #include <stdlib.h> |
33 | #include <iconv.h> | |
34 | #include <i18n.h> | |
35 | ||
36 | #include <stringfunc.hxx> | |
37 | ||
38 | using namespace std; | |
39 | ||
6ab3bc95 RP |
40 | namespace I2n |
41 | { | |
6a93d84a TJ |
42 | |
43 | ||
6ab3bc95 RP |
namespace
{

/// Lookup tables mapping a nibble value (0..15) to its hexadecimal digit.
const std::string hexDigitsLower("0123456789abcdef");
const std::string hexDigitsUpper("0123456789ABCDEF");


/**
 * @brief functor converting a single character to upper case.
 *
 * The character is passed to std::toupper as unsigned char: handing a
 * negative value (any non-ASCII byte where char is signed) to the
 * <cctype> functions is undefined behaviour.
 */
struct UpperFunc
{
    char operator() (char c) const
    {
        return static_cast<char>( std::toupper( static_cast<unsigned char>(c) ) );
    }
}; // eo struct UpperFunc


/**
 * @brief functor converting a single character to lower case.
 *
 * See UpperFunc for the reason of the unsigned char conversion.
 */
struct LowerFunc
{
    char operator() (char c) const
    {
        return static_cast<char>( std::tolower( static_cast<unsigned char>(c) ) );
    }
}; // eo struct LowerFunc


} // eo namespace <anonymous>
70 | ||
71 | ||
72 | ||
/**
 * Default set of whitespace characters: blank, tab, carriage return, line feed.
 */
const std::string Whitespaces(" \t\r\n");

/**
 * Default set of line ending characters: carriage return and line feed.
 */
const std::string LineEndings("\r\n");
6a93d84a TJ |
82 | |
83 | ||
84 | ||
/**
 * @brief tests whether a string starts with a given prefix.
 * @param str the string to examine.
 * @param prefix the prefix to look for.
 * @return @a true iff @a prefix is not empty and @a str begins with it.
 */
bool has_prefix(const std::string& str, const std::string& prefix)
{
    const std::string::size_type prefix_len= prefix.size();
    // an empty prefix never matches; a prefix longer than the string cannot match
    if (prefix_len == 0 || prefix_len > str.size() )
    {
        return false;
    }
    return str.compare(0, prefix_len, prefix) == 0;
} // eo has_prefix(const std::string&,const std::string&)
6a93d84a TJ |
99 | |
100 | ||
/**
 * @brief tests whether a string ends with a given suffix.
 * @param str the string to examine.
 * @param suffix the suffix to look for.
 * @return @a true iff @a suffix is not empty and @a str ends with it.
 */
bool has_suffix(const std::string& str, const std::string& suffix)
{
    const std::string::size_type suffix_len= suffix.size();
    // an empty suffix never matches; a suffix longer than the string cannot match
    if (suffix_len == 0 || suffix_len > str.size() )
    {
        return false;
    }
    return str.compare(str.size() - suffix_len, suffix_len, suffix) == 0;
} // eo has_suffix(const std::string&,const std::string&)
6a93d84a TJ |
115 | |
116 | ||
/**
 * @brief strips characters of a given set from both ends of a string (in place).
 * @param[in,out] str the string to trim.
 * @param charlist the characters to remove from the beginning and the end.
 * @return a copy of the trimmed string.
 */
std::string trim_mod(std::string& str, const std::string& charlist)
{
    const std::string::size_type first= str.find_first_not_of(charlist);
    if (first == std::string::npos)
    {
        // nothing but charlist characters (or nothing at all)
        str.clear();
        return str;
    }
    // at least one character survives, so the search from the end succeeds, too
    const std::string::size_type last= str.find_last_not_of(charlist);
    // cut the tail first; this keeps the head offset valid
    str.erase(last + 1);
    str.erase(0, first);
    return str;
} // eo trim_mod(std::string&,const std::string&)
6a93d84a TJ |
146 | |
147 | ||
148 | ||
/**
 * @brief removes the last character of a string if it is in a given set (in place).
 * @param[in,out] str the string.
 * @param what the set of characters which are removed when found at the end.
 * @return a copy of the resulting string.
 *
 * At most one character is removed.
 */
std::string chomp_mod(std::string& str, const std::string& what)
{
    if (!str.empty() && !what.empty() )
    {
        const std::string::size_type last= str.size() - 1;
        if (what.find( str[last] ) != std::string::npos)
        {
            str.erase(last);
        }
    }
    return str;
} // eo chomp_mod(std::string&,const std::string&)
6a93d84a TJ |
167 | |
168 | ||
169 | /** | |
170 | * @brief converts a string to lower case. | |
171 | * @param[in,out] str the string to modify. | |
172 | * @return the string | |
173 | */ | |
6ab3bc95 | 174 | std::string to_lower_mod(std::string& str) |
6a93d84a | 175 | { |
6ab3bc95 RP |
176 | std::transform(str.begin(), str.end(), str.begin(), LowerFunc() ); |
177 | return str; | |
178 | } // eo to_lower_mod(std::string&) | |
6a93d84a TJ |
179 | |
180 | ||
181 | /** | |
182 | * @brief converts a string to upper case. | |
183 | * @param[in,out] str the string to modify. | |
184 | * @return the string | |
185 | */ | |
6ab3bc95 | 186 | std::string to_upper_mod(std::string& str) |
6a93d84a | 187 | { |
6ab3bc95 RP |
188 | std::transform( str.begin(), str.end(), str.begin(), UpperFunc() ); |
189 | return str; | |
190 | } // eo to_upper_mod(std::string&) | |
6a93d84a TJ |
191 | |
192 | ||
193 | ||
/**
 * @brief returns a copy of a string with characters of a given set stripped from both ends.
 * @param str the string to trim.
 * @param charlist the characters to remove from the beginning and the end.
 * @return the trimmed string.
 */
std::string trim (const std::string& str, const std::string& charlist)
{
    const std::string::size_type first= str.find_first_not_of(charlist);
    if (first == std::string::npos)
    {
        // nothing but charlist characters (or nothing at all)
        return std::string();
    }
    const std::string::size_type last= str.find_last_not_of(charlist);
    return std::string(str, first, last - first + 1);
} // eo trim(const std::string&,const std::string&)
213 | ||
214 | ||
/**
 * @brief returns a copy of a string without its last character if that is in a given set.
 * @param str the string.
 * @param what the set of characters which are removed when found at the end.
 * @return the resulting string (at most one character shorter than @a str).
 */
std::string chomp (const std::string& str, const std::string& what)
{
    if (str.empty() || what.empty() )
    {
        return str;
    }
    const char last= *str.rbegin();
    if (what.find(last) == std::string::npos)
    {
        return str;
    }
    return std::string(str, 0, str.size() - 1);
} // eo chomp(const std::string&,const std::string&)
233 | ||
234 | ||
235 | /** | |
236 | * @brief returns a lower case version of a given string. | |
237 | * @param str the string | |
238 | * @return the lower case version of the string | |
239 | */ | |
6ab3bc95 | 240 | std::string to_lower (const std::string& str) |
6a93d84a | 241 | { |
6ab3bc95 RP |
242 | std::string result(str); |
243 | return to_lower_mod(result); | |
244 | } // eo to_lower(const std::string&) | |
6a93d84a TJ |
245 | |
246 | ||
247 | /** | |
248 | * @brief returns a upper case version of a given string. | |
249 | * @param str the string | |
250 | * @return the upper case version of the string | |
251 | */ | |
6ab3bc95 | 252 | std::string to_upper(const std::string& str) |
6a93d84a | 253 | { |
6ab3bc95 RP |
254 | std::string result(str); |
255 | return to_upper_mod(result); | |
256 | } // eo to_upper(const std::string&) | |
6a93d84a TJ |
257 | |
258 | ||
259 | ||
260 | /** | |
261 | * @brief removes a given suffix from a string. | |
262 | * @param str the string. | |
263 | * @param suffix the suffix which should be removed if the string ends with it. | |
264 | * @return the string without the suffix. | |
265 | * | |
266 | * If the string ends with the suffix, it is removed. If the the string doesn't end | |
267 | * with the suffix the original string is returned. | |
268 | */ | |
6ab3bc95 | 269 | std::string remove_suffix(const std::string& str, const std::string& suffix) |
6a93d84a | 270 | { |
6ab3bc95 RP |
271 | if (has_suffix(str,suffix) ) |
272 | { | |
273 | return str.substr(0, str.size()-suffix.size() ); | |
274 | } | |
275 | return str; | |
276 | } // eo remove_suffix(const std::string&,const std::string&) | |
6a93d84a TJ |
277 | |
278 | ||
279 | ||
280 | /** | |
281 | * @brief removes a given prefix from a string. | |
282 | * @param str the string. | |
283 | * @param prefix the prefix which should be removed if the string begins with it. | |
284 | * @return the string without the prefix. | |
285 | * | |
286 | * If the string begins with the prefix, it is removed. If the the string doesn't begin | |
287 | * with the prefix the original string is returned. | |
288 | */ | |
6ab3bc95 | 289 | std::string remove_prefix(const std::string& str, const std::string& prefix) |
6a93d84a | 290 | { |
6ab3bc95 RP |
291 | if (has_prefix(str,prefix) ) |
292 | { | |
293 | return str.substr( prefix.size() ); | |
294 | } | |
295 | return str; | |
296 | } // eo remove_prefix(const std::string&,const std::string&) | |
6a93d84a TJ |
297 | |
298 | ||
299 | /** | |
300 | * split a string to key and value delimited by a given delimiter. | |
6ab3bc95 | 301 | * The resulting key and value strings are trimmed (Whitespaces removed at beginning and end). |
6a93d84a TJ |
302 | * @param str the string which should be splitted. |
303 | * @param[out] key the resulting key | |
304 | * @param[out] value the resulting value | |
305 | * @param delimiter the delimiter between key and value; default is '='. | |
306 | * @return @a true if the split was successful. | |
307 | */ | |
6ab3bc95 RP |
308 | bool pair_split( |
309 | const std::string& str, | |
310 | std::string& key, | |
311 | std::string& value, | |
312 | char delimiter) | |
313 | { | |
314 | std::string::size_type pos = str.find (delimiter); | |
315 | if (pos == std::string::npos) return false; | |
316 | key= str.substr(0,pos); | |
317 | value= str.substr(pos+1); | |
318 | trim_mod(key); | |
319 | trim_mod(value); | |
320 | return true; | |
321 | } // eo pair_split(const std::string&,std::string&,std::string&,char) | |
6a93d84a TJ |
322 | |
323 | ||
324 | /** | |
325 | * splits a string by given delimiter | |
326 | * | |
327 | * @param[in] str the string which should be splitted. | |
328 | * @param[out] result the list resulting from splitting @a str. | |
329 | * @param[in] delimiter the delimiter (word/phrase) at which @a str should be splitted. | |
330 | * @param[in] omit_empty should empty parts not be stored? | |
331 | * @param[in] trim_list list of characters the parts should be trimmed by. | |
332 | * (empty string results in no trim) | |
333 | */ | |
6ab3bc95 RP |
334 | void split_string( |
335 | const std::string& str, | |
336 | std::list<std::string>& result, | |
337 | const std::string& delimiter, | |
338 | bool omit_empty, | |
339 | const std::string& trim_list | |
6a93d84a TJ |
340 | ) |
341 | { | |
6ab3bc95 RP |
342 | std::string::size_type pos, last_pos=0; |
343 | bool delimiter_found= false; | |
344 | while ( last_pos < str.size() && last_pos != std::string::npos) | |
345 | { | |
346 | pos= str.find(delimiter, last_pos); | |
347 | std::string part; | |
348 | if (pos == std::string::npos) | |
349 | { | |
350 | part= str.substr(last_pos); | |
351 | delimiter_found= false; | |
352 | } | |
353 | else | |
354 | { | |
355 | part= str.substr(last_pos, pos-last_pos); | |
356 | delimiter_found=true; | |
357 | } | |
358 | if (pos != std::string::npos) | |
359 | { | |
360 | last_pos= pos+ delimiter.size(); | |
361 | } | |
362 | else | |
363 | { | |
364 | last_pos= std::string::npos; | |
365 | } | |
366 | if (!trim_list.empty() ) trim_mod (part, trim_list); | |
367 | if (omit_empty && part.empty() ) continue; | |
368 | result.push_back( part ); | |
369 | } | |
370 | // if the string ends with a delimiter we need to append an empty string if no omit_empty | |
371 | // was given. | |
372 | // (this way we keep the split result consistent to a join operation) | |
373 | if (delimiter_found && !omit_empty) | |
374 | { | |
375 | result.push_back(""); | |
376 | } | |
377 | } // eo split_string(const std::string&,std::list< std::string >&,const std::string&,bool,const std::string&) | |
6a93d84a TJ |
378 | |
379 | ||
380 | /** | |
381 | * splits a string by a given delimiter | |
382 | * @param str the string which should be splitted. | |
383 | * @param delimiter delimiter the delimiter (word/phrase) at which @a str should be splitted. | |
384 | * @param[in] omit_empty should empty parts not be stored? | |
385 | * @param[in] trim_list list of characters the parts should be trimmed by. | |
386 | * (empty string results in no trim) | |
387 | * @return the list resulting from splitting @a str. | |
388 | */ | |
6ab3bc95 RP |
389 | std::list<std::string> split_string( |
390 | const std::string& str, | |
391 | const std::string& delimiter, | |
392 | bool omit_empty, | |
393 | const std::string& trim_list | |
6a93d84a TJ |
394 | ) |
395 | { | |
6ab3bc95 RP |
396 | std::list<std::string> result; |
397 | split_string(str, result, delimiter, omit_empty, trim_list); | |
398 | return result; | |
399 | } // eo split_string(const std::string&,const std::string&,bool,const std::string&) | |
6a93d84a TJ |
400 | |
401 | ||
/**
 * @brief joins a list of strings into a single string.
 *
 * This function is (basically) the reverse operation of @a split_string.
 *
 * @param parts the strings to join.
 * @param delimiter the text inserted between two consecutive parts.
 * @return the joined string; empty if @a parts is empty.
 */
std::string join_string(
    const std::list< std::string >& parts,
    const std::string& delimiter
)
{
    std::string joined;
    for (std::list< std::string >::const_iterator it= parts.begin();
         it != parts.end();
         ++it)
    {
        // the delimiter goes between parts only, not in front of the first one
        if (it != parts.begin() )
        {
            joined+= delimiter;
        }
        joined+= *it;
    }
    return joined;
} // eo join_string(const std::list< std::string >&,const std::string&)
6a93d84a TJ |
429 | |
430 | ||
431 | ||
432 | /* | |
433 | ** conversions | |
434 | */ | |
435 | ||
436 | ||
437 | /** | |
438 | * @brief returns a hex string from a binary string. | |
439 | * @param str the (binary) string | |
440 | * @param upper_case_digits determine whether to use upper case characters for digits A-F. | |
441 | * @return the string in hex notation. | |
442 | */ | |
6ab3bc95 RP |
443 | std::string convert_binary_to_hex( |
444 | const std::string& str, | |
445 | bool upper_case_digits | |
6a93d84a TJ |
446 | ) |
447 | { | |
6ab3bc95 RP |
448 | std::string result; |
449 | std::string hexDigits(upper_case_digits ? hexDigitsUpper : hexDigitsLower); | |
450 | for ( std::string::const_iterator it= str.begin(); | |
451 | it != str.end(); | |
452 | ++it) | |
453 | { | |
454 | result.push_back( hexDigits[ ( (*it) >> 4) & 0x0f ] ); | |
455 | result.push_back( hexDigits[ (*it) & 0x0f ] ); | |
456 | } | |
457 | return result; | |
458 | } // eo convert_binary_to_hex(const std::string&,bool) | |
6a93d84a TJ |
459 | |
460 | ||
461 | /** | |
462 | * @brief converts a hex digit string to binary string. | |
463 | * @param str hex digit string | |
464 | * @return the binary string. | |
465 | * | |
466 | * The hex digit string may contains white spaces or colons which are treated | |
467 | * as delimiters between hex digit groups. | |
468 | * | |
469 | * @todo rework the handling of half nibbles (consistency)! | |
470 | */ | |
6ab3bc95 RP |
471 | std::string convert_hex_to_binary( |
472 | const std::string& str | |
6a93d84a | 473 | ) |
6ab3bc95 RP |
474 | throw (std::runtime_error) |
475 | { | |
476 | std::string result; | |
477 | char c= 0; | |
478 | bool hasNibble= false; | |
479 | bool lastWasWS= true; | |
480 | for ( std::string::const_iterator it= str.begin(); | |
481 | it != str.end(); | |
482 | ++it) | |
483 | { | |
484 | std::string::size_type p = hexDigitsLower.find( *it ); | |
485 | if (p== std::string::npos) | |
486 | { | |
487 | p= hexDigitsUpper.find( *it ); | |
488 | } | |
489 | if (p == std::string::npos) | |
490 | { | |
491 | if ( ( Whitespaces.find( *it ) != std::string::npos) // is it a whitespace? | |
6a93d84a | 492 | or ( *it == ':') // or a colon? |
6ab3bc95 RP |
493 | ) |
494 | { | |
495 | // we treat that as a valid delimiter: | |
496 | if (hasNibble) | |
6a93d84a | 497 | { |
6ab3bc95 RP |
498 | // 1 nibble before WS is treate as lower part: |
499 | result.push_back(c); | |
500 | // reset state: | |
501 | hasNibble= false; | |
6a93d84a | 502 | } |
6ab3bc95 RP |
503 | lastWasWS= true; |
504 | continue; | |
505 | } | |
506 | } | |
507 | if (p == std::string::npos ) | |
508 | { | |
509 | throw runtime_error("illegal character in hex digit string: " + str); | |
510 | } | |
511 | lastWasWS= false; | |
512 | if (hasNibble) | |
513 | { | |
514 | c<<=4; | |
515 | } | |
516 | else | |
517 | { | |
518 | c=0; | |
519 | } | |
520 | c+= (p & 0x0f); | |
521 | if (hasNibble) | |
522 | { | |
523 | //we already had a nibble, so a char is complete now: | |
524 | result.push_back( c ); | |
525 | hasNibble=false; | |
526 | } | |
527 | else | |
528 | { | |
529 | // this is the first nibble of a new char: | |
530 | hasNibble=true; | |
531 | } | |
532 | } | |
533 | if (hasNibble) | |
534 | { | |
535 | //well, there is one nibble left | |
536 | // let's do some heuristics: | |
537 | if (lastWasWS) | |
538 | { | |
539 | // if the preceeding character was a white space (or a colon) | |
540 | // we treat the nibble as lower part: | |
541 | //( this is consistent with shortened hex notations where leading zeros are not noted) | |
542 | result.push_back( c ); | |
543 | } | |
544 | else | |
545 | { | |
546 | // if it was part of a hex digit chain, we treat it as UPPER part (!!) | |
547 | result.push_back( c << 4 ); | |
548 | } | |
549 | } | |
550 | return result; | |
551 | } // eo convert_hex_to_binary(const std::string&) | |
552 | ||
553 | ||
554 | } // eo namespace I2n | |
555 | ||
556 | ||
557 | ||
6a93d84a | 558 | |
e93545dd GE |
namespace
{

/**
 * @brief converts a string from one character encoding to another using iconv.
 *
 * The conversion is best effort: if iconv stops early (invalid input or
 * output buffer exhausted), whatever has been converted so far is returned.
 * The result ends at the first NUL byte of the converted data.
 *
 * @param to_code iconv name of the target encoding.
 * @param from_code iconv name of the source encoding.
 * @param input the string to convert.
 * @param expansion_factor size of the output buffer as a multiple of the input size.
 * @param open_error message of the exception thrown if iconv lacks the converter.
 * @return the converted string.
 * @throw std::runtime_error if the converter is not available or the buffer
 *        cannot be allocated.
 */
std::string iconv_convert_string(
    const char *to_code,
    const char *from_code,
    const std::string& input,
    size_t expansion_factor,
    const char *open_error)
{
    iconv_t cd = iconv_open(to_code, from_code);
    if (cd == (iconv_t)-1)
        throw std::runtime_error(open_error);

    size_t in_size = input.size();
    const size_t buf_size = in_size * expansion_factor;
    size_t out_size = buf_size;

    char *buf = (char *)malloc(buf_size+1);
    if (buf == NULL)
    {
        // do not leak the conversion descriptor
        iconv_close(cd);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = (char *)input.c_str();
    char *out = buf;
    iconv(cd, &in, &in_size, &out, &out_size);
    // Stateful encodings (UTF-7) may still owe a closing shift sequence;
    // this call is a no-op for stateless ones.
    iconv(cd, NULL, NULL, &out, &out_size);

    // out_size is the unused rest of the buffer
    buf[buf_size - out_size] = 0;

    std::string result;
    try
    {
        result = buf;
    }
    catch (...)
    {
        free(buf);
        iconv_close(cd);
        throw;
    }

    free(buf);
    iconv_close(cd);

    return result;
}

} // eo namespace <anonymous>


/**
 * @brief converts an ISO-8859-1 string to UTF-8.
 * @param isostring the ISO-8859-1 encoded string.
 * @return the UTF-8 encoded string.
 * @throw std::runtime_error if iconv lacks the converter or memory is exhausted.
 */
std::string iso_to_utf8(const std::string& isostring)
{
    return iconv_convert_string("UTF-8", "ISO-8859-1", isostring, 4,
                                "iconv can't convert from ISO-8859-1 to UTF-8");
}

/**
 * @brief converts a UTF-8 string to ISO-8859-1.
 * @param utf8string the UTF-8 encoded string.
 * @return the ISO-8859-1 encoded string; conversion stops at the first
 *         character iconv cannot convert.
 * @throw std::runtime_error if iconv lacks the converter or memory is exhausted.
 */
std::string utf8_to_iso(const std::string& utf8string)
{
    return iconv_convert_string("ISO-8859-1", "UTF-8", utf8string, 1,
                                "iconv can't convert from UTF-8 to ISO-8859-1");
}

/**
 * @brief converts a UTF-8 string to a zero terminated buffer of UCS-4LE code points.
 * @param utf8string the UTF-8 encoded string.
 * @return a buffer allocated with malloc(); the caller has to free() it.
 * @throw std::runtime_error if iconv lacks the converter, memory is exhausted
 *        or the input cannot be converted.
 */
wchar_t* utf8_to_wbuf(const std::string& utf8string)
{
    iconv_t utf82wstr = iconv_open("UCS-4LE","UTF-8");
    if (utf82wstr == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UCS-4");

    size_t in_size = utf8string.size();
    // each input byte yields at most one code point; one extra for the terminator
    const size_t buf_size = (in_size+1)*sizeof(wchar_t);
    size_t out_size = buf_size;

    wchar_t *buf = (wchar_t *)malloc(buf_size);
    if (buf == NULL)
    {
        iconv_close(utf82wstr);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = (char *)utf8string.c_str();
    char *out = (char*) buf;
    const size_t rc = iconv(utf82wstr, &in, &in_size, &out, &out_size);
    iconv_close(utf82wstr);

    if (rc == (size_t)-1)
    {
        // release the buffer, nobody else can do it once we throw
        free(buf);
        throw std::runtime_error("error converting char encodings");
    }

    buf[ (buf_size - out_size) / sizeof(wchar_t) ] = 0;

    return buf;
}

/**
 * @brief converts a string from the modified UTF-7 used by IMAP to UTF-8.
 * @param utf7imapstring the UTF-7-IMAP encoded string.
 * @return the UTF-8 encoded string.
 * @throw std::runtime_error if iconv lacks the converter or memory is exhausted.
 */
std::string utf7imap_to_utf8(const std::string& utf7imapstring)
{
    return iconv_convert_string("UTF-8", "UTF-7-IMAP", utf7imapstring, 4,
                                "iconv can't convert from UTF-7-IMAP to UTF-8");
}

/**
 * @brief converts a UTF-8 string to the modified UTF-7 used by IMAP.
 *
 * UTF-7 is base64 encoded; a buffer 10x as large as the UTF-8 input should
 * be enough. If not, the result is truncated.
 *
 * @param utf8string the UTF-8 encoded string.
 * @return the UTF-7-IMAP encoded string.
 * @throw std::runtime_error if iconv lacks the converter or memory is exhausted.
 */
std::string utf8_to_utf7imap(const std::string& utf8string)
{
    return iconv_convert_string("UTF-7-IMAP", "UTF-8", utf8string, 10,
                                "iconv can't convert from UTF-8 to UTF-7-IMAP");
}
706 | ||
118e216e TJ |
/**
 * @brief splits a string into (html) tags and the text between them.
 *
 * A tag starts at '<' and ends at the next '>'. A '<' which is not closed
 * before the next '<' or the end of the input is delivered as text.
 *
 * @param[out] tokenized the tokens are appended here; first: the token,
 *             second: @a true if the token is a tag.
 * @param input the string to tokenize.
 */
void tokenize_by_tag(std::vector<std::pair<std::string,bool> > &tokenized, const std::string &input)
{
    std::string pending;
    bool in_tag = false;

    for (std::string::const_iterator it = input.begin(); it != input.end(); ++it)
    {
        const char ch = *it;

        if (ch == '<')
        {
            // whatever was collected so far ends here and counts as text
            if (!pending.empty() )
            {
                tokenized.push_back( std::make_pair(pending, false) );
                pending.clear();
            }
            in_tag = true;
            pending += ch;
        }
        else if (ch == '>' && in_tag)
        {
            pending += ch;
            tokenized.push_back( std::make_pair(pending, true) );
            pending.clear();
            in_tag = false;
        }
        else
        {
            pending += ch;
        }
    }

    // deliver the rest as text
    if (!pending.empty() )
        tokenized.push_back( std::make_pair(pending, false) );
} // eo tokenize_by_tag
118e216e | 746 | |
118e216e TJ |
747 | |
748 | std::string strip_html_tags(const std::string &input) | |
749 | { | |
6ab3bc95 RP |
750 | // Pair first: string, second: isTag |
751 | vector<pair<string,bool> > tokenized; | |
752 | tokenize_by_tag (tokenized, input); | |
118e216e | 753 | |
6ab3bc95 RP |
754 | string output; |
755 | vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end(); | |
756 | for (token = tokenized.begin(); token != tokens_end; token++) | |
757 | if (!token->second) | |
758 | output += token->first; | |
759 | ||
760 | return output; | |
761 | } // eo strip_html_tags | |
118e216e | 762 | |
118e216e TJ |
763 | |
764 | // Smart-encode HTML en | |
765 | string smart_html_entities(const std::string &input) | |
766 | { | |
6ab3bc95 RP |
767 | // Pair first: string, second: isTag |
768 | vector<pair<string,bool> > tokenized; | |
769 | tokenize_by_tag (tokenized, input); | |
770 | ||
771 | string output; | |
772 | vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end(); | |
773 | for (token = tokenized.begin(); token != tokens_end; token++) | |
774 | { | |
775 | // keep HTML tags as they are | |
776 | if (token->second) | |
777 | output += token->first; | |
778 | else | |
779 | output += html_entities(token->first); | |
780 | } | |
781 | ||
782 | return output; | |
118e216e TJ |
783 | } |
784 | ||
6ab3bc95 | 785 | |
a5f3af6e GE |
/**
 * @brief finds the first character of a string which is outside the 7-bit range.
 * @param str the string to search.
 * @return the position of the first byte with a value above 127 or
 *         std::string::npos if there is none.
 */
std::string::size_type find_8bit(const std::string &str)
{
    std::string::size_type pos = 0;
    for (std::string::const_iterator it = str.begin(); it != str.end(); ++it, ++pos)
    {
        // values above 127 are exactly those with the highest bit set
        if ( (static_cast<unsigned char>(*it) & 0x80) != 0)
            return pos;
    }

    return std::string::npos;
}
795 | ||
118e216e TJ |
796 | // encoded UTF-8 chars into HTML entities |
797 | string html_entities(std::string str) | |
798 | { | |
6ab3bc95 RP |
799 | // Normal chars |
800 | replace_all (str, "&", "&"); | |
6ab3bc95 RP |
801 | replace_all (str, "<", "<"); |
802 | replace_all (str, ">", ">"); | |
980577e1 TJ |
803 | replace_all (str, "\"", """); |
804 | replace_all (str, "'", "'"); | |
805 | replace_all (str, "/", "/"); | |
6ab3bc95 RP |
806 | |
807 | // Umlauts | |
808 | replace_all (str, "\xC3\xA4", "ä"); | |
809 | replace_all (str, "\xC3\xB6", "ö"); | |
810 | replace_all (str, "\xC3\xBC", "ü"); | |
811 | replace_all (str, "\xC3\x84", "Ä"); | |
812 | replace_all (str, "\xC3\x96", "Ö"); | |
813 | replace_all (str, "\xC3\x9C", "Ü"); | |
814 | ||
815 | // Misc | |
816 | replace_all (str, "\xC3\x9F", "ß"); | |
817 | ||
818 | // conversion of remaining non-ASCII chars needed? | |
819 | // just do if needed because of performance | |
820 | if (find_8bit(str) != string::npos) | |
821 | { | |
822 | // convert to fixed-size encoding UTF-32 | |
823 | wchar_t* wbuf=utf8_to_wbuf(str); | |
824 | ostringstream target; | |
825 | ||
826 | // replace all non-ASCII chars with HTML representation | |
827 | for (int p=0; wbuf[p] != 0; p++) | |
828 | { | |
829 | unsigned int c=wbuf[p]; | |
830 | ||
831 | if (c <= 127) | |
832 | target << static_cast<unsigned char>(c); | |
833 | else | |
834 | target << "&#" << c << ';'; | |
835 | } | |
836 | ||
837 | free(wbuf); | |
838 | ||
839 | str=target.str(); | |
840 | } | |
841 | ||
842 | return str; | |
843 | } // eo html_entities(std::string) | |
844 | ||
118e216e | 845 | |
e93545dd GE |
/**
 * @brief replaces every occurrence of a search string within a string.
 *
 * The search continues behind the inserted replacement, so a replacement
 * which contains the search string is not replaced again.
 *
 * @param[in,out] base the string to modify.
 * @param ist the string to search for; must not be empty.
 * @param soll the replacement.
 * @return @a true if at least one occurrence was replaced.
 * @throw std::runtime_error if @a ist is empty.
 */
bool replace_all(std::string &base, const std::string &ist, const std::string &soll)
{
    if (ist.empty() )
        throw std::runtime_error ("replace_all called with empty search string");

    bool replaced = false;
    for (std::string::size_type pos = base.find(ist);
         pos != std::string::npos;
         pos = base.find(ist, pos + soll.size() ) )
    {
        base.replace(pos, ist.size(), soll);
        replaced = true;
    }

    return replaced;
}

/// Overload of replace_all() taking search string and replacement by pointer.
bool replace_all(std::string &base, const std::string *ist, const std::string *soll)
{
    return replace_all(base, *ist, *soll);
}

/// Overload of replace_all() taking search string and replacement as C strings.
bool replace_all(std::string &base, const char *ist, const char *soll)
{
    const std::string search(ist);
    const std::string replacement(soll);
    return replace_all(base, search, replacement);
}

/// Overload of replace_all() taking the replacement as C string.
bool replace_all(std::string &base, const std::string &ist, const char *soll)
{
    const std::string replacement(soll);
    return replace_all(base, ist, replacement);
}

/// Overload of replace_all() taking the search string as C string and the replacement by pointer.
bool replace_all(std::string &base, const char *ist, const std::string *soll)
{
    const std::string search(ist);
    return replace_all(base, search, *soll);
}
887 | ||
e5b21dbb | 888 | #if 0 |
e93545dd GE |
/**
 * Return a lower-cased copy of @a src.
 *
 * Conversion is done byte by byte with the C library's tolower(), so it
 * follows the current C locale and does not understand multi-byte encodings.
 * Note that this definition sits between #if 0 / #endif and is therefore not
 * compiled at the moment.
 *
 * @param src input string, left untouched
 * @return copy of @a src with every byte passed through tolower()
 */
std::string to_lower(const std::string &src)
{
    std::string dst(src);

    for (std::string::size_type pos = 0; pos < dst.size(); ++pos)
    {
        // tolower() requires a value representable as unsigned char (or EOF);
        // passing a negative plain char is undefined behaviour.
        const unsigned char byte = static_cast<unsigned char>(dst[pos]);
        dst[pos] = static_cast<char>(std::tolower(byte));
    }

    return dst;
}
899 | ||
/**
 * Return an upper-cased copy of @a src.
 *
 * Conversion is done byte by byte with the C library's toupper(), so it
 * follows the current C locale and does not understand multi-byte encodings.
 * Note that this definition sits between #if 0 / #endif and is therefore not
 * compiled at the moment.
 *
 * @param src input string, left untouched
 * @return copy of @a src with every byte passed through toupper()
 */
std::string to_upper(const std::string &src)
{
    std::string dst(src);

    for (std::string::size_type pos = 0; pos < dst.size(); ++pos)
    {
        // toupper() requires a value representable as unsigned char (or EOF);
        // passing a negative plain char is undefined behaviour.
        const unsigned char byte = static_cast<unsigned char>(dst[pos]);
        dst[pos] = static_cast<char>(std::toupper(byte));
    }

    return dst;
}
e5b21dbb | 910 | #endif |
e93545dd | 911 | |
// Number of entries in each of the two unit symbol tables below.
// nice_unit_format() also uses it as the upper bound for its unit index.
const int MAX_UNIT_FORMAT_SYMBOLS = 6;

// Compact unit suffixes, ordered from bytes up to petabytes.
// nice_unit_format() indexes this table with the number of times it divided
// the input by the unit base, and passes the chosen entry through i18n().
const string shortUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
        " B",
        " KB",
        " MB",
        " GB",
        " TB",
        " PB"
};

// Verbose unit suffixes, in the same order and indexed the same way as
// shortUnitFormatSymbols. The entry is passed through i18n() by
// nice_unit_format() at formatting time.
// NOTE(review): i18n_noop presumably only marks the literal for translation
// extraction without translating it here -- confirm against i18n.h.
const string longUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
        i18n_noop(" Bytes"),
        i18n_noop(" KBytes"),
        i18n_noop(" MBytes"),
        i18n_noop(" GBytes"),
        i18n_noop(" TBytes"),
        i18n_noop(" PBytes")
};
931 | ||
72a94426 GMF |
932 | |
/**
 * Round @a number "half up" to the nearest multiple of
 * 1 / @a rounding_multiplier.
 *
 * With a multiplier of 10 the result has one decimal place, with 100 two,
 * and so on. Ties go towards positive infinity (2.5 -> 3, -2.5 -> -2).
 *
 * The scaled value is floored rather than truncated: a plain integer cast
 * rounds towards zero, which gave wrong results for negative input
 * (e.g. -5.0 with multiplier 10 came out as -4.9).
 *
 * @param number              value to round
 * @param rounding_multiplier scale factor selecting the precision; the scaled
 *                            value must fit into int64_t
 * @return the rounded value
 */
long double rounding_upwards(
        long double number,
        const int rounding_multiplier
)
{
    // shift the digit to round at directly in front of the decimal point
    long double scaled = number * rounding_multiplier;
    scaled += 0.5;

    // floor(scaled): the cast truncates towards zero, so negative
    // non-integers have to be moved one step further down
    int64_t floored = static_cast<int64_t>(scaled);
    if (static_cast<long double>(floored) > scaled)
        --floored;

    return static_cast<long double>(floored)
           / static_cast<long double>(rounding_multiplier);
}
946 | ||
947 | ||
81267544 GMF |
948 | string nice_unit_format( |
949 | const int64_t input, | |
70fc0674 GMF |
950 | const UnitFormat format, |
951 | const UnitBase base | |
81267544 | 952 | ) |
6ab3bc95 | 953 | { |
d1ea9075 | 954 | // select the system of units (decimal or binary) |
81267544 | 955 | int multiple = 0; |
a398513a | 956 | if (base == UnitBase1000) |
81267544 GMF |
957 | { |
958 | multiple = 1000; | |
959 | } | |
960 | else | |
961 | { | |
962 | multiple = 1024; | |
963 | } | |
964 | ||
965 | long double size = input; | |
6ab3bc95 | 966 | |
d1ea9075 GMF |
967 | // check the size of the input number to fit in the appropriate symbol |
968 | int sizecount = 0; | |
81267544 | 969 | while (size > multiple) |
6ab3bc95 | 970 | { |
81267544 GMF |
971 | size = size / multiple; |
972 | sizecount++; | |
83809f5e GMF |
973 | |
974 | // rollback to the previous values and stop the loop when cannot | |
975 | // represent the number length. | |
976 | if (sizecount >= MAX_UNIT_FORMAT_SYMBOLS) | |
977 | { | |
978 | size = size * multiple; | |
979 | sizecount--; | |
980 | break; | |
981 | } | |
6ab3bc95 RP |
982 | } |
983 | ||
a398513a GMF |
984 | // round the input number "half up" to multiples of 10 |
985 | const int rounding_multiplier = 10; | |
72a94426 | 986 | size = rounding_upwards(size, rounding_multiplier); |
6ab3bc95 | 987 | |
d1ea9075 | 988 | // format the input number, placing the appropriate symbol |
6ab3bc95 | 989 | ostringstream out; |
6ab3bc95 | 990 | out.setf (ios::fixed); |
a398513a | 991 | if (format == ShortUnitFormat) |
d1ea9075 GMF |
992 | { |
993 | out.precision(1); | |
68d37a5c | 994 | out << size << i18n( shortUnitFormatSymbols[sizecount].c_str() ); |
d1ea9075 GMF |
995 | } |
996 | else | |
6ab3bc95 | 997 | { |
d1ea9075 | 998 | out.precision (2); |
68d37a5c | 999 | out << size << i18n( longUnitFormatSymbols[sizecount].c_str() ); |
6ab3bc95 RP |
1000 | } |
1001 | ||
1002 | return out.str(); | |
1003 | } // eo nice_unit_format(int input) | |
1004 | ||
e93545dd | 1005 | |
47c07fba GE |
/**
 * Turn @a s into a double-quoted, backslash-escaped string.
 *
 * - a double quote or a backslash gets a backslash in front of it
 * - a carriage return becomes the two characters backslash + 'r'
 * - a line feed becomes the two characters backslash + 'n'
 * - everything else is copied unchanged
 *
 * The result is wrapped in a pair of double quotes. The string is built in
 * a single pass; repeated insertion into the middle of the string would cost
 * quadratic time on input with many characters to escape.
 *
 * @param s text to escape
 * @return the quoted and escaped text
 */
std::string escape(const std::string &s)
{
    std::string out;
    out.reserve(s.size() + 2);   // at least the payload plus both quotes

    out += '"';
    for (std::string::size_type pos = 0; pos < s.size(); ++pos)
    {
        const char c = s[pos];
        switch (c)
        {
            case '"':
            case '\\':
                out += '\\';
                out += c;
                break;
            case '\r':
                out += "\\r";
                break;
            case '\n':
                out += "\\n";
                break;
            default:
                out += c;
        }
    }
    out += '"';

    return out;
} // eo escape(const std::string&)
47c07fba | 1036 | |
47c07fba | 1037 | |
6ab3bc95 RP |
/**
 * Extract and unescape a double-quoted string embedded in @a s.
 *
 * The closing quote is the first double quote that is preceded by an even
 * number of backslashes (zero included). Inside the quoted text,
 * backslash + 'r' becomes a carriage return, backslash + 'n' a line feed,
 * and a backslash in front of any other character is dropped.
 *
 * If no closing quote is found, everything behind the opening quote is
 * used as content and @a endpos does not denote a usable position.
 *
 * @param s        text containing the quoted string
 * @param startpos index of the opening double quote in @a s
 * @param endpos   receives the index of the closing double quote in @a s
 * @return the unescaped content between the quotes
 * @throws std::out_of_range if @a startpos is outside @a s, if the character
 *         at @a startpos is not a double quote, or if the content ends with
 *         a backslash that has no character behind it
 */
std::string descape(const std::string &s, int startpos, int &endpos)
{
    if (s.at(startpos) != '"')
        throw std::out_of_range("value not type escaped string");

    std::string out = s.substr(startpos + 1);

    // look for the terminating quote, skipping quotes that are escaped
    std::string::size_type quote = out.find("\"", 0);
    while (quote != std::string::npos)
    {
        // count the backslashes directly in front of the quote:
        // an odd number means the quote itself is escaped
        std::string::size_type backslashes = 0;
        while (backslashes < quote && out.at(quote - 1 - backslashes) == '\\')
            ++backslashes;

        if (backslashes % 2 == 0)
            break;

        quote = out.find("\"", quote + 1);
    }

    // cut off the closing quote and everything behind it
    out = out.substr(0, quote);

    // report the position of the closing quote to the caller
    endpos = startpos + quote + 1;

    // resolve the backslash sequences
    std::string plain;
    for (std::string::size_type pos = 0; pos < out.size(); ++pos)
    {
        if (out[pos] != '\\')
        {
            plain += out[pos];
            continue;
        }

        // at() throws for a backslash without a following character
        const char code = out.at(pos + 1);
        ++pos;

        if (code == 'r')
            plain += '\r';
        else if (code == 'n')
            plain += '\n';
        else
            plain += code;
    }

    return plain;
} // eo descape(const std::string&,int,int&)
47c07fba | 1097 | |
e93545dd | 1098 | |
47c07fba GE |
/**
 * Wrap @a input in single quotes for use as one shell argument.
 *
 * Every single quote inside @a input is written as the four characters
 * '\'' (close the quoted section, emit an escaped quote, open a new quoted
 * section). All other characters are copied unchanged.
 *
 * @param input raw argument text
 * @return the single-quoted argument
 */
std::string escape_shellarg(const std::string &input)
{
    std::string quoted("'");

    std::string::size_type chunk_start = 0;
    for (;;)
    {
        const std::string::size_type apostrophe = input.find('\'', chunk_start);
        if (apostrophe == std::string::npos)
        {
            // no further quote: copy the remainder and finish
            quoted.append(input, chunk_start, std::string::npos);
            break;
        }

        // copy the text in front of the quote, then the escaped quote itself
        quoted.append(input, chunk_start, apostrophe - chunk_start);
        quoted += "'\\''";
        chunk_start = apostrophe + 1;
    }

    quoted += "'";
    return quoted;
}