Commit | Line | Data |
---|---|---|
0e23f538 TJ |
1 | /* |
2 | The software in this package is distributed under the GNU General | |
3 | Public License version 2 (with a special exception described below). | |
4 | ||
5 | A copy of GNU General Public License (GPL) is included in this distribution, | |
6 | in the file COPYING.GPL. | |
7 | ||
8 | As a special exception, if other files instantiate templates or use macros | |
9 | or inline functions from this file, or you compile this file and link it | |
10 | with other works to produce a work based on this file, this file | |
11 | does not by itself cause the resulting work to be covered | |
12 | by the GNU General Public License. | |
13 | ||
14 | However the source code for this file must still be made available | |
15 | in accordance with section (3) of the GNU General Public License. | |
16 | ||
17 | This exception does not invalidate any other reasons why a work based | |
18 | on this file might be covered by the GNU General Public License. | |
19 | */ | |
6a93d84a TJ |
20 | /** @file |
21 | * | |
22 | * (c) Copyright 2007-2008 by Intra2net AG | |
6a93d84a | 23 | */ |
e93545dd GE |
24 | |
25 | #include <iostream> | |
26 | #include <string> | |
27 | #include <sstream> | |
28 | #include <stdexcept> | |
5efd35b1 | 29 | #include <algorithm> |
5cd64148 | 30 | #include <cmath> // for round() |
e93545dd | 31 | |
a5f3af6e | 32 | #include <wchar.h> |
e93545dd GE |
33 | #include <stdlib.h> |
34 | #include <iconv.h> | |
35 | #include <i18n.h> | |
36 | ||
5cd64148 | 37 | #include <boost/numeric/conversion/cast.hpp> |
3f5c5ccd | 38 | #include <boost/foreach.hpp> |
5cd64148 | 39 | |
e93545dd GE |
40 | #include <stringfunc.hxx> |
41 | ||
42 | using namespace std; | |
43 | ||
6ab3bc95 RP |
44 | namespace I2n |
45 | { | |
6a93d84a TJ |
46 | |
47 | ||
6ab3bc95 RP |
namespace
{

/// Digit tables: the character at index n (0..15) is the hex digit for nibble value n.
const std::string hexDigitsLower("0123456789abcdef");
const std::string hexDigitsUpper("0123456789ABCDEF");


/**
 * @brief functor which converts a single character to upper case.
 *
 * The character is handed to std::toupper as unsigned char: the behaviour of
 * the <cctype> functions is undefined for negative arguments other than EOF,
 * and plain char may be negative for 8 bit characters.
 */
struct UpperFunc
{
    char operator() (char c) const
    {
        return static_cast<char>( std::toupper( static_cast<unsigned char>(c) ) );
    }
}; // eo struct UpperFunc


/**
 * @brief functor which converts a single character to lower case.
 *
 * See UpperFunc for the reason of the unsigned char conversion.
 */
struct LowerFunc
{
    char operator() (char c) const
    {
        return static_cast<char>( std::tolower( static_cast<unsigned char>(c) ) );
    }
}; // eo struct LowerFunc


} // eo namespace <anonymous>
74 | ||
75 | ||
76 | ||
/**
 * Default set of characters regarded as whitespace: blank, tab, CR and LF.
 */
const std::string Whitespaces(" \t\r\n");

/**
 * Default set of line ending characters: CR and LF.
 */
const std::string LineEndings("\r\n");
6a93d84a TJ |
86 | |
87 | ||
88 | ||
/**
 * @brief tests whether a string starts with a given prefix.
 * @param str the string which is tested.
 * @param prefix the prefix to look for.
 * @return @a true iff the prefix is not empty and @a str begins with it.
 */
bool has_prefix(const std::string& str, const std::string& prefix)
{
    const std::string::size_type prefix_len= prefix.size();
    // an empty prefix never matches; a prefix longer than the string cannot match
    const bool may_match= (prefix_len != 0) && (prefix_len <= str.size());
    return may_match && (str.compare(0, prefix_len, prefix) == 0);
} // eo has_prefix(const std::string&,const std::string&)
6a93d84a TJ |
103 | |
104 | ||
/**
 * @brief tests whether a string ends with a given suffix.
 * @param str the string which is tested.
 * @param suffix the suffix to look for.
 * @return @a true iff the suffix is not empty and @a str ends with it.
 */
bool has_suffix(const std::string& str, const std::string& suffix)
{
    const std::string::size_type suffix_len= suffix.size();
    // an empty suffix never matches; a suffix longer than the string cannot match
    if (suffix_len == 0 || suffix_len > str.size() )
    {
        return false;
    }
    const std::string::size_type tail_start= str.size() - suffix_len;
    return str.compare(tail_start, suffix_len, suffix) == 0;
} // eo has_suffix(const std::string&,const std::string&)
6a93d84a TJ |
119 | |
120 | ||
/**
 * @brief strips characters of a given list from both ends of a string (in place).
 * @param[in,out] str the string which is trimmed.
 * @param charlist the characters to remove from the beginning and the end of the string.
 * @return a copy of the trimmed string.
 */
std::string trim_mod(std::string& str, const std::string& charlist)
{
    const std::string::size_type first_kept= str.find_first_not_of(charlist);
    if (first_kept == std::string::npos)
    {
        // nothing but characters from charlist (or nothing at all)
        str.clear();
        return str;
    }
    // at least one character survives, so the backward search cannot fail.
    const std::string::size_type last_kept= str.find_last_not_of(charlist);
    // drop the tail first; the front index stays valid that way.
    str.erase(last_kept + 1);
    str.erase(0, first_kept);
    return str;
} // eo trim_mod(std::string&,const std::string&)
6a93d84a TJ |
150 | |
151 | ||
152 | ||
/**
 * @brief removes the last character of a string if it is contained in a given list (in place).
 * @param[in,out] str the string.
 * @param what the list of characters the last character is tested against.
 * @return a copy of the resulting string (last character removed if applicable).
 */
std::string chomp_mod(std::string& str, const std::string& what)
{
    if (!str.empty() && !what.empty() )
    {
        const char last_char= *str.rbegin();
        if (what.find(last_char) != std::string::npos)
        {
            str.resize(str.size() - 1);
        }
    }
    return str;
} // eo chomp_mod(std::string&,const std::string&)
6a93d84a TJ |
171 | |
172 | ||
173 | /** | |
174 | * @brief converts a string to lower case. | |
175 | * @param[in,out] str the string to modify. | |
176 | * @return the string | |
177 | */ | |
6ab3bc95 | 178 | std::string to_lower_mod(std::string& str) |
6a93d84a | 179 | { |
6ab3bc95 RP |
180 | std::transform(str.begin(), str.end(), str.begin(), LowerFunc() ); |
181 | return str; | |
182 | } // eo to_lower_mod(std::string&) | |
6a93d84a TJ |
183 | |
184 | ||
185 | /** | |
186 | * @brief converts a string to upper case. | |
187 | * @param[in,out] str the string to modify. | |
188 | * @return the string | |
189 | */ | |
6ab3bc95 | 190 | std::string to_upper_mod(std::string& str) |
6a93d84a | 191 | { |
6ab3bc95 RP |
192 | std::transform( str.begin(), str.end(), str.begin(), UpperFunc() ); |
193 | return str; | |
194 | } // eo to_upper_mod(std::string&) | |
6a93d84a TJ |
195 | |
196 | ||
197 | ||
/**
 * @brief returns a copy of a string with characters of a given list stripped from both ends.
 * @param str the string which should be trimmed.
 * @param charlist the characters to remove from the beginning and the end of the string.
 * @return the trimmed string.
 */
std::string trim (const std::string& str, const std::string& charlist)
{
    const std::string::size_type first_kept= str.find_first_not_of(charlist);
    if (first_kept != std::string::npos)
    {
        // a surviving character exists, so the backward search yields a valid index
        const std::string::size_type last_kept= str.find_last_not_of(charlist);
        return std::string(str, first_kept, last_kept - first_kept + 1);
    }
    // only characters from charlist (or an empty string)
    return std::string();
} // eo trim(const std::string&,const std::string&)
217 | ||
218 | ||
/**
 * @brief returns a copy of a string without its last character if that one is in a given list.
 * @param str the string.
 * @param what the list of characters the last character is tested against.
 * @return the resulting string (last character removed if applicable).
 */
std::string chomp (const std::string& str, const std::string& what)
{
    const bool strip_last=
        !str.empty()
        && !what.empty()
        && what.find( *str.rbegin() ) != std::string::npos;
    if (strip_last)
    {
        return std::string(str, 0, str.size() - 1);
    }
    return str;
} // eo chomp(const std::string&,const std::string&)
237 | ||
238 | ||
239 | /** | |
240 | * @brief returns a lower case version of a given string. | |
241 | * @param str the string | |
242 | * @return the lower case version of the string | |
243 | */ | |
6ab3bc95 | 244 | std::string to_lower (const std::string& str) |
6a93d84a | 245 | { |
6ab3bc95 RP |
246 | std::string result(str); |
247 | return to_lower_mod(result); | |
248 | } // eo to_lower(const std::string&) | |
6a93d84a TJ |
249 | |
250 | ||
251 | /** | |
252 | * @brief returns a upper case version of a given string. | |
253 | * @param str the string | |
254 | * @return the upper case version of the string | |
255 | */ | |
6ab3bc95 | 256 | std::string to_upper(const std::string& str) |
6a93d84a | 257 | { |
6ab3bc95 RP |
258 | std::string result(str); |
259 | return to_upper_mod(result); | |
260 | } // eo to_upper(const std::string&) | |
6a93d84a TJ |
261 | |
262 | ||
263 | ||
264 | /** | |
265 | * @brief removes a given suffix from a string. | |
266 | * @param str the string. | |
267 | * @param suffix the suffix which should be removed if the string ends with it. | |
268 | * @return the string without the suffix. | |
269 | * | |
270 | * If the string ends with the suffix, it is removed. If the the string doesn't end | |
271 | * with the suffix the original string is returned. | |
272 | */ | |
6ab3bc95 | 273 | std::string remove_suffix(const std::string& str, const std::string& suffix) |
6a93d84a | 274 | { |
6ab3bc95 RP |
275 | if (has_suffix(str,suffix) ) |
276 | { | |
277 | return str.substr(0, str.size()-suffix.size() ); | |
278 | } | |
279 | return str; | |
280 | } // eo remove_suffix(const std::string&,const std::string&) | |
6a93d84a TJ |
281 | |
282 | ||
283 | ||
284 | /** | |
285 | * @brief removes a given prefix from a string. | |
286 | * @param str the string. | |
287 | * @param prefix the prefix which should be removed if the string begins with it. | |
288 | * @return the string without the prefix. | |
289 | * | |
290 | * If the string begins with the prefix, it is removed. If the the string doesn't begin | |
291 | * with the prefix the original string is returned. | |
292 | */ | |
6ab3bc95 | 293 | std::string remove_prefix(const std::string& str, const std::string& prefix) |
6a93d84a | 294 | { |
6ab3bc95 RP |
295 | if (has_prefix(str,prefix) ) |
296 | { | |
297 | return str.substr( prefix.size() ); | |
298 | } | |
299 | return str; | |
300 | } // eo remove_prefix(const std::string&,const std::string&) | |
6a93d84a TJ |
301 | |
302 | ||
303 | /** | |
304 | * split a string to key and value delimited by a given delimiter. | |
6ab3bc95 | 305 | * The resulting key and value strings are trimmed (Whitespaces removed at beginning and end). |
6a93d84a TJ |
306 | * @param str the string which should be splitted. |
307 | * @param[out] key the resulting key | |
308 | * @param[out] value the resulting value | |
309 | * @param delimiter the delimiter between key and value; default is '='. | |
310 | * @return @a true if the split was successful. | |
311 | */ | |
6ab3bc95 RP |
312 | bool pair_split( |
313 | const std::string& str, | |
314 | std::string& key, | |
315 | std::string& value, | |
316 | char delimiter) | |
317 | { | |
318 | std::string::size_type pos = str.find (delimiter); | |
319 | if (pos == std::string::npos) return false; | |
320 | key= str.substr(0,pos); | |
321 | value= str.substr(pos+1); | |
322 | trim_mod(key); | |
323 | trim_mod(value); | |
324 | return true; | |
325 | } // eo pair_split(const std::string&,std::string&,std::string&,char) | |
6a93d84a TJ |
326 | |
327 | ||
328 | /** | |
329 | * splits a string by given delimiter | |
330 | * | |
331 | * @param[in] str the string which should be splitted. | |
332 | * @param[out] result the list resulting from splitting @a str. | |
333 | * @param[in] delimiter the delimiter (word/phrase) at which @a str should be splitted. | |
334 | * @param[in] omit_empty should empty parts not be stored? | |
335 | * @param[in] trim_list list of characters the parts should be trimmed by. | |
336 | * (empty string results in no trim) | |
337 | */ | |
6ab3bc95 RP |
338 | void split_string( |
339 | const std::string& str, | |
340 | std::list<std::string>& result, | |
341 | const std::string& delimiter, | |
342 | bool omit_empty, | |
343 | const std::string& trim_list | |
6a93d84a TJ |
344 | ) |
345 | { | |
6ab3bc95 RP |
346 | std::string::size_type pos, last_pos=0; |
347 | bool delimiter_found= false; | |
348 | while ( last_pos < str.size() && last_pos != std::string::npos) | |
349 | { | |
350 | pos= str.find(delimiter, last_pos); | |
351 | std::string part; | |
352 | if (pos == std::string::npos) | |
353 | { | |
354 | part= str.substr(last_pos); | |
355 | delimiter_found= false; | |
356 | } | |
357 | else | |
358 | { | |
359 | part= str.substr(last_pos, pos-last_pos); | |
360 | delimiter_found=true; | |
361 | } | |
362 | if (pos != std::string::npos) | |
363 | { | |
364 | last_pos= pos+ delimiter.size(); | |
365 | } | |
366 | else | |
367 | { | |
368 | last_pos= std::string::npos; | |
369 | } | |
370 | if (!trim_list.empty() ) trim_mod (part, trim_list); | |
371 | if (omit_empty && part.empty() ) continue; | |
372 | result.push_back( part ); | |
373 | } | |
374 | // if the string ends with a delimiter we need to append an empty string if no omit_empty | |
375 | // was given. | |
376 | // (this way we keep the split result consistent to a join operation) | |
377 | if (delimiter_found && !omit_empty) | |
378 | { | |
379 | result.push_back(""); | |
380 | } | |
381 | } // eo split_string(const std::string&,std::list< std::string >&,const std::string&,bool,const std::string&) | |
6a93d84a TJ |
382 | |
383 | ||
384 | /** | |
385 | * splits a string by a given delimiter | |
386 | * @param str the string which should be splitted. | |
387 | * @param delimiter delimiter the delimiter (word/phrase) at which @a str should be splitted. | |
388 | * @param[in] omit_empty should empty parts not be stored? | |
389 | * @param[in] trim_list list of characters the parts should be trimmed by. | |
390 | * (empty string results in no trim) | |
391 | * @return the list resulting from splitting @a str. | |
392 | */ | |
6ab3bc95 RP |
393 | std::list<std::string> split_string( |
394 | const std::string& str, | |
395 | const std::string& delimiter, | |
396 | bool omit_empty, | |
397 | const std::string& trim_list | |
6a93d84a TJ |
398 | ) |
399 | { | |
6ab3bc95 RP |
400 | std::list<std::string> result; |
401 | split_string(str, result, delimiter, omit_empty, trim_list); | |
402 | return result; | |
403 | } // eo split_string(const std::string&,const std::string&,bool,const std::string&) | |
6a93d84a TJ |
404 | |
405 | ||
/**
 * @brief joins a list of strings into a single string.
 *
 * This function is (basically) the reverse operation of @a split_string.
 *
 * @param parts the list of strings.
 * @param delimiter the delimiter which is inserted between the strings.
 * @return the joined string (empty if @a parts is empty).
 */
std::string join_string(
    const std::list< std::string >& parts,
    const std::string& delimiter
)
{
    std::string joined;
    for (std::list< std::string >::const_iterator part= parts.begin();
         part != parts.end();
         ++part)
    {
        // the delimiter goes in front of every part but the first one
        if (part != parts.begin() )
        {
            joined.append(delimiter);
        }
        joined.append(*part);
    }
    return joined;
} // eo join_string(const std::list< std::string >&,const std::string&)
6a93d84a TJ |
433 | |
434 | ||
376ec4fa CH |
/**
 * @brief joins a vector of strings into a single string.
 *
 * @param parts the vector of strings.
 * @param delimiter the delimiter which is inserted between the strings.
 * @return the joined string (empty if @a parts is empty).
 */
std::string join_string(
    const std::vector< std::string >& parts,
    const std::string& delimiter
)
{
    std::string joined;
    for (std::vector< std::string >::size_type idx= 0; idx < parts.size(); ++idx)
    {
        // the delimiter goes in front of every part but the first one
        if (idx > 0)
        {
            joined+= delimiter;
        }
        joined+= parts[idx];
    }
    return joined;
} // eo join_string(const std::vector< std::string >&,const std::string&)
454 | ||
455 | ||
6a93d84a TJ |
456 | |
457 | /* | |
458 | ** conversions | |
459 | */ | |
460 | ||
461 | ||
462 | /** | |
463 | * @brief returns a hex string from a binary string. | |
464 | * @param str the (binary) string | |
465 | * @param upper_case_digits determine whether to use upper case characters for digits A-F. | |
466 | * @return the string in hex notation. | |
467 | */ | |
6ab3bc95 RP |
468 | std::string convert_binary_to_hex( |
469 | const std::string& str, | |
470 | bool upper_case_digits | |
6a93d84a TJ |
471 | ) |
472 | { | |
6ab3bc95 RP |
473 | std::string result; |
474 | std::string hexDigits(upper_case_digits ? hexDigitsUpper : hexDigitsLower); | |
475 | for ( std::string::const_iterator it= str.begin(); | |
476 | it != str.end(); | |
477 | ++it) | |
478 | { | |
479 | result.push_back( hexDigits[ ( (*it) >> 4) & 0x0f ] ); | |
480 | result.push_back( hexDigits[ (*it) & 0x0f ] ); | |
481 | } | |
482 | return result; | |
483 | } // eo convert_binary_to_hex(const std::string&,bool) | |
6a93d84a TJ |
484 | |
485 | ||
486 | /** | |
487 | * @brief converts a hex digit string to binary string. | |
488 | * @param str hex digit string | |
489 | * @return the binary string. | |
490 | * | |
491 | * The hex digit string may contains white spaces or colons which are treated | |
492 | * as delimiters between hex digit groups. | |
493 | * | |
494 | * @todo rework the handling of half nibbles (consistency)! | |
495 | */ | |
6ab3bc95 RP |
496 | std::string convert_hex_to_binary( |
497 | const std::string& str | |
6a93d84a | 498 | ) |
6ab3bc95 RP |
499 | throw (std::runtime_error) |
500 | { | |
501 | std::string result; | |
502 | char c= 0; | |
503 | bool hasNibble= false; | |
504 | bool lastWasWS= true; | |
505 | for ( std::string::const_iterator it= str.begin(); | |
506 | it != str.end(); | |
507 | ++it) | |
508 | { | |
509 | std::string::size_type p = hexDigitsLower.find( *it ); | |
510 | if (p== std::string::npos) | |
511 | { | |
512 | p= hexDigitsUpper.find( *it ); | |
513 | } | |
514 | if (p == std::string::npos) | |
515 | { | |
516 | if ( ( Whitespaces.find( *it ) != std::string::npos) // is it a whitespace? | |
6a93d84a | 517 | or ( *it == ':') // or a colon? |
6ab3bc95 RP |
518 | ) |
519 | { | |
520 | // we treat that as a valid delimiter: | |
521 | if (hasNibble) | |
6a93d84a | 522 | { |
6ab3bc95 RP |
523 | // 1 nibble before WS is treate as lower part: |
524 | result.push_back(c); | |
525 | // reset state: | |
526 | hasNibble= false; | |
6a93d84a | 527 | } |
6ab3bc95 RP |
528 | lastWasWS= true; |
529 | continue; | |
530 | } | |
531 | } | |
532 | if (p == std::string::npos ) | |
533 | { | |
534 | throw runtime_error("illegal character in hex digit string: " + str); | |
535 | } | |
536 | lastWasWS= false; | |
537 | if (hasNibble) | |
538 | { | |
539 | c<<=4; | |
540 | } | |
541 | else | |
542 | { | |
543 | c=0; | |
544 | } | |
545 | c+= (p & 0x0f); | |
546 | if (hasNibble) | |
547 | { | |
548 | //we already had a nibble, so a char is complete now: | |
549 | result.push_back( c ); | |
550 | hasNibble=false; | |
551 | } | |
552 | else | |
553 | { | |
554 | // this is the first nibble of a new char: | |
555 | hasNibble=true; | |
556 | } | |
557 | } | |
558 | if (hasNibble) | |
559 | { | |
560 | //well, there is one nibble left | |
561 | // let's do some heuristics: | |
562 | if (lastWasWS) | |
563 | { | |
564 | // if the preceeding character was a white space (or a colon) | |
565 | // we treat the nibble as lower part: | |
566 | //( this is consistent with shortened hex notations where leading zeros are not noted) | |
567 | result.push_back( c ); | |
568 | } | |
569 | else | |
570 | { | |
571 | // if it was part of a hex digit chain, we treat it as UPPER part (!!) | |
572 | result.push_back( c << 4 ); | |
573 | } | |
574 | } | |
575 | return result; | |
576 | } // eo convert_hex_to_binary(const std::string&) | |
577 | ||
578 | ||
579 | } // eo namespace I2n | |
580 | ||
581 | ||
582 | ||
6a93d84a | 583 | |
e93545dd GE |
/**
 * @brief converts a string from ISO-8859-1 to UTF-8.
 * @param isostring the ISO-8859-1 encoded string.
 * @return the UTF-8 encoded string. The return value of iconv() is not
 *         evaluated; whatever was converted is returned. The result ends at
 *         the first NUL byte of the converted data.
 * @throw std::runtime_error if the conversion cannot be set up or the
 *        conversion buffer cannot be allocated.
 */
std::string iso_to_utf8(const std::string& isostring)
{
    iconv_t i2utf8 = iconv_open("UTF-8", "ISO-8859-1");

    // check the descriptor (not the address of this function)
    if (i2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from ISO-8859-1 to UTF-8");

    size_t in_size = isostring.size();
    const size_t buf_size = in_size*4;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>( malloc(buf_size+1) );
    if (buf == NULL)
    {
        // do not leak the conversion descriptor on the error path
        iconv_close(i2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = const_cast<char *>( isostring.c_str() );
    char *out = buf;
    iconv(i2utf8, &in, &in_size, &out, &out_size);
    iconv_close(i2utf8);

    // out_size holds the unused rest of the buffer; terminate behind the written bytes
    buf[buf_size-out_size] = 0;

    std::string result(buf);
    free(buf);

    return result;
}
613 | ||
/**
 * @brief converts a string from UTF-8 to ISO-8859-1.
 * @param utf8string the UTF-8 encoded string.
 * @return the ISO-8859-1 encoded string. The return value of iconv() is not
 *         evaluated; whatever was converted is returned. The result ends at
 *         the first NUL byte of the converted data.
 * @throw std::runtime_error if the conversion cannot be set up or the
 *        conversion buffer cannot be allocated.
 */
std::string utf8_to_iso(const std::string& utf8string)
{
    iconv_t utf82iso = iconv_open("ISO-8859-1","UTF-8");

    if (utf82iso == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to ISO-8859-1");

    size_t in_size = utf8string.size();
    const size_t buf_size = in_size;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>( malloc(buf_size+1) );
    if (buf == NULL)
    {
        // do not leak the conversion descriptor on the error path
        iconv_close(utf82iso);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = const_cast<char *>( utf8string.c_str() );
    char *out = buf;
    iconv(utf82iso, &in, &in_size, &out, &out_size);
    iconv_close(utf82iso);

    // out_size holds the unused rest of the buffer; terminate behind the written bytes
    buf[buf_size-out_size] = 0;

    std::string result(buf);
    free(buf);

    return result;
}
643 | ||
a5f3af6e GE |
/**
 * @brief converts a UTF-8 string to a zero terminated wide character buffer.
 * @param utf8string the UTF-8 encoded string.
 * @return a buffer allocated with malloc() holding the converted characters
 *         followed by a terminating 0; the caller has to release it with free().
 * @throw std::runtime_error if the conversion cannot be set up, the buffer
 *        cannot be allocated or the conversion fails.
 *
 * NOTE(review): the target encoding is "UCS-4LE" while the buffer is accessed as
 * wchar_t; this presumably relies on a 4 byte little endian wchar_t - confirm
 * for other platforms.
 */
wchar_t* utf8_to_wbuf(const std::string& utf8string)
{
    iconv_t utf82wstr = iconv_open("UCS-4LE","UTF-8");

    if (utf82wstr == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UCS-4");

    size_t in_size = utf8string.size();
    // one output character per input byte at most, plus the terminator
    const size_t buf_size = (in_size+1)*sizeof(wchar_t);
    size_t out_size = buf_size;

    wchar_t *buf = static_cast<wchar_t *>( malloc(buf_size) );
    if (buf == NULL)
    {
        // do not leak the conversion descriptor on the error path
        iconv_close(utf82wstr);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = const_cast<char *>( utf8string.c_str() );
    char *out = reinterpret_cast<char *>(buf);
    const size_t rc = iconv(utf82wstr, &in, &in_size, &out, &out_size);
    iconv_close(utf82wstr);

    if (rc == (size_t)-1)
    {
        // the buffer is not handed out, so it has to be released here
        free(buf);
        throw std::runtime_error("error converting char encodings");
    }

    // out_size holds the unused rest of the buffer (in bytes)
    buf[ (buf_size-out_size) / sizeof(wchar_t) ] = 0;

    return buf;
}
669 | ||
/**
 * @brief converts a string from UTF-7-IMAP (modified UTF-7) to UTF-8.
 * @param utf7imapstring the UTF-7-IMAP encoded string.
 * @return the UTF-8 encoded string. The return value of iconv() is not
 *         evaluated; whatever was converted is returned. The result ends at
 *         the first NUL byte of the converted data.
 * @throw std::runtime_error if the conversion cannot be set up (e.g. the iconv
 *        implementation does not know UTF-7-IMAP) or the conversion buffer
 *        cannot be allocated.
 */
std::string utf7imap_to_utf8(const std::string& utf7imapstring)
{
    iconv_t utf7imap2utf8 = iconv_open("UTF-8","UTF-7-IMAP");

    if (utf7imap2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-7-IMAP to UTF-8");

    size_t in_size = utf7imapstring.size();
    const size_t buf_size = in_size*4;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>( malloc(buf_size+1) );
    if (buf == NULL)
    {
        // do not leak the conversion descriptor on the error path
        iconv_close(utf7imap2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = const_cast<char *>( utf7imapstring.c_str() );
    char *out = buf;
    iconv(utf7imap2utf8, &in, &in_size, &out, &out_size);
    iconv_close(utf7imap2utf8);

    // out_size holds the unused rest of the buffer; terminate behind the written bytes
    buf[buf_size-out_size] = 0;

    std::string result(buf);
    free(buf);

    return result;
}
699 | ||
6a2b6dd1 TJ |
/**
 * @brief converts a string from UTF-8 to UTF-7-IMAP (modified UTF-7).
 * @param utf8string the UTF-8 encoded string.
 * @return the UTF-7-IMAP encoded string. The return value of iconv() is not
 *         evaluated; if the buffer is too small the result is truncated.
 * @throw std::runtime_error if the conversion cannot be set up (e.g. the iconv
 *        implementation does not know UTF-7-IMAP) or the conversion buffer
 *        cannot be allocated.
 */
std::string utf8_to_utf7imap(const std::string& utf8string)
{
    iconv_t utf82utf7imap = iconv_open("UTF-7-IMAP", "UTF-8");

    if (utf82utf7imap == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UTF-7-IMAP");

    // UTF-7 is base64 encoded, a buffer 10x as large
    // as the utf-8 buffer should be enough. If not the string will be truncated.
    size_t in_size = utf8string.size();
    const size_t buf_size = in_size*10;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>( malloc(buf_size+1) );
    if (buf == NULL)
    {
        // do not leak the conversion descriptor on the error path
        iconv_close(utf82utf7imap);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    char *in = const_cast<char *>( utf8string.c_str() );
    char *out = buf;
    iconv(utf82utf7imap, &in, &in_size, &out, &out_size);
    // UTF-7 is a stateful encoding: a call without input makes iconv emit the
    // bytes needed to return to the initial state (the end of an open base64
    // section); without it a string ending in a non-ASCII character is incomplete.
    iconv(utf82utf7imap, NULL, NULL, &out, &out_size);
    iconv_close(utf82utf7imap);

    // out_size holds the unused rest of the buffer; terminate behind the written bytes
    buf[buf_size-out_size] = 0;

    std::string result(buf);
    free(buf);

    return result;
}
731 | ||
118e216e TJ |
// Splits a string into tokens at (html) tag boundaries.
// Each token appended to 'tokenized' is a pair of the token text and a flag
// which is true if the token is a tag, i.e. starts with '<' and ends with the
// next '>'. Everything else (including an unterminated tag) is flagged false.
void tokenize_by_tag(std::vector<std::pair<std::string,bool> > &tokenized, const std::string &input)
{
    bool in_tag = false;
    std::string pending;

    for (std::string::const_iterator ch = input.begin(); ch != input.end(); ++ch)
    {
        if (*ch == '<')
        {
            // a '<' always starts a new token; text collected so far is emitted as non-tag
            in_tag = true;
            if (!pending.empty() )
            {
                tokenized.push_back( std::make_pair(pending, false) );
                pending.clear();
            }
        }

        pending += *ch;

        if (*ch == '>' && in_tag)
        {
            // the tag is complete (pending holds at least the '>' here)
            in_tag = false;
            tokenized.push_back( std::make_pair(pending, true) );
            pending.clear();
        }
    }

    // String left over in buffer?
    if (!pending.empty() )
        tokenized.push_back( std::make_pair(pending, false) );
} // eo tokenize_by_tag
118e216e | 771 | |
118e216e TJ |
772 | |
773 | std::string strip_html_tags(const std::string &input) | |
774 | { | |
6ab3bc95 RP |
775 | // Pair first: string, second: isTag |
776 | vector<pair<string,bool> > tokenized; | |
777 | tokenize_by_tag (tokenized, input); | |
118e216e | 778 | |
6ab3bc95 RP |
779 | string output; |
780 | vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end(); | |
83d700e9 | 781 | for (token = tokenized.begin(); token != tokens_end; ++token) |
6ab3bc95 RP |
782 | if (!token->second) |
783 | output += token->first; | |
784 | ||
785 | return output; | |
786 | } // eo strip_html_tags | |
118e216e | 787 | |
118e216e TJ |
788 | |
789 | // Smart-encode HTML en | |
790 | string smart_html_entities(const std::string &input) | |
791 | { | |
6ab3bc95 RP |
792 | // Pair first: string, second: isTag |
793 | vector<pair<string,bool> > tokenized; | |
794 | tokenize_by_tag (tokenized, input); | |
795 | ||
796 | string output; | |
797 | vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end(); | |
83d700e9 | 798 | for (token = tokenized.begin(); token != tokens_end; ++token) |
6ab3bc95 RP |
799 | { |
800 | // keep HTML tags as they are | |
801 | if (token->second) | |
802 | output += token->first; | |
803 | else | |
804 | output += html_entities(token->first); | |
805 | } | |
806 | ||
807 | return output; | |
118e216e TJ |
808 | } |
809 | ||
6ab3bc95 | 810 | |
a5f3af6e GE |
// Returns the index of the first byte with a value above 127 (high bit set),
// or std::string::npos if the string has no such byte.
std::string::size_type find_8bit(const std::string &str)
{
    for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        if ( (static_cast<unsigned char>(*it) & 0x80) != 0 )
            return static_cast<std::string::size_type>(it - str.begin());
    }

    return std::string::npos;
}
820 | ||
118e216e TJ |
821 | // encoded UTF-8 chars into HTML entities |
822 | string html_entities(std::string str) | |
823 | { | |
6ab3bc95 RP |
824 | // Normal chars |
825 | replace_all (str, "&", "&"); | |
6ab3bc95 RP |
826 | replace_all (str, "<", "<"); |
827 | replace_all (str, ">", ">"); | |
980577e1 TJ |
828 | replace_all (str, "\"", """); |
829 | replace_all (str, "'", "'"); | |
830 | replace_all (str, "/", "/"); | |
6ab3bc95 RP |
831 | |
832 | // Umlauts | |
833 | replace_all (str, "\xC3\xA4", "ä"); | |
834 | replace_all (str, "\xC3\xB6", "ö"); | |
835 | replace_all (str, "\xC3\xBC", "ü"); | |
836 | replace_all (str, "\xC3\x84", "Ä"); | |
837 | replace_all (str, "\xC3\x96", "Ö"); | |
838 | replace_all (str, "\xC3\x9C", "Ü"); | |
839 | ||
840 | // Misc | |
841 | replace_all (str, "\xC3\x9F", "ß"); | |
842 | ||
843 | // conversion of remaining non-ASCII chars needed? | |
844 | // just do if needed because of performance | |
845 | if (find_8bit(str) != string::npos) | |
846 | { | |
847 | // convert to fixed-size encoding UTF-32 | |
848 | wchar_t* wbuf=utf8_to_wbuf(str); | |
849 | ostringstream target; | |
850 | ||
851 | // replace all non-ASCII chars with HTML representation | |
852 | for (int p=0; wbuf[p] != 0; p++) | |
853 | { | |
854 | unsigned int c=wbuf[p]; | |
855 | ||
856 | if (c <= 127) | |
857 | target << static_cast<unsigned char>(c); | |
858 | else | |
859 | target << "&#" << c << ';'; | |
860 | } | |
861 | ||
862 | free(wbuf); | |
863 | ||
864 | str=target.str(); | |
865 | } | |
866 | ||
867 | return str; | |
868 | } // eo html_entities(std::string) | |
869 | ||
554f813d GE |
870 | // convert HTML entities to something that can be viewed on a basic text console (restricted to ASCII-7) |
871 | string html_entities_to_console(std::string str) | |
872 | { | |
873 | // Normal chars | |
874 | replace_all (str, "&", "&"); | |
875 | replace_all (str, "<", "<"); | |
876 | replace_all (str, ">", ">"); | |
877 | replace_all (str, """, "\""); | |
878 | replace_all (str, "'", "'"); | |
879 | replace_all (str, "/", "/"); | |
880 | ||
881 | // Umlauts | |
882 | replace_all (str, "ä", "ae"); | |
883 | replace_all (str, "ö", "oe"); | |
884 | replace_all (str, "ü", "ue"); | |
885 | replace_all (str, "Ä", "Ae"); | |
886 | replace_all (str, "Ö", "Oe"); | |
887 | replace_all (str, "Ü", "Ue"); | |
888 | ||
889 | // Misc | |
890 | replace_all (str, "ß", "ss"); | |
891 | ||
892 | return str; | |
893 | } | |
118e216e | 894 | |
3f5c5ccd CH |
895 | // find_html_comments + remove_html_comments(str, comments) |
896 | void remove_html_comments(string &str) | |
897 | { | |
898 | vector<CommentZone> comments; | |
899 | find_html_comments(str, comments); | |
900 | remove_html_comments(str, comments); | |
901 | } | |
902 | ||
903 | // find all html comments, behaving correctly if they are nested; ignores comment tags ("<!--FOO .... BAR-->") | |
904 | // If there are invalid comments ("-->" before "<!--" or different number of closing and opening tags), | |
905 | // then the unknown index of corresponding start/end tag will be represented by a string::npos | |
906 | // Indices are from start of start tag until first index after closing tag | |
907 | void find_html_comments(const std::string &str, vector<CommentZone> &comments) | |
908 | { | |
909 | static const string START = "<!--"; | |
910 | static const string CLOSE = "-->"; | |
911 | static const string::size_type START_LEN = START.length(); | |
912 | static const string::size_type CLOSE_LEN = CLOSE.length(); | |
913 | ||
914 | // in order to find nested comments, need either recursion or a stack | |
915 | vector<string::size_type> starts; // stack of start tags | |
916 | ||
917 | string::size_type pos = 0; | |
918 | string::size_type len = str.length(); | |
919 | string::size_type next_start, next_close; | |
920 | ||
921 | while (pos < len) // not really needed but just in case | |
922 | { | |
923 | next_start = str.find(START, pos); | |
924 | next_close = str.find(CLOSE, pos); | |
925 | ||
926 | if ( (next_start == string::npos) && (next_close == string::npos) ) | |
927 | break; // we are done | |
928 | ||
929 | else if ( (next_start == string::npos) || (next_close < next_start) ) // close one comment (pop) | |
930 | { | |
931 | if (starts.empty()) // closing tag without a start | |
932 | comments.push_back(CommentZone(string::npos, next_close+CLOSE_LEN)); | |
933 | else | |
934 | { | |
935 | comments.push_back(CommentZone(starts.back(), next_close+CLOSE_LEN)); | |
936 | starts.pop_back(); | |
937 | } | |
938 | pos = next_close + CLOSE_LEN; | |
939 | } | |
940 | ||
941 | else if ( (next_close == string::npos) || (next_start < next_close) ) // start a new comment (push) | |
942 | { | |
943 | starts.push_back(next_start); | |
944 | pos = next_start + START_LEN; | |
945 | } | |
946 | } | |
947 | ||
948 | // add comments that have no closing tag from back to front (important for remove_html_comments!) | |
949 | while (!starts.empty()) | |
950 | { | |
951 | comments.push_back(CommentZone(starts.back(), string::npos)); | |
952 | starts.pop_back(); | |
953 | } | |
954 | } | |
955 | ||
956 | // remove all html comments foundby find_html_comments | |
957 | void remove_html_comments(std::string &str, const vector<CommentZone> &comments) | |
958 | { | |
959 | // remember position where last removal started | |
960 | string::size_type last_removal_start = str.length(); | |
961 | ||
962 | // Go from back to front to not mess up indices. | |
963 | // This requires that bigger comments, that contain smaller comments, come AFTER | |
964 | // the small contained comments in the comments vector (i.e. comments are ordered by | |
965 | // their closing tag, not their opening tag). This is true for results from find_html_comments | |
966 | BOOST_REVERSE_FOREACH(const CommentZone &comment, comments) | |
967 | { | |
968 | if (comment.first == string::npos) | |
969 | { | |
970 | str = str.replace(0, comment.second, ""); // comment starts "before" str --> delete from start | |
971 | break; // there can be no more | |
972 | } | |
973 | else if (comment.first >= last_removal_start) | |
974 | { | |
975 | continue; // this comment is inside another comment that we have removed already | |
976 | } | |
977 | else if (comment.second == string::npos) // comment ends "after" str --> delete until end | |
978 | { | |
979 | str = str.replace(comment.first, string::npos, ""); | |
980 | last_removal_start = comment.first; | |
981 | } | |
982 | else | |
983 | { | |
984 | str = str.replace(comment.first, comment.second-comment.first, ""); | |
985 | last_removal_start = comment.first; | |
986 | } | |
987 | } | |
988 | } | |
989 | ||
e93545dd GE |
990 | bool replace_all(string &base, const char *ist, const char *soll) |
991 | { | |
6ab3bc95 RP |
992 | string i=ist; |
993 | string s=soll; | |
994 | return replace_all(base,&i,&s); | |
e93545dd GE |
995 | } |
996 | ||
997 | bool replace_all(string &base, const string &ist, const char *soll) | |
998 | { | |
6ab3bc95 RP |
999 | string s=soll; |
1000 | return replace_all(base,&ist,&s); | |
e93545dd GE |
1001 | } |
1002 | ||
1003 | bool replace_all(string &base, const string *ist, const string *soll) | |
1004 | { | |
6ab3bc95 | 1005 | return replace_all(base,*ist,*soll); |
e93545dd GE |
1006 | } |
1007 | ||
1008 | bool replace_all(string &base, const char *ist, const string *soll) | |
1009 | { | |
6ab3bc95 RP |
1010 | string i=ist; |
1011 | return replace_all(base,&i,soll); | |
e93545dd GE |
1012 | } |
1013 | ||
/**
 * @brief replace all occurrences of a string within another string
 * @param base the string to modify in place
 * @param ist the text to search for, must not be empty
 * @param soll the text to insert instead
 * @return true if at least one occurrence was replaced
 * @throw runtime_error if @a ist is empty
 *
 * The search continues behind each inserted replacement, so text that was
 * just inserted is never searched again.
 */
bool replace_all(string &base, const string &ist, const string &soll)
{
    if (ist.empty() )
        throw runtime_error ("replace_all called with empty search string");

    bool replaced_any = false;

    for (string::size_type hit = base.find(ist, 0);
         hit != string::npos;
         hit = base.find(ist, hit + soll.size()))
    {
        base.replace(hit, ist.size(), soll);
        replaced_any = true;
    }

    return replaced_any;
}
1030 | } |
1031 | ||
b953bf36 GE |
/**
 * @brief replaces all characters that could be problematic or impose a security risk when being logged
 * @param str the original string
 * @param replace_with the character to replace the unsafe chars with
 * @return a string that is safe to send to syslog or other logfiles
 *
 * Only the chars from 0x20 (space) up to and including 0x7E (~) are kept.
 * See e.g. RFC 5424, section 8.2 or the posix character class "printable".
 * Everything else (NUL, control characters, 8 bit chars and therefore every
 * byte of a multi-byte UTF8 sequence) is overwritten with @a replace_with.
 */
std::string sanitize_for_logging(const std::string &str, const char replace_with)
{
    std::string cleaned(str);

    for (std::string::iterator ch = cleaned.begin(); ch != cleaned.end(); ++ch)
    {
        const bool printable = (*ch >= 0x20 && *ch <= 0x7E);
        if (!printable)
            *ch = replace_with;
    }

    return cleaned;
}
1054 | ||
e5b21dbb | 1055 | #if 0 |
e93545dd GE |
/**
 * @brief convert a string to lower case, character by character
 * @param src the original string
 * @return lower case copy of @a src
 *
 * Note: this definition sits inside an "#if 0" section and is currently
 * not compiled.
 */
string to_lower(const string &src)
{
    string dst = src;

    for (string::iterator it = dst.begin(); it != dst.end(); ++it)
    {
        // tolower() is only defined for EOF and values representable as
        // unsigned char; a plain (possibly negative) char must be converted first
        *it = static_cast<char>(tolower(static_cast<unsigned char>(*it)));
    }

    return dst;
}
1066 | ||
/**
 * @brief convert a string to upper case, character by character
 * @param src the original string
 * @return upper case copy of @a src
 *
 * Note: this definition sits inside an "#if 0" section and is currently
 * not compiled.
 */
string to_upper(const string &src)
{
    string dst = src;

    for (string::iterator it = dst.begin(); it != dst.end(); ++it)
    {
        // toupper() is only defined for EOF and values representable as
        // unsigned char; a plain (possibly negative) char must be converted first
        *it = static_cast<char>(toupper(static_cast<unsigned char>(*it)));
    }

    return dst;
}
e5b21dbb | 1077 | #endif |
e93545dd | 1078 | |
83809f5e | 1079 | const int MAX_UNIT_FORMAT_SYMBOLS = 6; |
d1ea9075 | 1080 | |
2cb9a9c5 | 1081 | const string shortUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = { |
d1ea9075 GMF |
1082 | " B", |
1083 | " KB", | |
1084 | " MB", | |
1085 | " GB", | |
1086 | " TB", | |
83809f5e | 1087 | " PB" |
d1ea9075 GMF |
1088 | }; |
1089 | ||
2cb9a9c5 | 1090 | const string longUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = { |
5cb766b9 GMF |
1091 | i18n_noop(" Bytes"), |
1092 | i18n_noop(" KBytes"), | |
1093 | i18n_noop(" MBytes"), | |
1094 | i18n_noop(" GBytes"), | |
1095 | i18n_noop(" TBytes"), | |
83809f5e | 1096 | i18n_noop(" PBytes") |
d1ea9075 GMF |
1097 | }; |
1098 | ||
72a94426 GMF |
1099 | |
/**
 * @brief round a number "half up" to a fraction given by a multiplier
 * @param number the value to round
 * @param rounding_multiplier inverse of the rounding step, e.g. 10 rounds to
 *        one decimal place, 100 to two
 * @return the rounded value
 *
 * The value is scaled, rounded to the nearest integer (ties go towards
 * positive infinity) and scaled back. std::floor is used instead of a cast
 * to int64_t: the cast truncates towards zero, which gives wrong results
 * for negative numbers (-5.0 became -4.9), and it overflows for values
 * beyond the int64_t range.
 */
long double rounding_upwards(
    const long double number,
    const int rounding_multiplier
)
{
    const long double scaled = number * rounding_multiplier;
    const long double rounded = std::floor(scaled + 0.5);

    return rounded / (long double) (rounding_multiplier);
}
1113 | ||
1114 | ||
81267544 GMF |
1115 | string nice_unit_format( |
1116 | const int64_t input, | |
70fc0674 GMF |
1117 | const UnitFormat format, |
1118 | const UnitBase base | |
81267544 | 1119 | ) |
6ab3bc95 | 1120 | { |
d1ea9075 | 1121 | // select the system of units (decimal or binary) |
81267544 | 1122 | int multiple = 0; |
a398513a | 1123 | if (base == UnitBase1000) |
81267544 GMF |
1124 | { |
1125 | multiple = 1000; | |
1126 | } | |
1127 | else | |
1128 | { | |
1129 | multiple = 1024; | |
1130 | } | |
1131 | ||
1132 | long double size = input; | |
6ab3bc95 | 1133 | |
d1ea9075 GMF |
1134 | // check the size of the input number to fit in the appropriate symbol |
1135 | int sizecount = 0; | |
81267544 | 1136 | while (size > multiple) |
6ab3bc95 | 1137 | { |
81267544 GMF |
1138 | size = size / multiple; |
1139 | sizecount++; | |
83809f5e GMF |
1140 | |
1141 | // rollback to the previous values and stop the loop when cannot | |
1142 | // represent the number length. | |
1143 | if (sizecount >= MAX_UNIT_FORMAT_SYMBOLS) | |
1144 | { | |
1145 | size = size * multiple; | |
1146 | sizecount--; | |
1147 | break; | |
1148 | } | |
6ab3bc95 RP |
1149 | } |
1150 | ||
a398513a GMF |
1151 | // round the input number "half up" to multiples of 10 |
1152 | const int rounding_multiplier = 10; | |
72a94426 | 1153 | size = rounding_upwards(size, rounding_multiplier); |
6ab3bc95 | 1154 | |
d1ea9075 | 1155 | // format the input number, placing the appropriate symbol |
6ab3bc95 | 1156 | ostringstream out; |
6ab3bc95 | 1157 | out.setf (ios::fixed); |
a398513a | 1158 | if (format == ShortUnitFormat) |
d1ea9075 GMF |
1159 | { |
1160 | out.precision(1); | |
68d37a5c | 1161 | out << size << i18n( shortUnitFormatSymbols[sizecount].c_str() ); |
d1ea9075 GMF |
1162 | } |
1163 | else | |
6ab3bc95 | 1164 | { |
d1ea9075 | 1165 | out.precision (2); |
68d37a5c | 1166 | out << size << i18n( longUnitFormatSymbols[sizecount].c_str() ); |
6ab3bc95 RP |
1167 | } |
1168 | ||
1169 | return out.str(); | |
1170 | } // eo nice_unit_format(int input) | |
1171 | ||
e93545dd | 1172 | |
5cd64148 CH |
1173 | string nice_unit_format( |
1174 | const double input, | |
1175 | const UnitFormat format, | |
1176 | const UnitBase base | |
1177 | ) | |
1178 | { | |
1179 | // round as double and cast to int64_t | |
1180 | // cast raised overflow error near max val of int64_t (~9.2e18, see unittest) | |
1181 | int64_t input_casted_and_rounded = | |
1182 | boost::numeric_cast<int64_t>( round(input) ); | |
1183 | ||
1184 | // now call other | |
1185 | return nice_unit_format( input_casted_and_rounded, format, base ); | |
1186 | } // eo nice_unit_format(double input) | |
1187 | ||
1188 | ||
47c07fba GE |
/**
 * @brief turn a string into a double-quoted, backslash-escaped string
 * @param s the original string
 * @return @a s enclosed in double quotes, with every double quote and
 *         backslash prefixed by a backslash and carriage return / newline
 *         written as the two-character sequences \\r and \\n
 */
string escape(const string &s)
{
    string out;
    out.reserve(s.size() + 2);

    out += '"';
    for (string::const_iterator ch = s.begin(); ch != s.end(); ++ch)
    {
        switch (*ch)
        {
            case '"':
            case '\\':
                out += '\\';
                out += *ch;
                break;
            case '\r':
                out += "\\r";
                break;
            case '\n':
                out += "\\n";
                break;
            default:
                out += *ch;
        }
    }
    out += '"';

    return out;
} // eo escape(const std::string&)
47c07fba | 1219 | |
47c07fba | 1220 | |
6ab3bc95 RP |
/**
 * @brief extract and unescape a double-quoted string, the reverse of escape()
 * @param s text containing the quoted string
 * @param startpos index of the opening double quote within @a s
 * @param endpos receives the index of the closing double quote within @a s
 * @return the unescaped content between the quotes
 * @throw out_of_range if @a startpos is outside of @a s, if there is no
 *        double quote at @a startpos, or if the content ends with a single
 *        backslash
 */
string descape(const string &s, int startpos, int &endpos)
{
    if (s.at(startpos) != '"')
        throw out_of_range("value not type escaped string");

    string out = s.substr(startpos+1);

    // search for the closing quote: a quote preceded by an odd number of
    // backslashes is escaped and belongs to the content
    string::size_type quote = 0;
    while ( (quote = out.find('"', quote) ) != string::npos)
    {
        string::size_type backslashes = 0;
        while (backslashes < quote && out[quote - 1 - backslashes] == '\\')
            ++backslashes;

        if (backslashes % 2 == 0)
            break;

        ++quote;
    }

    // cut off everything from the closing quote on
    out = out.substr(0, quote);

    // tell the caller where the quoted string ended
    endpos = startpos + quote + 1;

    // resolve the backslash sequences inside the content
    for (string::size_type bs = out.find('\\');
         bs != string::npos;
         bs = out.find('\\', bs + 1))
    {
        const char code = out.at(bs + 1);

        if (code == 'r')
            out.replace(bs, 2, "\r");
        else if (code == 'n')
            out.replace(bs, 2, "\n");
        else
            out.erase(bs, 1);   // keep the escaped char itself, drop the backslash
    }

    return out;
} // eo descape(const std::string&,int,int&)
47c07fba | 1280 | |
e93545dd | 1281 | |
47c07fba GE |
/**
 * @brief wrap a string in single quotes for use as a shell argument
 * @param input the original string
 * @return @a input enclosed in single quotes; each single quote inside is
 *         written as '\'' (close the quoting, escaped quote, reopen)
 */
string escape_shellarg(const string &input)
{
    string output(1, '\'');

    for (string::size_type i = 0; i < input.size(); ++i)
    {
        const char ch = input[i];

        if (ch == '\'')
            output.append("'\\''");
        else
            output.push_back(ch);
    }

    output.push_back('\'');
    return output;
}