Commit | Line | Data |
---|---|---|
0e23f538 TJ |
1 | /* |
2 | The software in this package is distributed under the GNU General | |
3 | Public License version 2 (with a special exception described below). | |
4 | ||
5 | A copy of GNU General Public License (GPL) is included in this distribution, | |
6 | in the file COPYING.GPL. | |
7 | ||
8 | As a special exception, if other files instantiate templates or use macros | |
9 | or inline functions from this file, or you compile this file and link it | |
10 | with other works to produce a work based on this file, this file | |
11 | does not by itself cause the resulting work to be covered | |
12 | by the GNU General Public License. | |
13 | ||
14 | However the source code for this file must still be made available | |
15 | in accordance with section (3) of the GNU General Public License. | |
16 | ||
17 | This exception does not invalidate any other reasons why a work based | |
18 | on this file might be covered by the GNU General Public License. | |
19 | */ | |
6a93d84a TJ |
20 | /** @file |
21 | * | |
22 | * (c) Copyright 2007-2008 by Intra2net AG | |
6a93d84a | 23 | */ |
e93545dd GE |
24 | |
25 | #include <iostream> | |
26 | #include <string> | |
27 | #include <sstream> | |
28 | #include <stdexcept> | |
5efd35b1 | 29 | #include <algorithm> |
5cd64148 | 30 | #include <cmath> // for round() |
2bb72337 | 31 | #include <climits> |
e93545dd | 32 | |
a5f3af6e | 33 | #include <wchar.h> |
e93545dd GE |
34 | #include <stdlib.h> |
35 | #include <iconv.h> | |
36 | #include <i18n.h> | |
37 | ||
5cd64148 | 38 | #include <boost/numeric/conversion/cast.hpp> |
3f5c5ccd | 39 | #include <boost/foreach.hpp> |
5cd64148 | 40 | |
2bb72337 TJ |
41 | #include <boost/assert.hpp> |
42 | #include <boost/shared_ptr.hpp> | |
43 | #include <openssl/bio.h> | |
44 | #include <openssl/evp.h> | |
45 | ||
e93545dd GE |
46 | #include <stringfunc.hxx> |
47 | ||
48 | using namespace std; | |
49 | ||
6ab3bc95 RP |
50 | namespace I2n |
51 | { | |
6a93d84a TJ |
52 | |
53 | ||
6ab3bc95 RP |
namespace
{

/// Digit tables used to translate a nibble value (0..15) into its hex character.
const std::string hexDigitsLower("0123456789abcdef");
const std::string hexDigitsUpper("0123456789ABCDEF");


/**
 * @brief functor which converts a single character to upper case.
 *
 * Used as the operation for std::transform() in to_upper_mod().
 */
struct UpperFunc
{
    char operator() (char c) const
    {
        // std::toupper() expects a value representable as unsigned char (or EOF).
        // Passing a plain (possibly negative) char, e.g. an ISO-8859-1 umlaut,
        // is undefined behaviour; so convert to unsigned char first.
        return static_cast<char>( std::toupper( static_cast<unsigned char>(c) ) );
    }
}; // eo struct UpperFunc


/**
 * @brief functor which converts a single character to lower case.
 *
 * Used as the operation for std::transform() in to_lower_mod().
 */
struct LowerFunc
{
    char operator() (char c) const
    {
        // see UpperFunc: the argument must not be a negative char value.
        return static_cast<char>( std::tolower( static_cast<unsigned char>(c) ) );
    }
}; // eo struct LowerFunc


} // eo namespace <anonymous>
80 | ||
81 | ||
82 | ||
/**
 * Default set of characters treated as whitespace:
 * blank, tab, carriage return and line feed (" \t\r\n").
 */
const std::string Whitespaces(" \t\r\n");

/**
 * Default set of line ending characters:
 * carriage return and line feed ("\r\n").
 */
const std::string LineEndings("\r\n");
6a93d84a TJ |
92 | |
93 | ||
94 | ||
/**
 * @brief tests whether a string starts with a given prefix.
 * @param str the string which is examined.
 * @param prefix the prefix to look for.
 * @return @a true iff @a prefix is not empty and @a str begins with it.
 *
 * An empty prefix never matches.
 */
bool has_prefix(const std::string& str, const std::string& prefix)
{
    const std::string::size_type prefix_len= prefix.size();
    // an empty prefix never matches; a prefix longer than the string cannot match.
    if (prefix_len == 0 || prefix_len > str.size() )
    {
        return false;
    }
    return str.compare(0, prefix_len, prefix) == 0;
} // eo has_prefix(const std::string&,const std::string&)
6a93d84a TJ |
109 | |
110 | ||
/**
 * @brief tests whether a string ends with a given suffix.
 * @param str the string which is examined.
 * @param suffix the suffix to look for.
 * @return @a true iff @a suffix is not empty and @a str ends with it.
 *
 * An empty suffix never matches.
 */
bool has_suffix(const std::string& str, const std::string& suffix)
{
    const std::string::size_type suffix_len= suffix.size();
    // an empty suffix never matches; a suffix longer than the string cannot match.
    if (suffix_len == 0 || suffix_len > str.size() )
    {
        return false;
    }
    return str.compare(str.size() - suffix_len, suffix_len, suffix) == 0;
} // eo has_suffix(const std::string&,const std::string&)
6a93d84a TJ |
125 | |
126 | ||
/**
 * @brief strips characters of a given list from both ends of a string (in place).
 * @param[in,out] str the string which is trimmed.
 * @param charlist the characters which are removed from the beginning and the end of @a str.
 * @return a copy of the trimmed string.
 */
std::string trim_mod(std::string& str, const std::string& charlist)
{
    const std::string::size_type first_kept= str.find_first_not_of(charlist);
    if (first_kept == std::string::npos)
    {
        // nothing survives: the string is empty or consists of charlist characters only.
        str.clear();
        return str;
    }
    // at least one character is kept, so the search from the end always succeeds.
    const std::string::size_type last_kept= str.find_last_not_of(charlist);
    // cut the tail first; that way the position of the head is still valid afterwards.
    str.erase(last_kept + 1);
    str.erase(0, first_kept);
    return str;
} // eo trim_mod(std::string&,const std::string&)
6a93d84a TJ |
156 | |
157 | ||
158 | ||
/**
 * @brief removes the last character of a string (in place) if it is one of a given list.
 * @param[in,out] str the string.
 * @param what the characters which qualify for removal.
 * @return a copy of the resulting string (last character removed if applicable).
 *
 * At most one character is removed.
 */
std::string chomp_mod(std::string& str, const std::string& what)
{
    if (!str.empty() && !what.empty() )
    {
        const std::string::size_type last= str.size() - 1;
        if (what.find( str[last] ) != std::string::npos)
        {
            str.resize(last);
        }
    }
    return str;
} // eo chomp_mod(std::string&,const std::string&)
6a93d84a TJ |
177 | |
178 | ||
179 | /** | |
180 | * @brief converts a string to lower case. | |
181 | * @param[in,out] str the string to modify. | |
182 | * @return the string | |
183 | */ | |
6ab3bc95 | 184 | std::string to_lower_mod(std::string& str) |
6a93d84a | 185 | { |
6ab3bc95 RP |
186 | std::transform(str.begin(), str.end(), str.begin(), LowerFunc() ); |
187 | return str; | |
188 | } // eo to_lower_mod(std::string&) | |
6a93d84a TJ |
189 | |
190 | ||
191 | /** | |
192 | * @brief converts a string to upper case. | |
193 | * @param[in,out] str the string to modify. | |
194 | * @return the string | |
195 | */ | |
6ab3bc95 | 196 | std::string to_upper_mod(std::string& str) |
6a93d84a | 197 | { |
6ab3bc95 RP |
198 | std::transform( str.begin(), str.end(), str.begin(), UpperFunc() ); |
199 | return str; | |
200 | } // eo to_upper_mod(std::string&) | |
6a93d84a TJ |
201 | |
202 | ||
203 | ||
/**
 * @brief returns a copy of a string with characters of a given list stripped from both ends.
 * @param str the string which should be trimmed.
 * @param charlist the characters which are removed from the beginning and the end.
 * @return the trimmed string.
 */
std::string trim (const std::string& str, const std::string& charlist)
{
    const std::string::size_type first_kept= str.find_first_not_of(charlist);
    if (first_kept == std::string::npos)
    {
        // the string is empty or consists of charlist characters only.
        return std::string();
    }
    // there is at least one character to keep, so this search cannot fail.
    const std::string::size_type last_kept= str.find_last_not_of(charlist);
    return std::string(str, first_kept, last_kept - first_kept + 1);
} // eo trim(const std::string&,const std::string&)
223 | ||
224 | ||
/**
 * @brief returns a copy of a string without its last character if that is one of a given list.
 * @param str the string.
 * @param what the characters which qualify for removal.
 * @return the resulting string (last character removed if applicable).
 *
 * At most one character is removed.
 */
std::string chomp (const std::string& str, const std::string& what)
{
    const bool strip_last=
        !str.empty()
        && !what.empty()
        && what.find( *str.rbegin() ) != std::string::npos;
    if (strip_last)
    {
        return std::string(str, 0, str.size() - 1);
    }
    return str;
} // eo chomp(const std::string&,const std::string&)
243 | ||
244 | ||
245 | /** | |
246 | * @brief returns a lower case version of a given string. | |
247 | * @param str the string | |
248 | * @return the lower case version of the string | |
249 | */ | |
6ab3bc95 | 250 | std::string to_lower (const std::string& str) |
6a93d84a | 251 | { |
6ab3bc95 RP |
252 | std::string result(str); |
253 | return to_lower_mod(result); | |
254 | } // eo to_lower(const std::string&) | |
6a93d84a TJ |
255 | |
256 | ||
257 | /** | |
258 | * @brief returns a upper case version of a given string. | |
259 | * @param str the string | |
260 | * @return the upper case version of the string | |
261 | */ | |
6ab3bc95 | 262 | std::string to_upper(const std::string& str) |
6a93d84a | 263 | { |
6ab3bc95 RP |
264 | std::string result(str); |
265 | return to_upper_mod(result); | |
266 | } // eo to_upper(const std::string&) | |
6a93d84a TJ |
267 | |
268 | ||
269 | ||
270 | /** | |
271 | * @brief removes a given suffix from a string. | |
272 | * @param str the string. | |
273 | * @param suffix the suffix which should be removed if the string ends with it. | |
274 | * @return the string without the suffix. | |
275 | * | |
276 | * If the string ends with the suffix, it is removed. If the the string doesn't end | |
277 | * with the suffix the original string is returned. | |
278 | */ | |
6ab3bc95 | 279 | std::string remove_suffix(const std::string& str, const std::string& suffix) |
6a93d84a | 280 | { |
6ab3bc95 RP |
281 | if (has_suffix(str,suffix) ) |
282 | { | |
283 | return str.substr(0, str.size()-suffix.size() ); | |
284 | } | |
285 | return str; | |
286 | } // eo remove_suffix(const std::string&,const std::string&) | |
6a93d84a TJ |
287 | |
288 | ||
289 | ||
290 | /** | |
291 | * @brief removes a given prefix from a string. | |
292 | * @param str the string. | |
293 | * @param prefix the prefix which should be removed if the string begins with it. | |
294 | * @return the string without the prefix. | |
295 | * | |
296 | * If the string begins with the prefix, it is removed. If the the string doesn't begin | |
297 | * with the prefix the original string is returned. | |
298 | */ | |
6ab3bc95 | 299 | std::string remove_prefix(const std::string& str, const std::string& prefix) |
6a93d84a | 300 | { |
6ab3bc95 RP |
301 | if (has_prefix(str,prefix) ) |
302 | { | |
303 | return str.substr( prefix.size() ); | |
304 | } | |
305 | return str; | |
306 | } // eo remove_prefix(const std::string&,const std::string&) | |
6a93d84a TJ |
307 | |
308 | ||
309 | /** | |
310 | * split a string to key and value delimited by a given delimiter. | |
6ab3bc95 | 311 | * The resulting key and value strings are trimmed (Whitespaces removed at beginning and end). |
6a93d84a TJ |
312 | * @param str the string which should be splitted. |
313 | * @param[out] key the resulting key | |
314 | * @param[out] value the resulting value | |
315 | * @param delimiter the delimiter between key and value; default is '='. | |
316 | * @return @a true if the split was successful. | |
317 | */ | |
6ab3bc95 RP |
318 | bool pair_split( |
319 | const std::string& str, | |
320 | std::string& key, | |
321 | std::string& value, | |
322 | char delimiter) | |
323 | { | |
324 | std::string::size_type pos = str.find (delimiter); | |
325 | if (pos == std::string::npos) return false; | |
326 | key= str.substr(0,pos); | |
327 | value= str.substr(pos+1); | |
328 | trim_mod(key); | |
329 | trim_mod(value); | |
330 | return true; | |
331 | } // eo pair_split(const std::string&,std::string&,std::string&,char) | |
6a93d84a TJ |
332 | |
333 | ||
334 | /** | |
335 | * splits a string by given delimiter | |
336 | * | |
337 | * @param[in] str the string which should be splitted. | |
338 | * @param[out] result the list resulting from splitting @a str. | |
339 | * @param[in] delimiter the delimiter (word/phrase) at which @a str should be splitted. | |
340 | * @param[in] omit_empty should empty parts not be stored? | |
341 | * @param[in] trim_list list of characters the parts should be trimmed by. | |
342 | * (empty string results in no trim) | |
343 | */ | |
6ab3bc95 RP |
344 | void split_string( |
345 | const std::string& str, | |
346 | std::list<std::string>& result, | |
347 | const std::string& delimiter, | |
348 | bool omit_empty, | |
349 | const std::string& trim_list | |
6a93d84a TJ |
350 | ) |
351 | { | |
6ab3bc95 RP |
352 | std::string::size_type pos, last_pos=0; |
353 | bool delimiter_found= false; | |
354 | while ( last_pos < str.size() && last_pos != std::string::npos) | |
355 | { | |
356 | pos= str.find(delimiter, last_pos); | |
357 | std::string part; | |
358 | if (pos == std::string::npos) | |
359 | { | |
360 | part= str.substr(last_pos); | |
361 | delimiter_found= false; | |
362 | } | |
363 | else | |
364 | { | |
365 | part= str.substr(last_pos, pos-last_pos); | |
366 | delimiter_found=true; | |
367 | } | |
368 | if (pos != std::string::npos) | |
369 | { | |
370 | last_pos= pos+ delimiter.size(); | |
371 | } | |
372 | else | |
373 | { | |
374 | last_pos= std::string::npos; | |
375 | } | |
376 | if (!trim_list.empty() ) trim_mod (part, trim_list); | |
377 | if (omit_empty && part.empty() ) continue; | |
378 | result.push_back( part ); | |
379 | } | |
380 | // if the string ends with a delimiter we need to append an empty string if no omit_empty | |
381 | // was given. | |
382 | // (this way we keep the split result consistent to a join operation) | |
383 | if (delimiter_found && !omit_empty) | |
384 | { | |
385 | result.push_back(""); | |
386 | } | |
387 | } // eo split_string(const std::string&,std::list< std::string >&,const std::string&,bool,const std::string&) | |
6a93d84a TJ |
388 | |
389 | ||
338da253 CH |
390 | /** call split_string with list<string>, converts result to vector; vector is clear()-ed first |
391 | * | |
392 | * Note: Uses 3 O(n)-operations: list.size, vector.resize and std::swap_ranges; | |
393 | * not sure whether there is a better way to do this | |
394 | * */ | |
395 | void split_string( | |
396 | const std::string& str, | |
397 | std::vector<std::string>& result, | |
398 | const std::string& delimiter, | |
399 | bool omit_empty, | |
400 | const std::string& trim_list | |
401 | ) | |
402 | { | |
403 | std::list<std::string> tmp; | |
404 | split_string(str, tmp, delimiter, omit_empty, trim_list); | |
405 | std::size_t size = tmp.size(); // this is O(n) | |
406 | result.clear(); | |
407 | result.resize(size); // also O(n) | |
408 | std::swap_ranges(tmp.begin(), tmp.end(), result.begin()); // also O(n) | |
409 | } | |
410 | ||
6a93d84a TJ |
411 | /** |
412 | * splits a string by a given delimiter | |
413 | * @param str the string which should be splitted. | |
414 | * @param delimiter delimiter the delimiter (word/phrase) at which @a str should be splitted. | |
415 | * @param[in] omit_empty should empty parts not be stored? | |
416 | * @param[in] trim_list list of characters the parts should be trimmed by. | |
417 | * (empty string results in no trim) | |
418 | * @return the list resulting from splitting @a str. | |
419 | */ | |
6ab3bc95 RP |
420 | std::list<std::string> split_string( |
421 | const std::string& str, | |
422 | const std::string& delimiter, | |
423 | bool omit_empty, | |
424 | const std::string& trim_list | |
6a93d84a TJ |
425 | ) |
426 | { | |
6ab3bc95 RP |
427 | std::list<std::string> result; |
428 | split_string(str, result, delimiter, omit_empty, trim_list); | |
429 | return result; | |
430 | } // eo split_string(const std::string&,const std::string&,bool,const std::string&) | |
6a93d84a TJ |
431 | |
432 | ||
/**
 * @brief joins a list of strings into a single string.
 *
 * This function is (basically) the reverse operation of @a split_string.
 *
 * @param parts the list of strings.
 * @param delimiter the delimiter which is inserted between two consecutive strings.
 * @return the joined string; empty if @a parts is empty.
 */
std::string join_string(
    const std::list< std::string >& parts,
    const std::string& delimiter
)
{
    std::string joined;
    for (std::list< std::string >::const_iterator it= parts.begin();
         it != parts.end();
         ++it)
    {
        // every element except the first one is preceded by the delimiter.
        if (it != parts.begin() )
        {
            joined+= delimiter;
        }
        joined+= *it;
    }
    return joined;
} // eo join_string(const std::list< std::string >&,const std::string&)
6a93d84a TJ |
460 | |
461 | ||
376ec4fa CH |
/**
 * @brief joins a vector of strings into a single string.
 *
 * Same as join_string() for lists, but takes a vector.
 *
 * @param parts the vector of strings.
 * @param delimiter the delimiter which is inserted between two consecutive strings.
 * @return the joined string; empty if @a parts is empty.
 */
std::string join_string(
    const std::vector< std::string >& parts,
    const std::string& delimiter
)
{
    std::string joined;
    const std::vector< std::string >::size_type count= parts.size();
    for (std::vector< std::string >::size_type idx= 0; idx < count; ++idx)
    {
        // every element except the first one is preceded by the delimiter.
        if (idx > 0)
        {
            joined+= delimiter;
        }
        joined+= parts[idx];
    }
    return joined;
} // eo join_string(const std::vector< std::string >&,const std::string&)
481 | ||
0aaf13b5 PG |
/**
 * @brief joins a set of strings into a single string.
 *
 * Same as join_string() for lists, but takes a set; the elements are joined
 * in the iteration order of the set.
 *
 * @param parts the set of strings.
 * @param delimiter the delimiter which is inserted between two consecutive strings.
 * @return the joined string; empty if @a parts is empty.
 */
std::string join_string(
    const std::set< std::string >& parts,
    const std::string& delimiter
)
{
    std::string result;

    for (std::set< std::string >::const_iterator it= parts.begin();
         it != parts.end();
         ++it)
    {
        // Decide by position, not by "result is still empty": an empty string
        // is a valid element (and sorts first), and the delimiter following it
        // must not get lost.
        if (it != parts.begin() )
        {
            result += delimiter;
        }
        result += *it;
    }

    return result;
} // eo join_string(const std::set< std::string >&,const std::string&)
504 | ||
4f7a7b9f PG |
/**
 * @brief joins a NULL-terminated array of C strings into a single string.
 *
 * @param parts array of C strings; the end is marked by a NULL entry.
 *              A NULL array pointer is accepted and treated like an empty array.
 * @param delimiter the delimiter which is inserted between two consecutive strings.
 * @return the joined string; empty if there are no entries.
 */
std::string join_string (
    const char *const parts[], /* assumed NULL-terminated */
    const std::string& delimiter
)
{
    std::string joined;

    if (parts == NULL || parts[0] == NULL)
    {
        return joined;
    }

    joined = parts[0];
    for (const char *const *entry = parts + 1; *entry != NULL; ++entry)
    {
        joined += delimiter;
        joined += *entry;
    }

    return joined;
}
528 | ||
376ec4fa | 529 | |
6a93d84a TJ |
530 | |
531 | /* | |
532 | ** conversions | |
533 | */ | |
534 | ||
535 | ||
536 | /** | |
537 | * @brief returns a hex string from a binary string. | |
538 | * @param str the (binary) string | |
539 | * @param upper_case_digits determine whether to use upper case characters for digits A-F. | |
540 | * @return the string in hex notation. | |
541 | */ | |
6ab3bc95 RP |
542 | std::string convert_binary_to_hex( |
543 | const std::string& str, | |
544 | bool upper_case_digits | |
6a93d84a TJ |
545 | ) |
546 | { | |
6ab3bc95 RP |
547 | std::string result; |
548 | std::string hexDigits(upper_case_digits ? hexDigitsUpper : hexDigitsLower); | |
549 | for ( std::string::const_iterator it= str.begin(); | |
550 | it != str.end(); | |
551 | ++it) | |
552 | { | |
553 | result.push_back( hexDigits[ ( (*it) >> 4) & 0x0f ] ); | |
554 | result.push_back( hexDigits[ (*it) & 0x0f ] ); | |
555 | } | |
556 | return result; | |
557 | } // eo convert_binary_to_hex(const std::string&,bool) | |
6a93d84a TJ |
558 | |
559 | ||
560 | /** | |
561 | * @brief converts a hex digit string to binary string. | |
562 | * @param str hex digit string | |
563 | * @return the binary string. | |
564 | * | |
565 | * The hex digit string may contains white spaces or colons which are treated | |
566 | * as delimiters between hex digit groups. | |
567 | * | |
568 | * @todo rework the handling of half nibbles (consistency)! | |
569 | */ | |
6ab3bc95 RP |
570 | std::string convert_hex_to_binary( |
571 | const std::string& str | |
6a93d84a | 572 | ) |
6ab3bc95 RP |
573 | throw (std::runtime_error) |
574 | { | |
575 | std::string result; | |
576 | char c= 0; | |
577 | bool hasNibble= false; | |
578 | bool lastWasWS= true; | |
579 | for ( std::string::const_iterator it= str.begin(); | |
580 | it != str.end(); | |
581 | ++it) | |
582 | { | |
583 | std::string::size_type p = hexDigitsLower.find( *it ); | |
584 | if (p== std::string::npos) | |
585 | { | |
586 | p= hexDigitsUpper.find( *it ); | |
587 | } | |
588 | if (p == std::string::npos) | |
589 | { | |
590 | if ( ( Whitespaces.find( *it ) != std::string::npos) // is it a whitespace? | |
6a93d84a | 591 | or ( *it == ':') // or a colon? |
6ab3bc95 RP |
592 | ) |
593 | { | |
594 | // we treat that as a valid delimiter: | |
595 | if (hasNibble) | |
6a93d84a | 596 | { |
6ab3bc95 RP |
597 | // 1 nibble before WS is treate as lower part: |
598 | result.push_back(c); | |
599 | // reset state: | |
600 | hasNibble= false; | |
6a93d84a | 601 | } |
6ab3bc95 RP |
602 | lastWasWS= true; |
603 | continue; | |
604 | } | |
605 | } | |
606 | if (p == std::string::npos ) | |
607 | { | |
608 | throw runtime_error("illegal character in hex digit string: " + str); | |
609 | } | |
610 | lastWasWS= false; | |
611 | if (hasNibble) | |
612 | { | |
613 | c<<=4; | |
614 | } | |
615 | else | |
616 | { | |
617 | c=0; | |
618 | } | |
619 | c+= (p & 0x0f); | |
620 | if (hasNibble) | |
621 | { | |
622 | //we already had a nibble, so a char is complete now: | |
623 | result.push_back( c ); | |
624 | hasNibble=false; | |
625 | } | |
626 | else | |
627 | { | |
628 | // this is the first nibble of a new char: | |
629 | hasNibble=true; | |
630 | } | |
631 | } | |
632 | if (hasNibble) | |
633 | { | |
634 | //well, there is one nibble left | |
635 | // let's do some heuristics: | |
636 | if (lastWasWS) | |
637 | { | |
638 | // if the preceeding character was a white space (or a colon) | |
639 | // we treat the nibble as lower part: | |
640 | //( this is consistent with shortened hex notations where leading zeros are not noted) | |
641 | result.push_back( c ); | |
642 | } | |
643 | else | |
644 | { | |
645 | // if it was part of a hex digit chain, we treat it as UPPER part (!!) | |
646 | result.push_back( c << 4 ); | |
647 | } | |
648 | } | |
649 | return result; | |
650 | } // eo convert_hex_to_binary(const std::string&) | |
651 | ||
652 | ||
1a0267e5 CH |
/**
 * @brief returns the names of the STL templates whose allocator argument
 *        shorten_stl_types() abbreviates.
 *
 * The list is filled on the first call ("std::list", "std::vector") and the
 * very same list object is returned by every call.
 *
 * @return reference to the (function-static) list of template names.
 */
static std::list<std::string>& alloc_template_starts()
{
    static std::list<std::string> starts;
    if (starts.empty())
    {
        static const char *const names[] = { "std::list", "std::vector" };
        starts.insert(starts.end(), names, names + sizeof(names) / sizeof(names[0]));
    }
    return starts;
}
663 | ||
664 | string shorten_stl_types(const string &input) | |
665 | { | |
666 | string output = input; | |
667 | ||
668 | // first: replace fixed string for std::string | |
669 | replace_all(output, "std::basic_string<char, std::char_traits<char>, std::allocator<char> >", | |
670 | "std::string"); | |
671 | ||
672 | // loop over list/vector/... that have an allocator, e.g. | |
673 | // std::list< some_type_here, std::allocator<some_type_here> > | |
674 | string::size_type start, comma, end, len, start_text_len; | |
675 | int n_open_brackets; | |
676 | string allocator_text; | |
677 | BOOST_FOREACH(const string &start_text, alloc_template_starts()) | |
678 | { | |
679 | start = 0; | |
680 | comma = 0; | |
681 | end = 0; | |
682 | start_text_len = start_text.length(); | |
683 | while( (start=output.find(start_text+"<", start)) != string::npos ) | |
684 | { | |
685 | len = output.length(); | |
686 | start += start_text_len+1; // start next iter and tests here after opening bracket | |
687 | ||
688 | // now comes the tricky part: find matching ',' and the closing '>' even if "subtype" is template again | |
689 | comma = start; | |
690 | n_open_brackets = 1; // the bracket right after start_text counts as first | |
691 | while (comma < len && n_open_brackets > 0) | |
692 | { | |
693 | if (output[comma] == ',' && n_open_brackets == 1) | |
694 | break; | |
695 | else if (output[comma] == '<') | |
696 | ++n_open_brackets; | |
697 | else if (output[comma] == '>') | |
698 | --n_open_brackets; | |
699 | ++comma; | |
700 | } | |
701 | end = comma+1; | |
702 | while (end < len && n_open_brackets > 0) | |
703 | { | |
704 | if (output[end] == '<') | |
705 | ++n_open_brackets; | |
706 | else if (output[end] == '>') | |
707 | { | |
708 | --n_open_brackets; | |
709 | if (n_open_brackets == 0) | |
710 | break; // do not increment end | |
711 | } | |
712 | ++end; | |
713 | } | |
714 | ||
715 | // check that start < comma < end < len && n_open_brackets == 0 | |
716 | if (start >= comma || comma >= end || end >= len || n_open_brackets != 0) | |
717 | continue; // input seems to be of unexpected form | |
718 | ||
719 | // check that type in allocator is same as until comma | |
720 | string type = output.substr(start, comma-start); | |
721 | if (type[type.length()-1] == '>') | |
722 | allocator_text = string("std::allocator<") + type + " > "; | |
723 | else | |
724 | allocator_text = string("std::allocator<") + type + "> "; | |
725 | if (output.substr(comma+2, end-comma-2) == allocator_text) | |
726 | output.replace(comma+2, end-comma-2, "_alloc_"); | |
727 | } | |
728 | } | |
729 | ||
730 | return output; | |
731 | } | |
732 | ||
2bb72337 TJ |
733 | typedef boost::shared_ptr<BIO> BIO_Ptr; |
734 | ||
735 | /** | |
736 | * @brief Converts openssl generic input/output to std::string | |
737 | * | |
738 | * Code adapted from keymakerd. | |
739 | * | |
740 | * @param bio Openssl's generic input/output | |
741 | * @return :string STL string | |
742 | **/ | |
743 | static std::string _convert_BIO_to_string(BIO *input) | |
744 | { | |
745 | std::string rtn; | |
746 | ||
747 | char *output = NULL; | |
748 | long written = BIO_get_mem_data(input, &output); | |
749 | if (written <= 0 || output == NULL) | |
750 | return rtn; | |
751 | ||
752 | rtn.assign(output, written); //lint !e534 !e732 | |
753 | return rtn; | |
754 | } //lint !e1764 | |
755 | ||
756 | /** | |
757 | * @brief base64 encode a string using OpenSSL base64 functions | |
758 | * | |
759 | * Data size limit is 2GB on 32 bit (LONG_MAX) | |
760 | * | |
761 | * @param input String to encode | |
1ebab1e3 | 762 | * @param one_line Encode all data as one line, no wrapping with line feeds |
2bb72337 TJ |
763 | * @return base64 encoded string |
764 | */ | |
1ebab1e3 | 765 | std::string base64_encode(const std::string &input, bool one_line) |
2bb72337 TJ |
766 | { |
767 | // check for empty buffer | |
768 | if (input.empty()) | |
769 | return input; | |
770 | ||
771 | // safety check to ensure our check afer BIO_write() works | |
772 | if (input.size() >= LONG_MAX) | |
773 | throw runtime_error("base64 encode: Too much data"); | |
774 | ||
775 | // setup encoder. Note: BIO_free_all frees both BIOs. | |
776 | BIO_Ptr base64_encoder(BIO_new(BIO_f_base64()), BIO_free_all); | |
777 | BIO *encoder_bio = base64_encoder.get(); | |
1ebab1e3 TJ |
778 | if (one_line) |
779 | BIO_set_flags(encoder_bio, BIO_FLAGS_BASE64_NO_NL); | |
2bb72337 TJ |
780 | |
781 | // chain output buffer and encoder together | |
782 | BIO *encoded_result = BIO_new(BIO_s_mem()); | |
783 | BIO_push(encoder_bio, encoded_result); | |
784 | ||
785 | // encode | |
786 | long written = BIO_write(encoder_bio, input.c_str(), input.size()); | |
787 | if ((unsigned)written != input.size()) | |
788 | { | |
789 | ostringstream out; | |
790 | out << "base64 encoding failed: input size: " | |
791 | << input.size() << " vs. output size: " << written; | |
792 | throw runtime_error(out.str()); | |
793 | } | |
794 | if (BIO_flush(encoder_bio) != 1) | |
795 | throw runtime_error("base64 encode: BIO_flush() failed"); | |
796 | ||
797 | return _convert_BIO_to_string(encoded_result); | |
798 | } | |
799 | ||
800 | /** | |
801 | * @brief base64 decode a string using OpenSSL base64 functions | |
802 | * | |
803 | * @param input String to decode | |
1ebab1e3 | 804 | * @param one_line Expect all base64 data in one line. Input with line feeds will fail. |
2bb72337 TJ |
805 | * @return base64 decoded string |
806 | */ | |
1ebab1e3 | 807 | std::string base64_decode(const std::string &input, bool one_line) |
2bb72337 TJ |
808 | { |
809 | // check for empty buffer | |
810 | if (input.empty()) | |
811 | return input; | |
812 | ||
813 | // safety check for BIO_new_mem_buf() | |
814 | if (input.size() >= INT_MAX) | |
815 | throw runtime_error("base64 decode: Too much data"); | |
816 | ||
817 | // setup encoder. Note: BIO_free_all frees both BIOs. | |
818 | BIO_Ptr base64_decoder(BIO_new(BIO_f_base64()), BIO_free_all); | |
819 | BIO *bio_base64 = base64_decoder.get(); | |
1ebab1e3 TJ |
820 | if (one_line) |
821 | BIO_set_flags(bio_base64, BIO_FLAGS_BASE64_NO_NL); | |
2bb72337 TJ |
822 | |
823 | // chain input buffer and decoder together | |
824 | BIO *bio_input = BIO_new_mem_buf((void*)input.c_str(), input.size()); | |
825 | bio_input = BIO_push(bio_base64, bio_input); | |
826 | ||
827 | BIO_Ptr decoded_result(BIO_new(BIO_s_mem()), BIO_free_all); | |
828 | BIO *bio_decoded = decoded_result.get(); | |
829 | const int convbuf_size = 512; | |
830 | char convbuf[convbuf_size]; | |
831 | ||
832 | long read_bytes = 0; | |
833 | while((read_bytes = BIO_read(bio_input, convbuf, convbuf_size)) > 0) | |
834 | { | |
835 | BOOST_ASSERT(read_bytes <= convbuf_size); | |
836 | long written_bytes = BIO_write(bio_decoded, convbuf, read_bytes); | |
837 | if (written_bytes != read_bytes) | |
838 | { | |
839 | ostringstream out; | |
840 | out << "base64 decoding failed: read_bytes: " | |
841 | << read_bytes << " vs. written_bytes: " << written_bytes; | |
842 | throw runtime_error(out.str()); | |
843 | } | |
844 | } | |
845 | if (read_bytes == -2 || read_bytes == -1) | |
846 | throw runtime_error("base64 decode: Error during decoding"); | |
847 | ||
848 | return _convert_BIO_to_string(bio_decoded); | |
849 | } | |
850 | ||
6ab3bc95 RP |
851 | } // eo namespace I2n |
852 | ||
853 | ||
854 | ||
6a93d84a | 855 | |
e93545dd GE |
/**
 * @brief converts a string from ISO-8859-1 to UTF-8 using iconv.
 *
 * The result of the iconv() call itself is not checked: whatever was converted
 * is returned. Embedded NUL bytes are preserved.
 *
 * @param isostring the ISO-8859-1 encoded string
 * @return the UTF-8 encoded string
 * @throw std::runtime_error if the conversion can't be set up or the buffer
 *        can't be allocated
 */
std::string iso_to_utf8(const std::string& isostring)
{
    iconv_t i2utf8 = iconv_open("UTF-8", "ISO-8859-1");

    // check the descriptor (and not the address of this function)
    if (i2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from ISO-8859-1 to UTF-8");

    size_t in_size = isostring.size();
    const size_t buf_size = in_size * 4;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>(malloc(buf_size + 1));
    if (buf == NULL)
    {
        // don't leak the conversion descriptor
        iconv_close(i2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const input pointer but does not modify the input
    char *in = const_cast<char *>(isostring.c_str());
    char *out = buf;
    iconv(i2utf8, &in, &in_size, &out, &out_size);

    // iconv() decreased out_size by the number of bytes written; taking the
    // length explicitly keeps embedded NUL bytes in the result.
    std::string result(buf, buf_size - out_size);

    free(buf);
    iconv_close(i2utf8);

    return result;
}
885 | ||
/**
 * @brief Convert a UTF-8 encoded string to ISO-8859-1 (Latin-1) using iconv.
 * @param utf8string the UTF-8 encoded input
 * @return the ISO-8859-1 encoded text. If iconv stops early (e.g. at a character
 *         it cannot convert), only the part converted so far is returned
 *         (the iconv result is not checked).
 * @throws std::runtime_error if iconv does not offer the conversion or the
 *         output buffer cannot be allocated
 */
std::string utf8_to_iso(const std::string& utf8string)
{
    iconv_t utf82iso = iconv_open("ISO-8859-1","UTF-8");

    if (utf82iso == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to ISO-8859-1");

    size_t in_size = utf8string.size();
    const size_t buf_size = in_size;        // the output buffer is sized like the input
    size_t out_size = buf_size;

    char *buf = static_cast<char *>(malloc(buf_size+1));
    if (buf == NULL)
    {
        iconv_close(utf82iso);              // don't leak the descriptor on the error path
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const input pointer but does not modify the input
    char *in = const_cast<char *>(utf8string.c_str());
    char *out = buf;
    iconv(utf82iso, &in, &in_size, &out, &out_size);
    iconv_close(utf82iso);

    // out_size now holds the unused space; copy exactly the bytes written
    // (length-based, so embedded NUL bytes do not cut the result short)
    const std::string result(buf, buf_size-out_size);
    free(buf);

    return result;
} // eo utf8_to_iso(const std::string&)
915 | ||
a5f3af6e GE |
/**
 * @brief Convert a UTF-8 encoded string into a zero-terminated wide character buffer.
 * @param utf8string the UTF-8 encoded input
 * @return a malloc()ed, zero-terminated buffer; the caller must release it with free()
 * @throws std::runtime_error if iconv does not offer the conversion, the buffer
 *         cannot be allocated or the input cannot be converted
 *
 * NOTE(review): the conversion target is fixed to "UCS-4LE" while the result is
 * handed out as wchar_t; this presumably relies on a 4 byte, little-endian
 * wchar_t -- confirm before using it on other platforms.
 */
wchar_t* utf8_to_wbuf(const std::string& utf8string)
{
    iconv_t utf82wstr = iconv_open("UCS-4LE","UTF-8");

    if (utf82wstr == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UCS-4");

    size_t in_size = utf8string.size();
    // one output element per input byte plus one for the terminator
    const size_t buf_bytes = (in_size+1)*sizeof(wchar_t);
    size_t out_size = buf_bytes;

    wchar_t *buf = static_cast<wchar_t *>(malloc(buf_bytes));
    if (buf == NULL)
    {
        iconv_close(utf82wstr);             // don't leak the descriptor on the error path
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const input pointer but does not modify the input
    char *in = const_cast<char *>(utf8string.c_str());
    char *out = reinterpret_cast<char *>(buf);
    const size_t rc = iconv(utf82wstr, &in, &in_size, &out, &out_size);
    iconv_close(utf82wstr);

    if (rc == (size_t)-1)
    {
        free(buf);                          // previously leaked together with the descriptor
        throw std::runtime_error("error converting char encodings");
    }

    // out_size is the unused space in bytes; terminate right after the last element written
    buf[ (buf_bytes-out_size) / sizeof(wchar_t) ] = 0;

    return buf;
} // eo utf8_to_wbuf(const std::string&)
941 | ||
/**
 * @brief Convert a string from the "UTF-7-IMAP" iconv encoding to UTF-8.
 * @param utf7imapstring the UTF-7-IMAP encoded input
 * @return the UTF-8 encoded text. If iconv stops before the end of the input,
 *         only the part converted so far is returned (the iconv result is not checked).
 * @throws std::runtime_error if the iconv in use does not know "UTF-7-IMAP"
 *         or the output buffer cannot be allocated
 */
std::string utf7imap_to_utf8(const std::string& utf7imapstring)
{
    iconv_t utf7imap2utf8 = iconv_open("UTF-8","UTF-7-IMAP");

    if (utf7imap2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-7-IMAP to UTF-8");

    size_t in_size = utf7imapstring.size();
    const size_t buf_size = in_size*4;      // generous upper bound for the UTF-8 output
    size_t out_size = buf_size;

    char *buf = static_cast<char *>(malloc(buf_size+1));
    if (buf == NULL)
    {
        iconv_close(utf7imap2utf8);         // don't leak the descriptor on the error path
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const input pointer but does not modify the input
    char *in = const_cast<char *>(utf7imapstring.c_str());
    char *out = buf;
    iconv(utf7imap2utf8, &in, &in_size, &out, &out_size);
    iconv_close(utf7imap2utf8);

    // out_size now holds the unused space; copy exactly the bytes written
    const std::string result(buf, buf_size-out_size);
    free(buf);

    return result;
} // eo utf7imap_to_utf8(const std::string&)
971 | ||
6a2b6dd1 TJ |
/**
 * @brief Convert a UTF-8 string to the "UTF-7-IMAP" iconv encoding.
 * @param utf8string the UTF-8 encoded input
 * @return the UTF-7-IMAP encoded text. If iconv stops before the end of the input
 *         or the output buffer is too small, the result is truncated
 *         (the iconv result is not checked).
 * @throws std::runtime_error if the iconv in use does not know "UTF-7-IMAP"
 *         or the output buffer cannot be allocated
 */
std::string utf8_to_utf7imap(const std::string& utf8string)
{
    iconv_t utf82utf7imap = iconv_open("UTF-7-IMAP", "UTF-8");

    if (utf82utf7imap == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to UTF-7-IMAP");

    // UTF-7 is base64 encoded, a buffer 10x as large
    // as the utf-8 buffer should be enough. If not the string will be truncated.
    size_t in_size = utf8string.size();
    const size_t buf_size = in_size*10;
    size_t out_size = buf_size;

    char *buf = static_cast<char *>(malloc(buf_size+1));
    if (buf == NULL)
    {
        iconv_close(utf82utf7imap);         // don't leak the descriptor on the error path
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const input pointer but does not modify the input
    char *in = const_cast<char *>(utf8string.c_str());
    char *out = buf;
    iconv(utf82utf7imap, &in, &in_size, &out, &out_size);

    // UTF-7 is a stateful encoding: a NULL input asks iconv to write the
    // sequence that returns to the initial state. Without it the pending
    // base64 bits and the closing '-' are missing when the text ends in a
    // non-ASCII character.
    iconv(utf82utf7imap, NULL, NULL, &out, &out_size);
    iconv_close(utf82utf7imap);

    // out_size now holds the unused space; copy exactly the bytes written
    const std::string result(buf, buf_size-out_size);
    free(buf);

    return result;
} // eo utf8_to_utf7imap(const std::string&)
1003 | ||
118e216e TJ |
/**
 * @brief Split a string into (html) tag tokens and the text between them.
 * @param tokenized receives the tokens in input order (appended); "second" is
 *        true for a complete "<...>" tag and false for everything else
 * @param input the string to split
 *
 * A '<' always starts a new token. Whatever was collected before it -- plain
 * text or a tag that was never closed -- is stored as a non-tag token.
 * A '>' outside of a tag is ordinary text.
 */
void tokenize_by_tag(std::vector<std::pair<std::string,bool> > &tokenized, const std::string &input)
{
    std::string pending;
    bool in_tag = false;

    for (std::string::const_iterator ch = input.begin(); ch != input.end(); ++ch)
    {
        if (*ch == '<')
        {
            // flush what we have so far, then begin the tag with its '<'
            if (!pending.empty())
                tokenized.push_back(std::make_pair(pending, false));

            pending.assign(1, *ch);
            in_tag = true;
            continue;
        }

        const bool closes_tag = in_tag && (*ch == '>');
        pending += *ch;

        if (closes_tag)
        {
            // tag is complete (it includes both angle brackets)
            tokenized.push_back(std::make_pair(pending, true));
            pending.clear();
            in_tag = false;
        }
    }

    // anything still collected is trailing text or an unterminated tag
    if (!pending.empty())
        tokenized.push_back(std::make_pair(pending, false));
} // eo tokenize_by_tag
118e216e | 1043 | |
118e216e TJ |
1044 | |
1045 | std::string strip_html_tags(const std::string &input) | |
1046 | { | |
6ab3bc95 RP |
1047 | // Pair first: string, second: isTag |
1048 | vector<pair<string,bool> > tokenized; | |
1049 | tokenize_by_tag (tokenized, input); | |
118e216e | 1050 | |
6ab3bc95 RP |
1051 | string output; |
1052 | vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end(); | |
83d700e9 | 1053 | for (token = tokenized.begin(); token != tokens_end; ++token) |
6ab3bc95 RP |
1054 | if (!token->second) |
1055 | output += token->first; | |
1056 | ||
1057 | return output; | |
1058 | } // eo strip_html_tags | |
118e216e | 1059 | |
118e216e TJ |
1060 | |
1061 | // Smart-encode HTML en | |
1062 | string smart_html_entities(const std::string &input) | |
1063 | { | |
6ab3bc95 RP |
1064 | // Pair first: string, second: isTag |
1065 | vector<pair<string,bool> > tokenized; | |
1066 | tokenize_by_tag (tokenized, input); | |
1067 | ||
1068 | string output; | |
1069 | vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end(); | |
83d700e9 | 1070 | for (token = tokenized.begin(); token != tokens_end; ++token) |
6ab3bc95 RP |
1071 | { |
1072 | // keep HTML tags as they are | |
1073 | if (token->second) | |
1074 | output += token->first; | |
1075 | else | |
1076 | output += html_entities(token->first); | |
1077 | } | |
1078 | ||
1079 | return output; | |
118e216e TJ |
1080 | } |
1081 | ||
6ab3bc95 | 1082 | |
a5f3af6e GE |
/**
 * @brief Find the first character that does not fit into 7 bits.
 * @param str the string to scan
 * @return index of the first char with a value above 127,
 *         std::string::npos if there is none
 */
std::string::size_type find_8bit(const std::string &str)
{
    std::string::size_type index = 0;

    for (std::string::const_iterator ch = str.begin(); ch != str.end(); ++ch, ++index)
    {
        // values above 127 are exactly those with the top bit set
        if ( (static_cast<unsigned char>(*ch) & 0x80) != 0 )
            return index;
    }

    return std::string::npos;
} // eo find_8bit
1092 | ||
118e216e TJ |
1093 | // encoded UTF-8 chars into HTML entities |
1094 | string html_entities(std::string str) | |
1095 | { | |
6ab3bc95 RP |
1096 | // Normal chars |
1097 | replace_all (str, "&", "&"); | |
6ab3bc95 RP |
1098 | replace_all (str, "<", "<"); |
1099 | replace_all (str, ">", ">"); | |
980577e1 TJ |
1100 | replace_all (str, "\"", """); |
1101 | replace_all (str, "'", "'"); | |
1102 | replace_all (str, "/", "/"); | |
6ab3bc95 RP |
1103 | |
1104 | // Umlauts | |
1105 | replace_all (str, "\xC3\xA4", "ä"); | |
1106 | replace_all (str, "\xC3\xB6", "ö"); | |
1107 | replace_all (str, "\xC3\xBC", "ü"); | |
1108 | replace_all (str, "\xC3\x84", "Ä"); | |
1109 | replace_all (str, "\xC3\x96", "Ö"); | |
1110 | replace_all (str, "\xC3\x9C", "Ü"); | |
1111 | ||
1112 | // Misc | |
1113 | replace_all (str, "\xC3\x9F", "ß"); | |
1114 | ||
1115 | // conversion of remaining non-ASCII chars needed? | |
1116 | // just do if needed because of performance | |
1117 | if (find_8bit(str) != string::npos) | |
1118 | { | |
1119 | // convert to fixed-size encoding UTF-32 | |
1120 | wchar_t* wbuf=utf8_to_wbuf(str); | |
1121 | ostringstream target; | |
1122 | ||
1123 | // replace all non-ASCII chars with HTML representation | |
1124 | for (int p=0; wbuf[p] != 0; p++) | |
1125 | { | |
1126 | unsigned int c=wbuf[p]; | |
1127 | ||
1128 | if (c <= 127) | |
1129 | target << static_cast<unsigned char>(c); | |
1130 | else | |
1131 | target << "&#" << c << ';'; | |
1132 | } | |
1133 | ||
1134 | free(wbuf); | |
1135 | ||
1136 | str=target.str(); | |
1137 | } | |
1138 | ||
1139 | return str; | |
1140 | } // eo html_entities(std::string) | |
1141 | ||
554f813d GE |
1142 | // convert HTML entities to something that can be viewed on a basic text console (restricted to ASCII-7) |
1143 | string html_entities_to_console(std::string str) | |
1144 | { | |
1145 | // Normal chars | |
1146 | replace_all (str, "&", "&"); | |
1147 | replace_all (str, "<", "<"); | |
1148 | replace_all (str, ">", ">"); | |
1149 | replace_all (str, """, "\""); | |
1150 | replace_all (str, "'", "'"); | |
1151 | replace_all (str, "/", "/"); | |
1152 | ||
1153 | // Umlauts | |
1154 | replace_all (str, "ä", "ae"); | |
1155 | replace_all (str, "ö", "oe"); | |
1156 | replace_all (str, "ü", "ue"); | |
1157 | replace_all (str, "Ä", "Ae"); | |
1158 | replace_all (str, "Ö", "Oe"); | |
1159 | replace_all (str, "Ü", "Ue"); | |
1160 | ||
1161 | // Misc | |
1162 | replace_all (str, "ß", "ss"); | |
1163 | ||
1164 | return str; | |
1165 | } | |
118e216e | 1166 | |
3f5c5ccd CH |
1167 | // find_html_comments + remove_html_comments(str, comments) |
1168 | void remove_html_comments(string &str) | |
1169 | { | |
46dd1321 | 1170 | vector<CommentZone> comments = find_html_comments(str); |
3f5c5ccd CH |
1171 | remove_html_comments(str, comments); |
1172 | } | |
1173 | ||
1174 | // find all html comments, behaving correctly if they are nested; ignores comment tags ("<!--FOO .... BAR-->") | |
1175 | // If there are invalid comments ("-->" before "<!--" or different number of closing and opening tags), | |
1176 | // then the unknown index of corresponding start/end tag will be represented by a string::npos | |
1177 | // Indices are from start of start tag until first index after closing tag | |
46dd1321 | 1178 | vector<CommentZone> find_html_comments(const std::string &str) |
3f5c5ccd CH |
1179 | { |
1180 | static const string START = "<!--"; | |
1181 | static const string CLOSE = "-->"; | |
1182 | static const string::size_type START_LEN = START.length(); | |
1183 | static const string::size_type CLOSE_LEN = CLOSE.length(); | |
1184 | ||
46dd1321 TJ |
1185 | vector<CommentZone> comments; |
1186 | ||
3f5c5ccd CH |
1187 | // in order to find nested comments, need either recursion or a stack |
1188 | vector<string::size_type> starts; // stack of start tags | |
1189 | ||
1190 | string::size_type pos = 0; | |
1191 | string::size_type len = str.length(); | |
1192 | string::size_type next_start, next_close; | |
1193 | ||
1194 | while (pos < len) // not really needed but just in case | |
1195 | { | |
1196 | next_start = str.find(START, pos); | |
1197 | next_close = str.find(CLOSE, pos); | |
1198 | ||
1199 | if ( (next_start == string::npos) && (next_close == string::npos) ) | |
1200 | break; // we are done | |
1201 | ||
1202 | else if ( (next_start == string::npos) || (next_close < next_start) ) // close one comment (pop) | |
1203 | { | |
1204 | if (starts.empty()) // closing tag without a start | |
1205 | comments.push_back(CommentZone(string::npos, next_close+CLOSE_LEN)); | |
1206 | else | |
1207 | { | |
1208 | comments.push_back(CommentZone(starts.back(), next_close+CLOSE_LEN)); | |
1209 | starts.pop_back(); | |
1210 | } | |
1211 | pos = next_close + CLOSE_LEN; | |
1212 | } | |
1213 | ||
1214 | else if ( (next_close == string::npos) || (next_start < next_close) ) // start a new comment (push) | |
1215 | { | |
1216 | starts.push_back(next_start); | |
1217 | pos = next_start + START_LEN; | |
1218 | } | |
1219 | } | |
1220 | ||
1221 | // add comments that have no closing tag from back to front (important for remove_html_comments!) | |
1222 | while (!starts.empty()) | |
1223 | { | |
1224 | comments.push_back(CommentZone(starts.back(), string::npos)); | |
1225 | starts.pop_back(); | |
1226 | } | |
46dd1321 TJ |
1227 | |
1228 | return comments; | |
3f5c5ccd CH |
1229 | } |
1230 | ||
1231 | // remove all html comments foundby find_html_comments | |
1232 | void remove_html_comments(std::string &str, const vector<CommentZone> &comments) | |
1233 | { | |
1234 | // remember position where last removal started | |
1235 | string::size_type last_removal_start = str.length(); | |
1236 | ||
1237 | // Go from back to front to not mess up indices. | |
1238 | // This requires that bigger comments, that contain smaller comments, come AFTER | |
1239 | // the small contained comments in the comments vector (i.e. comments are ordered by | |
1240 | // their closing tag, not their opening tag). This is true for results from find_html_comments | |
1241 | BOOST_REVERSE_FOREACH(const CommentZone &comment, comments) | |
1242 | { | |
1243 | if (comment.first == string::npos) | |
1244 | { | |
1245 | str = str.replace(0, comment.second, ""); // comment starts "before" str --> delete from start | |
1246 | break; // there can be no more | |
1247 | } | |
1248 | else if (comment.first >= last_removal_start) | |
1249 | { | |
1250 | continue; // this comment is inside another comment that we have removed already | |
1251 | } | |
1252 | else if (comment.second == string::npos) // comment ends "after" str --> delete until end | |
1253 | { | |
1254 | str = str.replace(comment.first, string::npos, ""); | |
1255 | last_removal_start = comment.first; | |
1256 | } | |
1257 | else | |
1258 | { | |
1259 | str = str.replace(comment.first, comment.second-comment.first, ""); | |
1260 | last_removal_start = comment.first; | |
1261 | } | |
1262 | } | |
1263 | } | |
1264 | ||
e93545dd GE |
/**
 * @brief Replace every occurrence of a substring.
 * @param base the string to work on; modified in place
 * @param ist the text to search for, must not be empty
 * @param soll the replacement text
 * @return true if at least one occurrence was replaced
 * @throws std::runtime_error if @a ist is empty
 *
 * The search continues behind the inserted text, so a replacement that
 * contains the search text is not replaced again.
 */
bool replace_all(std::string &base, const std::string &ist, const std::string &soll)
{
    if (ist.empty())
        throw std::runtime_error("replace_all called with empty search string");

    bool replaced_any = false;

    for (std::string::size_type hit = base.find(ist);
         hit != std::string::npos;
         hit = base.find(ist, hit + soll.size()))
    {
        base.replace(hit, ist.size(), soll);
        replaced_any = true;
    }

    return replaced_any;
}

/// Overload for C strings, see replace_all(std::string&, const std::string&, const std::string&).
bool replace_all(std::string &base, const char *ist, const char *soll)
{
    return replace_all(base, std::string(ist), std::string(soll));
}

/// Overload with a C string as replacement text.
bool replace_all(std::string &base, const std::string &ist, const char *soll)
{
    return replace_all(base, ist, std::string(soll));
}

/// Overload taking both texts by pointer; the pointers are dereferenced without a check.
bool replace_all(std::string &base, const std::string *ist, const std::string *soll)
{
    const std::string &search_text = *ist;
    const std::string &replacement = *soll;
    return replace_all(base, search_text, replacement);
}

/// Overload with a C string as search text and the replacement by pointer.
bool replace_all(std::string &base, const char *ist, const std::string *soll)
{
    return replace_all(base, std::string(ist), *soll);
}
1306 | ||
b953bf36 GE |
/**
 * @brief Replace all characters that could be problematic or a security risk when logged.
 * @param str the original string
 * @param replace_with the character put in place of every unsafe character
 * @return a copy of @a str that is safe to send to syslog or other logfiles
 *
 * Only the chars from 0x20 (space) up to and including 0x7E (~) are kept.
 * See e.g. RFC 5424, section 8.2 or the posix character class "printable".
 * NUL, control characters and 8 bit chars (and with them UTF-8 sequences) are replaced.
 */
std::string sanitize_for_logging(const std::string &str, const char replace_with)
{
    std::string cleaned(str);

    for (std::string::iterator ch = cleaned.begin(); ch != cleaned.end(); ++ch)
    {
        const bool printable = (*ch >= 0x20) && (*ch <= 0x7E);
        if (!printable)
            *ch = replace_with;
    }

    return cleaned;
} // eo sanitize_for_logging
1329 | ||
e5b21dbb | 1330 | #if 0 |
e93545dd GE |
1331 | string to_lower(const string &src) |
1332 | { | |
6ab3bc95 | 1333 | string dst = src; |
e93545dd | 1334 | |
6ab3bc95 RP |
1335 | string::size_type pos, end = dst.size(); |
1336 | for (pos = 0; pos < end; pos++) | |
1337 | dst[pos] = tolower(dst[pos]); | |
e93545dd | 1338 | |
6ab3bc95 | 1339 | return dst; |
e93545dd GE |
1340 | } |
1341 | ||
1342 | string to_upper(const string &src) | |
1343 | { | |
6ab3bc95 | 1344 | string dst = src; |
e93545dd | 1345 | |
6ab3bc95 RP |
1346 | string::size_type pos, end = dst.size(); |
1347 | for (pos = 0; pos < end; pos++) | |
1348 | dst[pos] = toupper(dst[pos]); | |
e93545dd | 1349 | |
6ab3bc95 | 1350 | return dst; |
e93545dd | 1351 | } |
e5b21dbb | 1352 | #endif |
e93545dd | 1353 | |
83809f5e | 1354 | const int MAX_UNIT_FORMAT_SYMBOLS = 6; |
d1ea9075 | 1355 | |
2cb9a9c5 | 1356 | const string shortUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = { |
d1ea9075 GMF |
1357 | " B", |
1358 | " KB", | |
1359 | " MB", | |
1360 | " GB", | |
1361 | " TB", | |
83809f5e | 1362 | " PB" |
d1ea9075 GMF |
1363 | }; |
1364 | ||
2cb9a9c5 | 1365 | const string longUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = { |
5cb766b9 GMF |
1366 | i18n_noop(" Bytes"), |
1367 | i18n_noop(" KBytes"), | |
1368 | i18n_noop(" MBytes"), | |
1369 | i18n_noop(" GBytes"), | |
1370 | i18n_noop(" TBytes"), | |
83809f5e | 1371 | i18n_noop(" PBytes") |
d1ea9075 GMF |
1372 | }; |
1373 | ||
72a94426 | 1374 | |
/**
 * @brief Round a number to a fraction of 1/rounding_multiplier.
 * @param number the value to round
 * @param rounding_multiplier the number of steps per unit (10 gives one decimal place)
 * @return the rounded value
 *
 * The value is scaled, 0.5 is added and the result is cut off by a cast to
 * int64_t before it is scaled back.
 */
static long double rounding_upwards(
    const long double number,
    const int rounding_multiplier
)
{
    const long double shifted = number * rounding_multiplier + 0.5;
    const int64_t whole_steps = static_cast<int64_t>(shifted);

    return static_cast<long double>(whole_steps) / static_cast<long double>(rounding_multiplier);
}
1388 | ||
1389 | ||
81267544 GMF |
1390 | string nice_unit_format( |
1391 | const int64_t input, | |
70fc0674 GMF |
1392 | const UnitFormat format, |
1393 | const UnitBase base | |
81267544 | 1394 | ) |
6ab3bc95 | 1395 | { |
d1ea9075 | 1396 | // select the system of units (decimal or binary) |
81267544 | 1397 | int multiple = 0; |
a398513a | 1398 | if (base == UnitBase1000) |
81267544 GMF |
1399 | { |
1400 | multiple = 1000; | |
1401 | } | |
1402 | else | |
1403 | { | |
1404 | multiple = 1024; | |
1405 | } | |
1406 | ||
1407 | long double size = input; | |
6ab3bc95 | 1408 | |
d1ea9075 GMF |
1409 | // check the size of the input number to fit in the appropriate symbol |
1410 | int sizecount = 0; | |
81267544 | 1411 | while (size > multiple) |
6ab3bc95 | 1412 | { |
81267544 GMF |
1413 | size = size / multiple; |
1414 | sizecount++; | |
83809f5e GMF |
1415 | |
1416 | // rollback to the previous values and stop the loop when cannot | |
1417 | // represent the number length. | |
1418 | if (sizecount >= MAX_UNIT_FORMAT_SYMBOLS) | |
1419 | { | |
1420 | size = size * multiple; | |
1421 | sizecount--; | |
1422 | break; | |
1423 | } | |
6ab3bc95 RP |
1424 | } |
1425 | ||
a398513a GMF |
1426 | // round the input number "half up" to multiples of 10 |
1427 | const int rounding_multiplier = 10; | |
72a94426 | 1428 | size = rounding_upwards(size, rounding_multiplier); |
6ab3bc95 | 1429 | |
d1ea9075 | 1430 | // format the input number, placing the appropriate symbol |
6ab3bc95 | 1431 | ostringstream out; |
6ab3bc95 | 1432 | out.setf (ios::fixed); |
a398513a | 1433 | if (format == ShortUnitFormat) |
d1ea9075 GMF |
1434 | { |
1435 | out.precision(1); | |
68d37a5c | 1436 | out << size << i18n( shortUnitFormatSymbols[sizecount].c_str() ); |
d1ea9075 GMF |
1437 | } |
1438 | else | |
6ab3bc95 | 1439 | { |
d1ea9075 | 1440 | out.precision (2); |
68d37a5c | 1441 | out << size << i18n( longUnitFormatSymbols[sizecount].c_str() ); |
6ab3bc95 RP |
1442 | } |
1443 | ||
1444 | return out.str(); | |
1445 | } // eo nice_unit_format(int input) | |
1446 | ||
e93545dd | 1447 | |
5cd64148 CH |
1448 | string nice_unit_format( |
1449 | const double input, | |
1450 | const UnitFormat format, | |
1451 | const UnitBase base | |
1452 | ) | |
1453 | { | |
1454 | // round as double and cast to int64_t | |
1455 | // cast raised overflow error near max val of int64_t (~9.2e18, see unittest) | |
1456 | int64_t input_casted_and_rounded = | |
1457 | boost::numeric_cast<int64_t>( round(input) ); | |
1458 | ||
1459 | // now call other | |
1460 | return nice_unit_format( input_casted_and_rounded, format, base ); | |
1461 | } // eo nice_unit_format(double input) | |
1462 | ||
1463 | ||
47c07fba GE |
/**
 * @brief Put a string in double quotes and escape the critical characters.
 * @param s the string to escape
 * @return @a s enclosed in double quotes; '"' and '\' get a backslash in front,
 *         carriage return becomes \r and line feed becomes \n (two chars each)
 */
std::string escape(const std::string &s)
{
    std::string out;
    out.reserve(s.size() + 2);

    out += '"';
    for (std::string::const_iterator ch = s.begin(); ch != s.end(); ++ch)
    {
        switch (*ch)
        {
            case '"':
            case '\\':
                out += '\\';
                out += *ch;
                break;
            case '\r':
                out += "\\r";
                break;
            case '\n':
                out += "\\n";
                break;
            default:
                out += *ch;
        }
    }
    out += '"';

    return out;
} // eo escape(const std::string&)
47c07fba | 1494 | |
47c07fba | 1495 | |
6ab3bc95 RP |
/**
 * @brief Extract and unescape a double quoted string, the counterpart of escape().
 * @param s the text containing the quoted string
 * @param startpos index of the opening double quote in @a s
 * @param endpos receives the index of the closing double quote in @a s;
 *        if there is no closing quote it ends up equal to @a startpos
 * @return the unescaped content between the quotes (everything after the
 *         opening quote if the closing quote is missing)
 * @throws std::out_of_range if @a startpos is outside of @a s, if there is no
 *         double quote at @a startpos or if the content ends with a single backslash
 */
std::string descape(const std::string &s, int startpos, int &endpos)
{
    if (s.at(startpos) != '"')
        throw std::out_of_range("value not type escaped string");

    std::string out = s.substr(startpos+1);

    // search for the closing quote: the first '"' that is not escaped
    std::string::size_type quote = 0;
    while ( (quote = out.find('"', quote)) != std::string::npos)
    {
        // count the backslashes directly in front of the quote;
        // an odd number means the quote itself is escaped
        std::string::size_type backslashes = 0;
        while (backslashes < quote && out[quote-1-backslashes] == '\\')
            ++backslashes;

        if (backslashes % 2 == 0)
            break;

        ++quote;
    }

    // cut off the closing quote and everything behind it
    if (quote != std::string::npos)
        out.erase(quote);

    // tell the caller where the string ended
    endpos = startpos+quote+1;

    // now resolve the backslash sequences inside the string
    std::string::size_type slash = 0;
    while ( (slash = out.find('\\', slash)) != std::string::npos)
    {
        const char code = out.at(slash+1);      // throws on a backslash at the very end

        if (code == 'r')
            out.replace(slash, 2, "\r");
        else if (code == 'n')
            out.replace(slash, 2, "\n");
        else
            out.erase(slash, 1);                // keep the escaped char itself

        // continue behind the char that was just produced
        ++slash;
    }

    return out;
} // eo descape(const std::string&,int,int&)
47c07fba | 1555 | |
e93545dd | 1556 | |
47c07fba GE |
/**
 * @brief Wrap a string in single quotes.
 * @param input the string to quote
 * @return @a input enclosed in single quotes; every single quote inside is
 *         written as '\'' (close the quotes, escaped quote, reopen the quotes)
 */
std::string escape_shellarg(const std::string &input)
{
    std::string quoted(1, '\'');

    for (std::string::size_type i = 0; i < input.size(); ++i)
    {
        const char ch = input[i];

        // the quote itself is appended below and completes the '\'' sequence
        if (ch == '\'')
            quoted.append("'\\'");

        quoted.push_back(ch);
    }

    quoted.push_back('\'');
    return quoted;
} // eo escape_shellarg(const std::string&)