Commit | Line | Data |
---|---|---|
e93545dd GE |
1 | /*************************************************************************** |
2 | escape.cpp - escaping of strings | |
3 | ------------------- | |
4 | begin : Sun Nov 14 1999 | |
5 | copyright : (C) 1999 by Intra2net AG | |
6 | email : info@intra2net.com | |
7 | ***************************************************************************/ | |
8 | ||
9 | #include <iostream> | |
10 | #include <string> | |
11 | #include <sstream> | |
12 | #include <stdexcept> | |
13 | ||
14 | #include <stdlib.h> | |
15 | #include <iconv.h> | |
16 | #include <i18n.h> | |
17 | ||
18 | #include <stringfunc.hxx> | |
19 | ||
20 | using namespace std; | |
21 | ||
// Convert an ISO-8859-1 encoded string to UTF-8 using iconv.
//
// Throws std::runtime_error if iconv offers no ISO-8859-1 -> UTF-8 conversion
// or if the output buffer cannot be allocated.
// The return value of iconv() itself is not checked: whatever was converted
// before a conversion error is returned.
// Embedded NUL bytes are preserved in the result.
std::string iso_to_utf8(const std::string& isostring)
{
    iconv_t i2utf8 = iconv_open ("UTF-8", "ISO-8859-1");

    // Test the conversion descriptor (not the function's own address)
    if (i2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from ISO-8859-1 to UTF-8");

    size_t in_size = isostring.size();
    const size_t capacity = in_size*4;
    size_t out_size = capacity;

    char *buf = static_cast<char *>(malloc(capacity+1));
    if (buf == NULL)
    {
        // don't leak the conversion descriptor on the error path
        iconv_close (i2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() declares its input buffer as `char **` (POSIX) or as
    // `const char **` (some libiconv builds). This adapter converts
    // to whichever pointer type the prototype in use asks for.
    struct InputCursor
    {
        const char **cursor;
        operator const char **() const { return cursor; }
        operator char **() const { return const_cast<char **>(cursor); }
    };

    const char *in = isostring.data();
    InputCursor in_arg = { &in };
    char *out = buf;
    iconv (i2utf8, in_arg, &in_size, &out, &out_size);
    iconv_close (i2utf8);

    // capacity - out_size is the number of bytes iconv actually produced
    std::string result;
    try
    {
        result.assign(buf, capacity-out_size);
    }
    catch (...)
    {
        free(buf);
        throw;
    }

    free(buf);
    return result;
}
51 | ||
// Convert a UTF-8 encoded string to ISO-8859-1 using iconv.
//
// Throws std::runtime_error if iconv offers no UTF-8 -> ISO-8859-1 conversion
// or if the output buffer cannot be allocated.
// The return value of iconv() itself is not checked: whatever was converted
// before a conversion error is returned.
// Embedded NUL bytes are preserved in the result.
std::string utf8_to_iso(const std::string& utf8string)
{
    iconv_t utf82iso = iconv_open ("ISO-8859-1","UTF-8");

    if (utf82iso == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to ISO-8859-1");

    size_t in_size = utf8string.size();
    // the output buffer is sized to the input length
    const size_t capacity = in_size;
    size_t out_size = capacity;

    char *buf = static_cast<char *>(malloc(capacity+1));
    if (buf == NULL)
    {
        // don't leak the conversion descriptor on the error path
        iconv_close (utf82iso);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() declares its input buffer as `char **` (POSIX) or as
    // `const char **` (some libiconv builds). This adapter converts
    // to whichever pointer type the prototype in use asks for.
    struct InputCursor
    {
        const char **cursor;
        operator const char **() const { return cursor; }
        operator char **() const { return const_cast<char **>(cursor); }
    };

    const char *in = utf8string.data();
    InputCursor in_arg = { &in };
    char *out = buf;
    iconv (utf82iso, in_arg, &in_size, &out, &out_size);
    iconv_close (utf82iso);

    // capacity - out_size is the number of bytes iconv actually produced
    std::string result;
    try
    {
        result.assign(buf, capacity-out_size);
    }
    catch (...)
    {
        free(buf);
        throw;
    }

    free(buf);
    return result;
}
81 | ||
d116a071 TJ |
// Convert a string from the "UTF-7-IMAP" charset to ISO-8859-1 using iconv.
//
// Throws std::runtime_error if iconv_open() rejects the
// UTF-7-IMAP -> ISO-8859-1 conversion or if the output buffer cannot be
// allocated.
// The return value of iconv() itself is not checked: whatever was converted
// before a conversion error is returned.
// Embedded NUL bytes are preserved in the result.
std::string utf7imap_to_iso(const std::string& utf7imapstring)
{
    iconv_t utf7imap2iso = iconv_open ("ISO-8859-1","UTF-7-IMAP");

    if (utf7imap2iso == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-7-IMAP to ISO-8859-1");

    size_t in_size = utf7imapstring.size();
    // the output buffer is sized to the input length
    const size_t capacity = in_size;
    size_t out_size = capacity;

    char *buf = static_cast<char *>(malloc(capacity+1));
    if (buf == NULL)
    {
        // don't leak the conversion descriptor on the error path
        iconv_close (utf7imap2iso);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() declares its input buffer as `char **` (POSIX) or as
    // `const char **` (some libiconv builds). This adapter converts
    // to whichever pointer type the prototype in use asks for.
    struct InputCursor
    {
        const char **cursor;
        operator const char **() const { return cursor; }
        operator char **() const { return const_cast<char **>(cursor); }
    };

    const char *in = utf7imapstring.data();
    InputCursor in_arg = { &in };
    char *out = buf;
    iconv (utf7imap2iso, in_arg, &in_size, &out, &out_size);
    iconv_close (utf7imap2iso);

    // capacity - out_size is the number of bytes iconv actually produced
    std::string result;
    try
    {
        result.assign(buf, capacity-out_size);
    }
    catch (...)
    {
        free(buf);
        throw;
    }

    free(buf);
    return result;
}
111 | ||
118e216e TJ |
112 | // DEPRECATED, WILL BE REMOVED TOMORROW! |
113 | std::string iso_to_html(const std::string& isostring, bool showerr_bug) | |
114 | { | |
115 | string result = isostring; | |
116 | ||
117 | // TODO: This needs to be removed soon by a proper | |
118 | // HTML quoted chars engine. Then we can also remove ü from i18n files. | |
119 | if (!showerr_bug) { | |
120 | replace_all (result, "&", "&"); | |
121 | replace_all (result, "\"", """); | |
122 | replace_all (result, "<", "<"); | |
123 | replace_all (result, ">", ">"); | |
124 | } | |
125 | ||
126 | replace_all (result, utf8_to_iso("ä"), "ä"); | |
127 | replace_all (result, utf8_to_iso("ö"), "ö"); | |
128 | replace_all (result, utf8_to_iso("ü"), "ü"); | |
129 | replace_all (result, utf8_to_iso("Ä"), "Ä"); | |
130 | replace_all (result, utf8_to_iso("Ö"), "Ö"); | |
131 | replace_all (result, utf8_to_iso("Ü"), "Ü"); | |
132 | replace_all (result, utf8_to_iso("ß"), "ß"); | |
133 | ||
134 | return result; | |
135 | } | |
136 | ||
// Split a string into (html) tag tokens and plain text tokens.
//
// Each token is appended to `tokenized` as a pair: first is the text,
// second is true when the text is a complete tag ("<" ... ">").
// `tokenized` is not cleared beforehand.
// A '>' outside of a tag is ordinary text; a tag that is still open when
// the input ends (or when another '<' shows up) is stored as plain text.
void tokenize_by_tag(std::vector<std::pair<std::string,bool> > &tokenized, const std::string &input)
{
    std::string pending;
    bool in_tag = false;

    for (std::string::const_iterator ch = input.begin(); ch != input.end(); ++ch)
    {
        const char c = *ch;

        if (c == '<')
        {
            // a tag starts: flush whatever was collected so far
            in_tag = true;
            if (!pending.empty())
            {
                tokenized.push_back(std::make_pair(pending, false));
                pending.clear();
            }
            pending += c;
            continue;
        }

        pending += c;

        if (c == '>' && in_tag)
        {
            // tag complete
            in_tag = false;
            tokenized.push_back(std::make_pair(pending, true));
            pending.clear();
        }
    }

    // leftover text after the last tag
    if (!pending.empty())
        tokenized.push_back(std::make_pair(pending, false));
}
169 | ||
170 | std::string strip_html_tags(const std::string &input) | |
171 | { | |
172 | // Pair first: string, second: isTag | |
173 | vector<pair<string,bool> > tokenized; | |
174 | tokenize_by_tag(tokenized, input); | |
175 | ||
176 | string output; | |
177 | vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end(); | |
178 | for (token = tokenized.begin(); token != tokens_end; token++) | |
179 | if (!token->second) | |
180 | output += token->first; | |
181 | ||
182 | return output; | |
183 | } | |
184 | ||
185 | // Smart-encode HTML en | |
186 | string smart_html_entities(const std::string &input) | |
187 | { | |
188 | // Pair first: string, second: isTag | |
189 | vector<pair<string,bool> > tokenized; | |
190 | tokenize_by_tag(tokenized, input); | |
191 | ||
192 | string output; | |
193 | vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end(); | |
194 | for (token = tokenized.begin(); token != tokens_end; token++) { | |
195 | // keep HTML tags as they are | |
196 | if (token->second) | |
197 | output += token->first; | |
198 | else | |
199 | output += html_entities(token->first); | |
200 | } | |
201 | ||
202 | return output; | |
203 | } | |
204 | ||
205 | // encoded UTF-8 chars into HTML entities | |
206 | string html_entities(std::string str) | |
207 | { | |
208 | // Normal chars | |
209 | replace_all (str, "&", "&"); | |
210 | replace_all (str, "\"", """); | |
211 | replace_all (str, "<", "<"); | |
212 | replace_all (str, ">", ">"); | |
213 | ||
214 | // Umlauts | |
215 | replace_all (str, "ä", "ä"); | |
216 | replace_all (str, "ö", "ö"); | |
217 | replace_all (str, "ü", "ü"); | |
218 | replace_all (str, "Ä", "Ä"); | |
219 | replace_all (str, "Ö", "Ö"); | |
220 | replace_all (str, "Ü", "Ü"); | |
221 | ||
222 | // Misc | |
223 | replace_all (str, "ß", "ß"); | |
224 | ||
225 | return str; | |
226 | } | |
227 | ||
e93545dd GE |
// Replace every occurrence of `ist` in `base` by `soll`, in place.
//
// The search continues behind the inserted text, so a replacement that
// contains the search string is not replaced again.
// Returns true if at least one occurrence was replaced.
// Throws std::runtime_error when the search string is empty.
bool replace_all(std::string &base, const std::string &ist, const std::string &soll)
{
    if (ist.empty())
        throw std::runtime_error("replace_all called with empty search string");

    bool replaced = false;

    std::string::size_type hit = base.find(ist, 0);
    while (hit != std::string::npos)
    {
        base.replace(hit, ist.size(), soll);
        replaced = true;

        // resume right behind the text that was just inserted
        hit = base.find(ist, hit + soll.size());
    }

    return replaced;
}

// Convenience overload: search and replacement given as string pointers.
bool replace_all(std::string &base, const std::string *ist, const std::string *soll)
{
    const std::string &needle = *ist;
    const std::string &replacement = *soll;
    return replace_all(base, needle, replacement);
}

// Convenience overload: C string search, string pointer replacement.
bool replace_all(std::string &base, const char *ist, const std::string *soll)
{
    const std::string needle(ist);
    const std::string &replacement = *soll;
    return replace_all(base, needle, replacement);
}

// Convenience overload: string search, C string replacement.
bool replace_all(std::string &base, const std::string &ist, const char *soll)
{
    const std::string replacement(soll);
    return replace_all(base, ist, replacement);
}

// Convenience overload: search and replacement given as C strings.
bool replace_all(std::string &base, const char *ist, const char *soll)
{
    const std::string needle(ist);
    const std::string replacement(soll);
    return replace_all(base, needle, replacement);
}
269 | ||
// Return a lower-case copy of src.
//
// Each byte is converted on its own with tolower() under the current C
// locale, so multi-byte encodings are not handled as characters.
std::string to_lower(const std::string &src)
{
    std::string dst = src;

    const std::string::size_type end = dst.size();
    for (std::string::size_type pos = 0; pos < end; ++pos)
    {
        // tolower() is only defined for EOF and values representable as
        // unsigned char; a plain (possibly negative) char must be cast first
        const unsigned char ch = static_cast<unsigned char>(dst[pos]);
        dst[pos] = static_cast<char>(std::tolower(ch));
    }

    return dst;
}
280 | ||
// Return an upper-case copy of src.
//
// Each byte is converted on its own with toupper() under the current C
// locale, so multi-byte encodings are not handled as characters.
std::string to_upper(const std::string &src)
{
    std::string dst = src;

    const std::string::size_type end = dst.size();
    for (std::string::size_type pos = 0; pos < end; ++pos)
    {
        // toupper() is only defined for EOF and values representable as
        // unsigned char; a plain (possibly negative) char must be cast first
        const unsigned char ch = static_cast<unsigned char>(dst[pos]);
        dst[pos] = static_cast<char>(std::toupper(ch));
    }

    return dst;
}
291 | ||
292 | string nice_unit_format (int input) { | |
293 | float size = input; | |
294 | int sizecount = 0; | |
295 | ||
296 | while (size > 1000) { | |
297 | size = size / 1000; | |
298 | sizecount++; | |
299 | } | |
300 | ||
301 | float tmp; // round | |
302 | tmp = size*10; | |
303 | tmp += 0.5; | |
304 | tmp = int (tmp); | |
305 | tmp = float(tmp)/float(10); | |
306 | size = tmp; | |
307 | ||
308 | ostringstream out; | |
309 | ||
310 | out.setf (ios::fixed); | |
311 | out.precision(2); | |
312 | switch (sizecount) { | |
313 | case 1: | |
314 | out << size << i18n(" KBytes"); | |
315 | break; | |
316 | case 2: | |
317 | out << size << i18n(" MBytes"); | |
318 | break; | |
319 | case 3: | |
320 | out << size << i18n(" Gbytes"); | |
321 | break; | |
322 | default: | |
323 | out << size << i18n(" Bytes"); | |
324 | break; | |
325 | } | |
326 | ||
327 | return out.str(); | |
328 | } | |
329 | ||
47c07fba GE |
// Turn a string into a double-quoted, backslash-escaped representation.
//
// '"' and '\' get a backslash prefix, carriage return and newline become
// the two-character sequences \r and \n, and the result is wrapped in
// double quotes.
std::string escape(const std::string &s)
{
    std::string quoted;
    quoted += '"';

    for (std::string::size_type idx = 0; idx < s.size(); ++idx)
    {
        const char c = s[idx];
        switch (c)
        {
            case '"':
            case '\\':
                quoted += '\\';
                quoted += c;
                break;
            case '\r':
                quoted += "\\r";
                break;
            case '\n':
                quoted += "\\n";
                break;
            default:
                quoted += c;
        }
    }

    quoted += '"';
    return quoted;
}
360 | ||
// Decode a double-quoted, backslash-escaped string embedded in s.
//
// s:        text containing the escaped string
// startpos: index of the opening '"'
// endpos:   receives startpos + offset of the closing quote + 1, i.e. the
//           index of the closing '"' in s when one is found
//
// Returns the decoded text between the quotes: \r and \n become carriage
// return and newline, any other backslash is dropped and the character
// behind it is kept.
// Throws std::out_of_range if s has no '"' at startpos (or startpos is out
// of range), and also if a backslash is the very last character to decode.
std::string descape(const std::string &s, int startpos, int &endpos)
{
    if (s.at(startpos) != '"')
        throw std::out_of_range("value not type escaped string");

    std::string text = s.substr(startpos+1);

    // search for the end of the string
    std::string::size_type quote = 0;
    while ((quote = text.find('"', quote)) != std::string::npos)
    {
        // the " is escaped when an odd number of backslashes precedes it
        std::string::size_type slashes = 0;
        while (slashes < quote && text[quote-1-slashes] == '\\')
            ++slashes;

        if (slashes % 2 == 0)
            break;

        ++quote;
    }

    // we now have the end of the string
    text = text.substr(0, quote);

    // tell calling prog about the endposition
    endpos = startpos+quote+1;

    // descape all \ stuff inside the string now
    std::string::size_type bs = 0;
    while ((bs = text.find('\\', bs)) != std::string::npos)
    {
        const char code = text.at(bs+1);

        if (code == 'r')
            text.replace(bs, 2, "\r");
        else if (code == 'n')
            text.replace(bs, 2, "\n");
        else
            text.erase(bs, 1);     // keep the escaped character itself

        // step over the character that was just produced
        ++bs;
    }

    return text;
}
e93545dd | 420 | |
47c07fba GE |
// Wrap a string in single quotes for use as one shell argument.
//
// Every single quote inside the input is written as '\'' (close the quote,
// escaped quote, reopen the quote).
// An empty input yields an empty string, not a pair of quotes.
std::string escape_shellarg(const std::string &input)
{
    if (input.empty())
        return "";

    std::string quoted(1, '\'');

    for (std::string::size_type idx = 0; idx < input.size(); ++idx)
    {
        const char c = input[idx];

        // leave the quoted section and add an escaped quote; the quote
        // character appended below reopens the quoted section
        if (c == '\'')
            quoted += "'\\'";

        quoted += c;
    }

    quoted += '\'';
    return quoted;
}