1 /***************************************************************************
2 escape.cpp - escaping of strings
4 begin : Sun Nov 14 1999
5 copyright : (C) 1999 by Intra2net AG
6 email : info@intra2net.com
7 ***************************************************************************/
18 #include <stringfunc.hxx>
// Convert an ISO-8859-1 encoded string to UTF-8 using iconv.
//
// Returns the converted text. The result is built from a NUL terminated
// buffer, so it ends at the first NUL byte of the converted data. The return
// value of iconv() is not checked: if the conversion stops early, whatever
// was converted up to that point is returned.
//
// Throws runtime_error if the conversion descriptor cannot be opened or the
// work buffer cannot be allocated.
std::string iso_to_utf8(const std::string& isostring)
{
    iconv_t i2utf8 = iconv_open ("UTF-8", "ISO-8859-1");

    // Test the descriptor returned by iconv_open(), not the function itself
    if (i2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from ISO-8859-1 to UTF-8");

    size_t in_size=isostring.size();
    // Generous upper bound: four output bytes for every input byte
    const size_t buf_size=in_size*4;
    size_t out_size=buf_size;

    char *buf = static_cast<char *>(malloc(buf_size+1));
    if (buf == NULL)
    {
        // Do not leak the conversion descriptor on the error path
        iconv_close (i2utf8);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const input pointer but only reads through it
    char *in = const_cast<char *>(isostring.c_str());
    char *out = buf;
    iconv (i2utf8, &in, &in_size, &out, &out_size);

    // iconv() reduced out_size by the number of bytes it has written
    buf[buf_size-out_size]=0;

    std::string result(buf);

    free(buf);
    iconv_close (i2utf8);

    return result;
}
// Convert a UTF-8 encoded string to ISO-8859-1 using iconv.
//
// Returns the converted text. The result is built from a NUL terminated
// buffer, so it ends at the first NUL byte of the converted data. The return
// value of iconv() is not checked: if the conversion stops early (for example
// at a character that has no ISO-8859-1 representation), whatever was
// converted up to that point is returned.
//
// Throws runtime_error if the conversion descriptor cannot be opened or the
// work buffer cannot be allocated.
std::string utf8_to_iso(const std::string& utf8string)
{
    iconv_t utf82iso = iconv_open ("ISO-8859-1","UTF-8");

    if (utf82iso == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to ISO-8859-1");

    size_t in_size=utf8string.size();
    // The ISO-8859-1 form never needs more bytes than the UTF-8 input
    const size_t buf_size=in_size;
    size_t out_size=buf_size;

    char *buf = static_cast<char *>(malloc(buf_size+1));
    if (buf == NULL)
    {
        // Do not leak the conversion descriptor on the error path
        iconv_close (utf82iso);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const input pointer but only reads through it
    char *in = const_cast<char *>(utf8string.c_str());
    char *out = buf;
    iconv (utf82iso, &in, &in_size, &out, &out_size);

    // iconv() reduced out_size by the number of bytes it has written
    buf[buf_size-out_size]=0;

    std::string result(buf);

    free(buf);
    iconv_close (utf82iso);

    return result;
}
// Convert a string in the "UTF-7-IMAP" charset to ISO-8859-1 using iconv.
//
// Returns the converted text. The result is built from a NUL terminated
// buffer, so it ends at the first NUL byte of the converted data. The return
// value of iconv() is not checked: if the conversion stops early, whatever
// was converted up to that point is returned.
//
// Throws runtime_error if the iconv implementation cannot open a descriptor
// for this charset pair or if the work buffer cannot be allocated.
std::string utf7imap_to_iso(const std::string& utf7imapstring)
{
    iconv_t utf7imap2iso = iconv_open ("ISO-8859-1","UTF-7-IMAP");

    if (utf7imap2iso == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-7-IMAP to ISO-8859-1");

    size_t in_size=utf7imapstring.size();
    // The output buffer is sized like the input, as before
    const size_t buf_size=in_size;
    size_t out_size=buf_size;

    char *buf = static_cast<char *>(malloc(buf_size+1));
    if (buf == NULL)
    {
        // Do not leak the conversion descriptor on the error path
        iconv_close (utf7imap2iso);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    // iconv() takes a non-const input pointer but only reads through it
    char *in = const_cast<char *>(utf7imapstring.c_str());
    char *out = buf;
    iconv (utf7imap2iso, &in, &in_size, &out, &out_size);

    // iconv() reduced out_size by the number of bytes it has written
    buf[buf_size-out_size]=0;

    std::string result(buf);

    free(buf);
    iconv_close (utf7imap2iso);

    return result;
}
// Works on a copy of isostring and runs it through a fixed list of
// replace_all() substitutions: first the characters &, double quote, < and >,
// then the German umlauts and sharp s. The umlaut search strings are passed
// through utf8_to_iso() first, so the literals in this file are converted
// before they are matched against the input.
//
// NOTE(review): in the text as it stands, every replacement string is
// identical to its search string and the replacement for the double quote is
// not a valid string literal. This looks like the HTML entities (presumably
// the amp, quot, lt, gt, auml, ouml, uuml, Auml, Ouml, Uuml and szlig
// entities) were decoded somewhere on the way into this file - confirm
// against the repository before relying on this function.
// NOTE(review): showerr_bug is not referenced in any of the lines visible
// here; its effect could not be determined.
112 // DEPRECATED, WILL BE REMOVED TOMORROW!
113 std::string iso_to_html(const std::string& isostring, bool showerr_bug)
115 string result = isostring;
117 // TODO: This needs to be removed soon by a proper
118 // HTML quoted chars engine. Then we can also remove ü from i18n files.
120 replace_all (result, "&", "&");
121 replace_all (result, "\"", """);
122 replace_all (result, "<", "<");
123 replace_all (result, ">", ">");
126 replace_all (result, utf8_to_iso("ä"), "ä");
127 replace_all (result, utf8_to_iso("ö"), "ö");
128 replace_all (result, utf8_to_iso("ü"), "ü");
129 replace_all (result, utf8_to_iso("Ä"), "Ä");
130 replace_all (result, utf8_to_iso("Ö"), "Ö");
131 replace_all (result, utf8_to_iso("Ü"), "Ü");
132 replace_all (result, utf8_to_iso("ß"), "ß");
// Tokenize string by (html) tags
//
// Splits input into consecutive pieces and appends them to tokenized. Each
// entry holds the piece of text (first) and whether it is a tag (second).
// A tag starts at '<' and ends at the next '>'. A '<' always starts a new
// piece; text collected before it is stored as non-tag, even if that text
// itself began with an unterminated '<'. A '>' outside of a tag is ordinary
// text. Whatever is left in the buffer at the end of input is stored as
// non-tag. Existing entries of tokenized are kept.
void tokenize_by_tag(std::vector<std::pair<std::string,bool> > &tokenized, const std::string &input)
{
    const std::string::size_type len = input.size();
    bool inside_tag = false;
    std::string current;

    for (std::string::size_type pos = 0; pos < len; pos++) {
        const char c = input[pos];

        if (c == '<') {
            inside_tag = true;

            // Flush the text collected in front of this tag
            if (!current.empty()) {
                tokenized.push_back(std::make_pair(current, false));
                current.clear();
            }

            current += c;
        } else if (c == '>' && inside_tag) {
            // The closing bracket belongs to the tag token
            current += c;
            inside_tag = false;

            tokenized.push_back(std::make_pair(current, true));
            current.clear();
        } else {
            current += c;
        }
    }

    // String left over in buffer?
    if (!current.empty())
        tokenized.push_back(std::make_pair(current, false));
}
170 std::string strip_html_tags(const std::string &input)
172 // Pair first: string, second: isTag
173 vector<pair<string,bool> > tokenized;
174 tokenize_by_tag(tokenized, input);
177 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
178 for (token = tokenized.begin(); token != tokens_end; token++)
180 output += token->first;
185 // Smart-encode HTML en
186 string smart_html_entities(const std::string &input)
188 // Pair first: string, second: isTag
189 vector<pair<string,bool> > tokenized;
190 tokenize_by_tag(tokenized, input);
193 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
194 for (token = tokenized.begin(); token != tokens_end; token++) {
195 // keep HTML tags as they are
197 output += token->first;
199 output += html_entities(token->first);
205 // encoded UTF-8 chars into HTML entities
206 string html_entities(std::string str)
209 replace_all (str, "&", "&");
210 replace_all (str, "\"", """);
211 replace_all (str, "<", "<");
212 replace_all (str, ">", ">");
215 replace_all (str, "ä", "ä");
216 replace_all (str, "ö", "ö");
217 replace_all (str, "ü", "ü");
218 replace_all (str, "Ä", "Ä");
219 replace_all (str, "Ö", "Ö");
220 replace_all (str, "Ü", "Ü");
223 replace_all (str, "ß", "ß");
// Replace every occurrence of ist ("is") in base with soll ("should be").
//
// base is modified in place. The search continues behind each inserted
// replacement, so a replacement that contains the search string is not
// processed again.
//
// Returns true if at least one occurrence was replaced.
// Throws runtime_error if the search string is empty.
bool replace_all(std::string &base, const std::string &ist, const std::string &soll)
{
    if (ist.empty())
        throw std::runtime_error("replace_all called with empty search string");

    bool found_ist = false;
    std::string::size_type a = 0;

    while ((a = base.find(ist, a)) != std::string::npos)
    {
        base.replace(a, ist.size(), soll);
        // skip what was just inserted
        a += soll.size();
        found_ist = true;
    }

    return found_ist;
}

// Convenience overload: search and replacement given as C strings.
// Both pointers must be valid, NUL terminated strings.
bool replace_all(std::string &base, const char *ist, const char *soll)
{
    return replace_all(base, std::string(ist), std::string(soll));
}

// Convenience overload: replacement given as C string.
// soll must be a valid, NUL terminated string.
bool replace_all(std::string &base, const std::string &ist, const char *soll)
{
    return replace_all(base, ist, std::string(soll));
}

// Convenience overload: search and replacement given as pointers to strings.
// Both pointers are dereferenced without a check.
bool replace_all(std::string &base, const std::string *ist, const std::string *soll)
{
    return replace_all(base, *ist, *soll);
}

// Convenience overload: search given as C string, replacement as pointer to
// a string. Both pointers are used without a check.
bool replace_all(std::string &base, const char *ist, const std::string *soll)
{
    return replace_all(base, std::string(ist), *soll);
}
// Return a copy of src with every byte converted by tolower().
//
// The conversion works byte by byte and follows the current C locale;
// multi-byte encodings are not interpreted.
std::string to_lower(const std::string &src)
{
    std::string dst = src;

    const std::string::size_type end = dst.size();
    for (std::string::size_type pos = 0; pos < end; pos++) {
        // tolower() is only defined for values representable as unsigned
        // char (and EOF); a plain char may be negative for non-ASCII bytes
        dst[pos] = static_cast<char>(tolower(static_cast<unsigned char>(dst[pos])));
    }

    return dst;
}
// Return a copy of src with every byte converted by toupper().
//
// The conversion works byte by byte and follows the current C locale;
// multi-byte encodings are not interpreted.
std::string to_upper(const std::string &src)
{
    std::string dst = src;

    const std::string::size_type end = dst.size();
    for (std::string::size_type pos = 0; pos < end; pos++) {
        // toupper() is only defined for values representable as unsigned
        // char (and EOF); a plain char may be negative for non-ASCII bytes
        dst[pos] = static_cast<char>(toupper(static_cast<unsigned char>(dst[pos])));
    }

    return dst;
}
// Format a number as text followed by one of the unit suffixes
// " Bytes", " KBytes", " MBytes" or " Gbytes". The suffix strings are passed
// through i18n() before they are written.
// NOTE(review): several statements of this function are not visible here
// (how size is derived from input, the loop body, the rounding steps and the
// selection of the suffix); the comments below cover the visible lines only.
292 string nice_unit_format (int input) {
// Loop for as long as the value is above 1000; presumably each pass scales
// the value down and counts the step - confirm against the loop body.
296 while (size > 1000) {
// Division by ten in floating point; presumably the last step of rounding to
// one decimal place - confirm against the preceding statements.
305 tmp = float(tmp)/float(10);
// Fixed-point notation for the number written to out
310 out.setf (ios::fixed);
314 out << size << i18n(" KBytes");
317 out << size << i18n(" MBytes");
// NOTE(review): "Gbytes" is capitalised differently from the other units.
// The literal is handed to i18n(), so presumably it serves as a lookup key;
// do not change it without checking the translations.
320 out << size << i18n(" Gbytes");
323 out << size << i18n(" Bytes");
// Escape a string so that it can be stored as one quoted value.
//
// A double quote or backslash gets a backslash in front of it, a carriage
// return becomes the two characters backslash + 'r' and a newline becomes
// backslash + 'n'. The result is enclosed in double quotes, which is the
// form descape() expects at its start position.
//
// The string is built in one pass; no characters are inserted in the middle
// of an existing string.
std::string escape(const std::string &s)
{
    std::string out;
    out.reserve(s.size() + 2);

    out += '"';
    for (std::string::size_type pos = 0; pos < s.size(); ++pos)
    {
        const char c = s[pos];
        switch (c)
        {
            case '"':
            case '\\':
                out += '\\';
                out += c;
                break;
            case '\r':
                out += "\\r";
                break;
            case '\n':
                out += "\\n";
                break;
            default:
                out += c;
                break;
        }
    }
    out += '"';

    return out;
}
// Decode a quoted, backslash-escaped string that starts at position startpos
// of s.
// Throws out_of_range if the character at startpos is not a double quote;
// s.at() itself throws out_of_range as well if startpos is not a valid index.
// endpos is an output parameter; per the comment further down it reports the
// end position to the caller (the assignment is not visible here).
// NOTE(review): many statements of this function are not visible here; the
// comments below cover the visible lines only.
361 string descape(const string &s, int startpos, int &endpos)
365 if (s.at(startpos) != '"')
366 throw out_of_range("value not type escaped string");
// Continue with everything behind the opening quote
368 out=s.substr(startpos+1);
369 string::size_type p=0;
371 // search for the end of the string
372 while((p=out.find("\"",p))!=out.npos)
377 // the " might be escaped with a backslash
// Walks backwards over the backslashes directly in front of the quote.
// NOTE(review): the test e>=0 only has an effect if e is a signed type; its
// declaration is not visible here - confirm.
378 while(e>=0 && out.at(e)=='\\')
380 if (escaped == false)
394 // we now have the end of the string
397 // tell calling prog about the endposition
400 // descape all \ stuff inside the string now
// Visits every backslash that is left in out
402 while((p=out.find_first_of("\\",p))!=out.npos)
// Two characters at p are replaced by one carriage return (presumably the
// sequence backslash + 'r'; the selecting condition is not visible here)
407 out.replace(p,2,"\r");
// Two characters at p are replaced by one newline (presumably the sequence
// backslash + 'n'; the selecting condition is not visible here)
410 out.replace(p,2,"\n");
421 string escape_shellarg(const string &input)
427 string::const_iterator it, it_end = input.end();
428 for (it = input.begin(); it != it_end; it++) {