1 /***************************************************************************
2 escape.cpp - escaping of strings
4 begin : Sun Nov 14 1999
5 copyright : (C) 1999 by Intra2net AG
6 email : info@intra2net.com
7 ***************************************************************************/
18 #include <stringfunc.hxx>
// Convert an ISO-8859-1 (Latin-1) encoded string to UTF-8 using iconv.
//
// isostring: input bytes, interpreted as ISO-8859-1.
// Returns the UTF-8 text.  The result is read from a NUL-terminated buffer,
// so it ends at the first NUL byte that the conversion produced.
// Throws std::runtime_error if iconv has no ISO-8859-1 -> UTF-8 converter or
// if the conversion buffer cannot be allocated.
std::string iso_to_utf8(const std::string& isostring)
{
    iconv_t i2utf8 = iconv_open("UTF-8", "ISO-8859-1");

    // Test the descriptor returned by iconv_open(), not the function name.
    if (i2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from ISO-8859-1 to UTF-8");

    // Releases the buffer and the descriptor on every exit path,
    // including the exceptions thrown below.
    struct Cleanup
    {
        iconv_t cd;
        char *buf;
        ~Cleanup() { free(buf); iconv_close(cd); }
    } guard = { i2utf8, NULL };

    size_t in_size = isostring.size();
    // Generous upper bound: a Latin-1 byte needs at most two UTF-8 bytes.
    const size_t capacity = in_size * 4;
    size_t out_size = capacity;

    guard.buf = static_cast<char *>(malloc(capacity + 1));
    if (guard.buf == NULL)
        throw std::runtime_error("out of memory for iconv buffer");

    // POSIX declares the input argument as char**; iconv does not modify
    // the bytes it reads, so casting away const is safe here.
    char *in = const_cast<char *>(isostring.c_str());
    char *out = guard.buf;

    // Best effort: the return value is deliberately not checked, whatever
    // was converted before a possible error is returned to the caller.
    iconv(i2utf8, &in, &in_size, &out, &out_size);

    // out_size now holds the unused space, so this is the end of the output.
    guard.buf[capacity - out_size] = 0;

    return std::string(guard.buf);
}
// Convert a UTF-8 encoded string to ISO-8859-1 (Latin-1) using iconv.
//
// utf8string: input bytes, interpreted as UTF-8.
// Returns the Latin-1 text.  Conversion is best effort: if iconv stops at an
// invalid or unrepresentable sequence, the part converted so far is returned.
// The result is read from a NUL-terminated buffer, so it ends at the first
// NUL byte that the conversion produced.
// Throws std::runtime_error if iconv has no UTF-8 -> ISO-8859-1 converter or
// if the conversion buffer cannot be allocated.
std::string utf8_to_iso(const std::string& utf8string)
{
    iconv_t utf82iso = iconv_open("ISO-8859-1", "UTF-8");

    if (utf82iso == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-8 to ISO-8859-1");

    // Releases the buffer and the descriptor on every exit path, so the
    // descriptor no longer leaks when the allocation failure is reported.
    struct Cleanup
    {
        iconv_t cd;
        char *buf;
        ~Cleanup() { free(buf); iconv_close(cd); }
    } guard = { utf82iso, NULL };

    size_t in_size = utf8string.size();
    // Latin-1 output is never longer than its UTF-8 input.
    const size_t capacity = in_size;
    size_t out_size = capacity;

    guard.buf = static_cast<char *>(malloc(capacity + 1));
    if (guard.buf == NULL)
        throw std::runtime_error("out of memory for iconv buffer");

    // POSIX declares the input argument as char**; iconv does not modify
    // the bytes it reads, so casting away const is safe here.
    char *in = const_cast<char *>(utf8string.c_str());
    char *out = guard.buf;

    // Return value deliberately ignored (best-effort conversion, see above).
    iconv(utf82iso, &in, &in_size, &out, &out_size);

    // out_size now holds the unused space, so this is the end of the output.
    guard.buf[capacity - out_size] = 0;

    return std::string(guard.buf);
}
// Convert a string in the modified UTF-7 encoding used for IMAP mailbox
// names ("UTF-7-IMAP") to UTF-8 using iconv.
//
// utf7imapstring: input bytes, interpreted as UTF-7-IMAP.
// Returns the UTF-8 text.  Conversion is best effort: if iconv stops at an
// invalid sequence, the part converted so far is returned.  The result is
// read from a NUL-terminated buffer, so it ends at the first NUL byte that
// the conversion produced.
// Throws std::runtime_error if the installed iconv has no UTF-7-IMAP
// converter or if the conversion buffer cannot be allocated.
std::string utf7imap_to_utf8(const std::string& utf7imapstring)
{
    iconv_t utf7imap2utf8 = iconv_open("UTF-8", "UTF-7-IMAP");

    if (utf7imap2utf8 == (iconv_t)-1)
        throw std::runtime_error("iconv can't convert from UTF-7-IMAP to UTF-8");

    // Releases the buffer and the descriptor on every exit path, so the
    // descriptor no longer leaks when the allocation failure is reported.
    struct Cleanup
    {
        iconv_t cd;
        char *buf;
        ~Cleanup() { free(buf); iconv_close(cd); }
    } guard = { utf7imap2utf8, NULL };

    size_t in_size = utf7imapstring.size();
    const size_t capacity = in_size * 4;
    size_t out_size = capacity;

    guard.buf = static_cast<char *>(malloc(capacity + 1));
    if (guard.buf == NULL)
        throw std::runtime_error("out of memory for iconv buffer");

    // POSIX declares the input argument as char**; iconv does not modify
    // the bytes it reads, so casting away const is safe here.
    char *in = const_cast<char *>(utf7imapstring.c_str());
    char *out = guard.buf;

    // Return value deliberately ignored (best-effort conversion, see above).
    iconv(utf7imap2utf8, &in, &in_size, &out, &out_size);

    // out_size now holds the unused space, so this is the end of the output.
    guard.buf[capacity - out_size] = 0;

    return std::string(guard.buf);
}
112 // Tokenize string by (html) tags
//
// Walks input one character at a time, collecting characters in a buffer
// and appending finished pieces to tokenized as (text, is_tag) pairs.
//   tokenized: output list; pieces are added with push_back().
//   input:     text to split.
// A piece that is closed by '>' while inside a tag is stored with true,
// every other piece is stored with false.
113 void tokenize_by_tag(vector<pair<string,bool> > &tokenized, const std::string &input)
115 string::size_type pos, len = input.size();
116 bool inside_tag = false;
119 for (pos = 0; pos < len; pos++) {
// '<' ends the text collected so far and begins a new piece
120 if (input[pos] == '<') {
// text in front of the '<' is stored as a non-tag piece
123 if (!current.empty()) {
124 tokenized.push_back(make_pair(current, false));
// the '<' itself becomes part of the piece that follows
128 current += input[pos];
// a '>' only closes a piece when a tag is currently open
129 } else if (input[pos] == '>' && inside_tag) {
130 current += input[pos];
// current holds at least the '>' appended above, so this is always true
132 if (!current.empty()) {
133 tokenized.push_back(make_pair(current, true));
137 current += input[pos];
140 // String left over in buffer?
// Whatever remains is stored as a non-tag piece; inside_tag is not consulted
// here, so an unterminated "<..." at the end of input is stored with false.
141 if (!current.empty())
142 tokenized.push_back(make_pair(current, false));
// Splits input with tokenize_by_tag() and concatenates token texts into the
// output string.
// NOTE(review): going by the function name only the non-tag tokens should be
// appended; the condition guarding the append is not visible - confirm.
145 std::string strip_html_tags(const std::string &input)
147 // Pair first: string, second: isTag
148 vector<pair<string,bool> > tokenized;
149 tokenize_by_tag(tokenized, input);
// end iterator is fetched once, before the loop
152 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
153 for (token = tokenized.begin(); token != tokens_end; token++)
155 output += token->first;
160 // Smart-encode HTML entities
//
// Splits input with tokenize_by_tag() and rebuilds it token by token:
// tokens that are HTML tags are appended unchanged, the text of the other
// tokens is passed through html_entities() before it is appended.
161 string smart_html_entities(const std::string &input)
163 // Pair first: string, second: isTag
164 vector<pair<string,bool> > tokenized;
165 tokenize_by_tag(tokenized, input);
// end iterator is fetched once, before the loop
168 vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
169 for (token = tokenized.begin(); token != tokens_end; token++) {
170 // keep HTML tags as they are
172 output += token->first;
// text outside of tags gets its special characters encoded
174 output += html_entities(token->first);
// Encode HTML special characters and UTF-8 encoded German umlauts / sharp s
// as HTML entities.
//
// str: text to encode; umlauts are expected as two-byte UTF-8 sequences.
// Returns the encoded text:
//   &  ->  &amp;     "  ->  &quot;    <  ->  &lt;     >  ->  &gt;
//   a/o/u umlaut (lower and upper case) -> &auml; &ouml; &uuml; &Auml; &Ouml; &Uuml;
//   sharp s -> &szlig;
// All other bytes are copied unchanged.  The input is scanned exactly once,
// so the '&' of an inserted entity is never encoded a second time.
std::string html_entities(std::string str)
{
    std::string encoded;
    encoded.reserve(str.size());

    const std::string::size_type len = str.size();
    for (std::string::size_type pos = 0; pos < len; ++pos)
    {
        const char *entity = 0;

        switch (str[pos])
        {
            // Normal chars
            case '&':  entity = "&amp;";  break;
            case '"':  entity = "&quot;"; break;
            case '<':  entity = "&lt;";   break;
            case '>':  entity = "&gt;";   break;

            // 0xC3 is the UTF-8 lead byte shared by all handled umlauts;
            // byte escapes keep this independent of the source file encoding.
            case '\xC3':
                if (pos + 1 < len)
                {
                    switch (str[pos + 1])
                    {
                        case '\xA4': entity = "&auml;";  break;
                        case '\xB6': entity = "&ouml;";  break;
                        case '\xBC': entity = "&uuml;";  break;
                        case '\x84': entity = "&Auml;";  break;
                        case '\x96': entity = "&Ouml;";  break;
                        case '\x9C': entity = "&Uuml;";  break;
                        case '\x9F': entity = "&szlig;"; break;
                        default:     break;
                    }

                    // the entity replaces both bytes of the sequence
                    if (entity)
                        ++pos;
                }
                break;

            default:
                break;
        }

        if (entity)
            encoded += entity;
        else
            encoded += str[pos];
    }

    return encoded;
}
// Convenience overload: search text and replacement are both C strings.
// Forwards to the pointer-taking overload and returns its result.
// NOTE(review): i and s are presumably std::string copies of ist and soll;
// their definitions are not visible - confirm.
203 bool replace_all(string &base, const char *ist, const char *soll)
207 return replace_all(base,&i,&s);
// Convenience overload: search text is a string, replacement is a C string.
// Forwards the addresses of ist and s to the pointer-taking overload and
// returns its result.
// NOTE(review): s is presumably a std::string copy of soll; its definition
// is not visible - confirm.
210 bool replace_all(string &base, const string &ist, const char *soll)
213 return replace_all(base,&ist,&s);
// Pointer-taking overload: dereferences ist and soll and forwards to the
// overload that takes both as references, returning its result.
// Neither pointer is checked before it is dereferenced, so callers must not
// pass null pointers.
216 bool replace_all(string &base, const string *ist, const string *soll)
218 return replace_all(base,*ist,*soll);
// Convenience overload: search text is a C string, replacement is a pointer
// to a string.  Forwards to the pointer-taking overload and returns its
// result; soll is passed through unchanged.
// NOTE(review): i is presumably a std::string copy of ist; its definition
// is not visible - confirm.
221 bool replace_all(string &base, const char *ist, const string *soll)
224 return replace_all(base,&i,soll);
// Replace every non-overlapping occurrence of ist in base with soll,
// scanning from left to right.  Inserted replacement text is never searched
// again, so a replacement that contains the search text cannot loop.
//
// base: string that is modified in place.
// ist:  text to search for; must not be empty.
// soll: text that takes the place of each occurrence.
// Returns true if at least one occurrence was replaced, false otherwise
// (base is left untouched in that case).
// Throws std::runtime_error if ist is empty.
bool replace_all(std::string &base, const std::string &ist, const std::string &soll)
{
    if (ist.empty())
        throw std::runtime_error("replace_all called with empty search string");

    std::string::size_type hit = base.find(ist);
    if (hit == std::string::npos)
        return false;

    // Build the result in one pass instead of editing base in place: each
    // in-place replace moves the whole tail of the string, which is quadratic
    // for many matches.  It also keeps base intact while ist or soll might
    // refer to it.
    std::string rebuilt;
    rebuilt.reserve(base.size());

    std::string::size_type copied = 0;
    do
    {
        rebuilt.append(base, copied, hit - copied);   // text before the match
        rebuilt += soll;
        copied = hit + ist.size();                    // continue after the match
        hit = base.find(ist, copied);
    } while (hit != std::string::npos);

    rebuilt.append(base, copied, std::string::npos);  // remainder after last match
    base.swap(rebuilt);

    return true;
}
// Return a copy of src in which every byte has been mapped through
// tolower() of the current C locale.
//
// src: text to convert; it is not modified.
// Returns the converted copy.  Bytes without a lower-case counterpart in the
// current locale are returned unchanged.
std::string to_lower(const std::string &src)
{
    std::string dst = src;

    std::string::size_type pos, end = dst.size();
    for (pos = 0; pos < end; pos++)
    {
        // tolower() is only defined for EOF and values representable as
        // unsigned char; a plain char holding a non-ASCII byte may be
        // negative, so convert it first.
        dst[pos] = static_cast<char>(tolower(static_cast<unsigned char>(dst[pos])));
    }

    return dst;
}
// Return a copy of src in which every byte has been mapped through
// toupper() of the current C locale.
//
// src: text to convert; it is not modified.
// Returns the converted copy.  Bytes without an upper-case counterpart in the
// current locale are returned unchanged.
std::string to_upper(const std::string &src)
{
    std::string dst = src;

    std::string::size_type pos, end = dst.size();
    for (pos = 0; pos < end; pos++)
    {
        // toupper() is only defined for EOF and values representable as
        // unsigned char; a plain char holding a non-ASCII byte may be
        // negative, so convert it first.
        dst[pos] = static_cast<char>(toupper(static_cast<unsigned char>(dst[pos])));
    }

    return dst;
}
// Formats input as text followed by a unit suffix.  The suffix is one of
// " Bytes", " KBytes", " MBytes" or " Gbytes", each passed through i18n().
// NOTE(review): input is presumably a size in bytes that is scaled down in
// steps of 1000 to pick the unit; the scaling statements are not visible -
// confirm.
267 string nice_unit_format (int input) {
// strictly greater than: a size of exactly 1000 does not enter the loop
271 while (size > 1000) {
// NOTE(review): looks like the last step of rounding to one decimal place -
// confirm against the preceding statements
280 tmp = float(tmp)/float(10);
// fixed-point notation for the number written to the stream
285 out.setf (ios::fixed);
289 out << size << i18n(" KBytes");
292 out << size << i18n(" MBytes");
// NOTE(review): "Gbytes" is capitalised differently from "KBytes"/"MBytes";
// the literal is handed to i18n(), so check the translations before changing it
295 out << size << i18n(" Gbytes");
298 out << size << i18n(" Bytes");
// Escapes special characters of s in a working string named out: double
// quotes, backslashes, carriage returns and line feeds.
// NOTE(review): the result is presumably meant to be read back by descape();
// the statements that finish and return the string are not visible - confirm.
305 string escape(const string &s)
// first pass: find every double quote or backslash
// NOTE(review): presumably each one gets a backslash put in front of it - confirm
311 while ((p=out.find_first_of("\"\\",p))!=out.npos)
// second pass: every carriage return becomes the two characters backslash + 'r'
318 while ((p=out.find_first_of("\r",p))!=out.npos)
320 out.replace(p,1,"\\r");
// third pass: every line feed becomes the two characters backslash + 'n'
325 while ((p=out.find_first_of("\n",p))!=out.npos)
327 out.replace(p,1,"\\n");
// Decodes a double-quoted, backslash-escaped string found inside s.
//   s:        text that contains the escaped string.
//   startpos: index of the opening double quote in s.
//   endpos:   output parameter; according to the comment further down it
//             reports the end position to the caller (the assignment is not
//             visible - confirm what exactly it refers to).
// Throws out_of_range if the character at startpos is not a double quote;
// s.at() throws the same exception type if startpos is outside of s.
336 string descape(const string &s, int startpos, int &endpos)
340 if (s.at(startpos) != '"')
341 throw out_of_range("value not type escaped string");
// continue with everything after the opening quote
343 out=s.substr(startpos+1);
344 string::size_type p=0;
346 // search for the end of the string
347 while((p=out.find("\"",p))!=out.npos)
352 // the " might be escaped with a backslash
// walks backwards over the backslashes in front of the quote; e must be a
// signed index for the e>=0 test to end the loop at the start of the string
353 while(e>=0 && out.at(e)=='\\')
// a quote that is not escaped is the closing quote
355 if (escaped == false)
369 // we now have the end of the string
372 // tell calling prog about the endposition
375 // descape all \ stuff inside the string now
377 while((p=out.find_first_of("\\",p))!=out.npos)
// two-character sequence (backslash + letter) becomes a real carriage return
382 out.replace(p,2,"\r");
// two-character sequence (backslash + letter) becomes a real line feed
385 out.replace(p,2,"\n");
396 string escape_shellarg(const string &input)
402 string::const_iterator it, it_end = input.end();
403 for (it = input.begin(); it != it_end; it++) {