developer.intra2net.com Git - libi2ncommon/blob - src/stringfunc.cpp

   1 /***************************************************************************
   2                           escape.cpp  -  escaping of strings
   3                              -------------------
   4     begin                : Sun Nov 14 1999
   5     copyright            : (C) 1999 by Intra2net AG
   6     email                : info@intra2net.com
   7  ***************************************************************************/
   8
   9 #include <iostream>
  10 #include <string>
  11 #include <sstream>
  12 #include <stdexcept>
  13
  14 #include <stdlib.h>
  15 #include <iconv.h>
  16 #include <i18n.h>
  17
  18 #include <stringfunc.hxx>
  19
  20 using namespace std;
  21
  22 std::string iso_to_utf8(const std::string& isostring)
  23 {
  24     string result;
  25
  26     iconv_t i2utf8 = iconv_open ("UTF-8", "ISO-8859-1");
  27
  28     if (iso_to_utf8 == (iconv_t)-1)
  29         throw runtime_error("iconv can't convert from ISO-8859-1 to UTF-8");
  30
  31     size_t in_size=isostring.size();
  32     size_t out_size=in_size*4;
  33
  34     char *buf = (char *)malloc(out_size+1);
  35     if (buf == NULL)
  36         throw runtime_error("out of memory for iconv buffer");
  37
  38     const char *in = isostring.c_str();
  39     char *out = buf;
  40     iconv (i2utf8, &in, &in_size, &out, &out_size);
  41
  42     buf[isostring.size()*4-out_size]=0;
  43
  44     result=buf;
  45
  46     free(buf);
  47     iconv_close (i2utf8);
  48
  49     return result;
  50 }
  51
  52 std::string utf8_to_iso(const std::string& utf8string)
  53 {
  54     string result;
  55
  56     iconv_t utf82iso = iconv_open ("ISO-8859-1","UTF-8");
  57
  58     if (utf82iso == (iconv_t)-1)
  59         throw runtime_error("iconv can't convert from UTF-8 to ISO-8859-1");
  60
  61     size_t in_size=utf8string.size();
  62     size_t out_size=in_size;
  63
  64     char *buf = (char *)malloc(out_size+1);
  65     if (buf == NULL)
  66         throw runtime_error("out of memory for iconv buffer");
  67
  68     const char *in = utf8string.c_str();
  69     char *out = buf;
  70     iconv (utf82iso, &in, &in_size, &out, &out_size);
  71
  72     buf[utf8string.size()-out_size]=0;
  73
  74     result=buf;
  75
  76     free(buf);
  77     iconv_close (utf82iso);
  78
  79     return result;
  80 }
  81
  82 std::string utf7imap_to_iso(const std::string& utf7imapstring)
  83 {
  84     string result;
  85
  86     iconv_t utf7imap2iso = iconv_open ("ISO-8859-1","UTF-7-IMAP");
  87
  88     if (utf7imap2iso == (iconv_t)-1)
  89         throw runtime_error("iconv can't convert from UTF-7-IMAP to ISO-8859-1");
  90
  91     size_t in_size=utf7imapstring.size();
  92     size_t out_size=in_size;
  93
  94     char *buf = (char *)malloc(out_size+1);
  95     if (buf == NULL)
  96         throw runtime_error("out of memory for iconv buffer");
  97
  98     const char *in = utf7imapstring.c_str();
  99     char *out = buf;
 100     iconv (utf7imap2iso, &in, &in_size, &out, &out_size);
 101
 102     buf[utf7imapstring.size()-out_size]=0;
 103
 104     result=buf;
 105
 106     free(buf);
 107     iconv_close (utf7imap2iso);
 108
 109     return result;
 110 }
 111
 112 // DEPRECATED, WILL BE REMOVED TOMORROW!
 113 std::string iso_to_html(const std::string& isostring, bool showerr_bug)
 114 {
 115     string result = isostring;
 116
 117     // TODO: This needs to be removed soon by a proper
 118     // HTML quoted chars engine. Then we can also remove &uuml; from i18n files.
 119     if (!showerr_bug) {
 120         replace_all (result, "&", "&amp;");
 121         replace_all (result, "\"", "&quot;");
 122         replace_all (result, "<", "&lt;");
 123         replace_all (result, ">", "&gt;");
 124     }
 125
 126     replace_all (result, utf8_to_iso("ä"), "&auml;");
 127     replace_all (result, utf8_to_iso("ö"), "&ouml;");
 128     replace_all (result, utf8_to_iso("ü"), "&uuml;");
 129     replace_all (result, utf8_to_iso("Ä"), "&Auml;");
 130     replace_all (result, utf8_to_iso("Ö"), "&Ouml;");
 131     replace_all (result, utf8_to_iso("Ü"), "&Uuml;");
 132     replace_all (result, utf8_to_iso("ß"), "&szlig;");
 133
 134     return result;
 135 }
 136
 137 // Tokenize string by (html) tags
 138 void tokenize_by_tag(vector<pair<string,bool> > &tokenized, const std::string &input)
 139 {
 140     string::size_type pos, len = input.size();
 141     bool inside_tag = false;
 142     string current;
 143
 144     for (pos = 0; pos < len; pos++) {
 145         if (input[pos] == '<') {
 146             inside_tag = true;
 147
 148             if (!current.empty()) {
 149                 tokenized.push_back(make_pair(current, false));
 150                 current = "";
 151             }
 152
 153             current += input[pos];
 154         } else if (input[pos] == '>' && inside_tag) {
 155             current += input[pos];
 156             inside_tag = false;
 157             if (!current.empty()) {
 158                 tokenized.push_back(make_pair(current, true));
 159                 current = "";
 160             }
 161         } else
 162             current += input[pos];
 163     }
 164
 165     // String left over in buffer?
 166     if (!current.empty())
 167         tokenized.push_back(make_pair(current, false));
 168 }
 169
 170 std::string strip_html_tags(const std::string &input)
 171 {
 172     // Pair first: string, second: isTag
 173     vector<pair<string,bool> > tokenized;
 174     tokenize_by_tag(tokenized, input);
 175
 176     string output;
 177     vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
 178     for (token = tokenized.begin(); token != tokens_end; token++)
 179         if (!token->second)
 180             output += token->first;
 181
 182     return output;
 183 }
 184
 185 // Smart-encode HTML en
 186 string smart_html_entities(const std::string &input)
 187 {
 188     // Pair first: string, second: isTag
 189     vector<pair<string,bool> > tokenized;
 190     tokenize_by_tag(tokenized, input);
 191
 192     string output;
 193     vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
 194     for (token = tokenized.begin(); token != tokens_end; token++) {
 195         // keep HTML tags as they are
 196         if (token->second)
 197             output += token->first;
 198         else
 199             output += html_entities(token->first);
 200     }
 201
 202     return output;
 203 }
 204
 205 // encoded UTF-8 chars into HTML entities
 206 string html_entities(std::string str)
 207 {
 208     // Normal chars
 209     replace_all (str, "&", "&amp;");
 210     replace_all (str, "\"", "&quot;");
 211     replace_all (str, "<", "&lt;");
 212     replace_all (str, ">", "&gt;");
 213
 214     // Umlauts
 215     replace_all (str, "ä", "&auml;");
 216     replace_all (str, "ö", "&ouml;");
 217     replace_all (str, "ü", "&uuml;");
 218     replace_all (str, "Ä", "&Auml;");
 219     replace_all (str, "Ö", "&Ouml;");
 220     replace_all (str, "Ü", "&Uuml;");
 221
 222     // Misc
 223     replace_all (str, "ß", "&szlig;");
 224
 225     return str;
 226 }
 227
 228 bool replace_all(string &base, const char *ist, const char *soll)
 229 {
 230     string i=ist;
 231     string s=soll;
 232     return replace_all(base,&i,&s);
 233 }
 234
 235 bool replace_all(string &base, const string &ist, const char *soll)
 236 {
 237     string s=soll;
 238     return replace_all(base,&ist,&s);
 239 }
 240
 241 bool replace_all(string &base, const string *ist, const string *soll)
 242 {
 243     return replace_all(base,*ist,*soll);
 244 }
 245
 246 bool replace_all(string &base, const char *ist, const string *soll)
 247 {
 248     string i=ist;
 249     return replace_all(base,&i,soll);
 250 }
 251
 252 bool replace_all(string &base, const string &ist, const string &soll)
 253 {
 254     bool found_ist = false;
 255     string::size_type a=0;
 256
 257     if (ist.empty())
 258         throw runtime_error("replace_all called with empty search string");
 259
 260     while((a=base.find(ist,a))!=string::npos)
 261     {
 262         base.replace(a,ist.size(),soll);
 263         a=a+soll.size();
 264         found_ist = true;
 265     }
 266
 267     return found_ist;
 268 }
 269
 270 string to_lower(const string &src)
 271 {
 272     string dst = src;
 273
 274     string::size_type pos = 0, end = dst.size();
 275     for (pos = 0; pos < end; pos++)
 276         dst[pos] = tolower(dst[pos]);
 277
 278     return dst;
 279 }
 280
 281 string to_upper(const string &src)
 282 {
 283     string dst = src;
 284
 285     string::size_type pos = 0, end = dst.size();
 286     for (pos = 0; pos < end; pos++)
 287         dst[pos] = toupper(dst[pos]);
 288
 289     return dst;
 290 }
 291
 292 string nice_unit_format (int input) {
 293     float size = input;
 294     int sizecount = 0;
 295
 296     while (size > 1000) {
 297         size = size / 1000;
 298         sizecount++;
 299     }
 300
 301     float tmp;                       // round
 302     tmp = size*10;
 303     tmp += 0.5;
 304     tmp = int (tmp);
 305     tmp = float(tmp)/float(10);
 306     size = tmp;
 307
 308     ostringstream out;
 309
 310     out.setf (ios::fixed);
 311     out.precision(2);
 312     switch (sizecount) {
 313     case 1:
 314         out << size << i18n(" KBytes");
 315         break;
 316     case 2:
 317         out << size << i18n(" MBytes");
 318         break;
 319     case 3:
 320         out << size << i18n(" Gbytes");
 321         break;
 322     default:
 323         out << size << i18n(" Bytes");
 324         break;
 325     }
 326
 327     return out.str();
 328 }
 329
 330 string escape(const string &s)
 331 {
 332     string out(s);
 333     string::size_type p;
 334
 335     p=0;
 336     while ((p=out.find_first_of("\"\\",p))!=out.npos)
 337     {
 338         out.insert(p,"\\");
 339         p+=2;
 340     }
 341
 342     p=0;
 343     while ((p=out.find_first_of("\r",p))!=out.npos)
 344     {
 345         out.replace(p,1,"\\r");
 346         p+=2;
 347     }
 348
 349     p=0;
 350     while ((p=out.find_first_of("\n",p))!=out.npos)
 351     {
 352         out.replace(p,1,"\\n");
 353         p+=2;
 354     }
 355
 356     out='"'+out+'"';
 357
 358     return out;
 359 }
 360
 361 string descape(const string &s, int startpos, int &endpos)
 362 {
 363     string out;
 364
 365     if (s.at(startpos) != '"')
 366         throw out_of_range("value not type escaped string");
 367
 368     out=s.substr(startpos+1);
 369     string::size_type p=0;
 370
 371     // search for the end of the string
 372     while((p=out.find("\"",p))!=out.npos)
 373     {
 374         int e=p-1;
 375         bool escaped=false;
 376
 377         // the " might be escaped with a backslash
 378         while(e>=0 && out.at(e)=='\\')
 379         {
 380             if (escaped == false)
 381                 escaped=true;
 382             else
 383                 escaped=false;
 384
 385             e--;
 386         }
 387
 388         if (escaped==false)
 389             break;
 390         else
 391             p++;
 392     }
 393
 394     // we now have the end of the string
 395     out=out.substr(0,p);
 396
 397     // tell calling prog about the endposition
 398     endpos=startpos+p+1;
 399
 400     // descape all \ stuff inside the string now
 401     p=0;
 402     while((p=out.find_first_of("\\",p))!=out.npos)
 403     {
 404         switch(out.at(p+1))
 405         {
 406         case 'r':
 407             out.replace(p,2,"\r");
 408             break;
 409         case 'n':
 410             out.replace(p,2,"\n");
 411             break;
 412         default:
 413             out.erase(p,1);
 414         }
 415         p++;
 416     }
 417
 418     return out;
 419 }
 420
 421 string escape_shellarg(const string &input)
 422 {
 423     if (!input.size())
 424         return "";
 425
 426     string output = "'";
 427     string::const_iterator it, it_end = input.end();
 428     for (it = input.begin(); it != it_end; it++) {
 429         if ((*it) == '\'')
 430             output += "'\\'";
 431
 432         output += *it;
 433     }
 434
 435     output += "'";
 436     return output;
 437 }