[libi2ncommon] / src / stringfunc.cpp

/***************************************************************************
                          escape.cpp  -  escaping of strings
                             -------------------
    begin                : Sun Nov 14 1999
    copyright            : (C) 1999 by Intra2net AG
    email                : info@intra2net.com
 ***************************************************************************/

#include <iostream>
#include <string>
#include <sstream>
#include <stdexcept>

#include <stdlib.h>
#include <iconv.h>
#include <i18n.h>

#include <stringfunc.hxx>

using namespace std;

namespace {

// Lets one call site compile against both iconv() prototypes that exist:
// POSIX/glibc declare the input buffer as "char **", some libiconv builds
// declare it as "const char **". The object converts implicitly to whichever
// type the local prototype asks for.
struct IconvInputArg
{
    const char **ptr;

    operator const char **() const { return ptr; }
    operator char **() const { return const_cast<char **>(ptr); }
};

// Shared implementation of the charset converters below.
//
// to_charset / from_charset: encoding names handed to iconv_open()
// input:       bytes to convert (embedded NUL bytes are converted like any other byte)
// max_growth:  upper bound for "output bytes per input byte", used to size the buffer
// open_error:  message of the runtime_error thrown if iconv_open() fails
//
// Throws runtime_error if the conversion cannot be set up or the output
// buffer cannot be allocated. The return value of iconv() itself is not
// checked: if it stops early (for example on input it cannot convert),
// whatever was converted up to that point is returned.
std::string convert_with_iconv(const char *to_charset, const char *from_charset,
                               const std::string &input, size_t max_growth,
                               const char *open_error)
{
    iconv_t cd = iconv_open (to_charset, from_charset);
    if (cd == (iconv_t)-1)
        throw std::runtime_error(open_error);

    size_t in_left = input.size();
    const size_t capacity = in_left * max_growth;
    size_t out_left = capacity;

    char *buf = static_cast<char *>(malloc(capacity+1));
    if (buf == NULL)
    {
        // don't leak the conversion descriptor on the error path
        iconv_close (cd);
        throw std::runtime_error("out of memory for iconv buffer");
    }

    const char *in = input.data();
    char *out = buf;
    IconvInputArg in_arg = { &in };
    iconv (cd, in_arg, &in_left, &out, &out_left);
    iconv_close (cd);

    // iconv() decrements out_left by the number of bytes it wrote. Copy
    // exactly that many bytes instead of relying on a terminating NUL.
    std::string result;
    try
    {
        result.assign(buf, capacity - out_left);
    }
    catch (...)
    {
        free(buf);
        throw;
    }

    free(buf);
    return result;
}

} // anonymous namespace

// Convert an ISO-8859-1 string to UTF-8.
// Throws runtime_error if iconv does not offer the conversion or the
// conversion buffer cannot be allocated.
std::string iso_to_utf8(const std::string& isostring)
{
    return convert_with_iconv("UTF-8", "ISO-8859-1", isostring, 4,
                              "iconv can't convert from ISO-8859-1 to UTF-8");
}

// Convert a UTF-8 string to ISO-8859-1.
// Throws runtime_error if iconv does not offer the conversion or the
// conversion buffer cannot be allocated.
std::string utf8_to_iso(const std::string& utf8string)
{
    return convert_with_iconv("ISO-8859-1", "UTF-8", utf8string, 1,
                              "iconv can't convert from UTF-8 to ISO-8859-1");
}

// Convert a UTF-7-IMAP string (encoding name as passed to iconv_open) to ISO-8859-1.
// Throws runtime_error if iconv does not offer the conversion or the
// conversion buffer cannot be allocated.
std::string utf7imap_to_iso(const std::string& utf7imapstring)
{
    return convert_with_iconv("ISO-8859-1", "UTF-7-IMAP", utf7imapstring, 1,
                              "iconv can't convert from UTF-7-IMAP to ISO-8859-1");
}

// DEPRECATED, WILL BE REMOVED TOMORROW!
//
// Returns a copy of the ISO-8859-1 string isostring in which the German
// umlauts and the sharp s are replaced by their named HTML entities.
// Unless showerr_bug is set, the characters & " < > are escaped first.
std::string iso_to_html(const std::string& isostring, bool showerr_bug)
{
    string result = isostring;

    // TODO: This needs to be replaced soon by a proper
    // HTML quoted chars engine. Then we can also remove &uuml; from i18n files.
    if (!showerr_bug) {
        // "&" has to go first, otherwise the entities inserted below would be escaped again
        replace_all (result, "&", "&amp;");
        replace_all (result, "\"", "&quot;");
        replace_all (result, "<", "&lt;");
        replace_all (result, ">", "&gt;");
    }

    // The search strings are the ISO-8859-1 byte values, written as hex
    // escapes. That avoids an iconv round-trip per character on every call
    // and does not depend on the encoding this source file is stored in.
    replace_all (result, "\xe4", "&auml;");     // a umlaut
    replace_all (result, "\xf6", "&ouml;");     // o umlaut
    replace_all (result, "\xfc", "&uuml;");     // u umlaut
    replace_all (result, "\xc4", "&Auml;");     // A umlaut
    replace_all (result, "\xd6", "&Ouml;");     // O umlaut
    replace_all (result, "\xdc", "&Uuml;");     // U umlaut
    replace_all (result, "\xdf", "&szlig;");    // sharp s

    return result;
}

// Split input into text pieces and (html) tag pieces.
//
// Every piece is appended to tokenized as a (text, is_tag) pair; existing
// entries of tokenized are kept. A tag piece runs from a '<' up to and
// including the next '>'. Everything else, including an opened tag that is
// never closed, is stored as a non-tag piece.
void tokenize_by_tag(vector<pair<string,bool> > &tokenized, const std::string &input)
{
    bool in_tag = false;
    string pending;

    for (string::const_iterator ch = input.begin(); ch != input.end(); ++ch) {
        const bool opens = (*ch == '<');
        const bool closes = !opens && in_tag && (*ch == '>');

        if (opens) {
            // whatever was collected so far ends here and is not a complete tag
            if (!pending.empty()) {
                tokenized.push_back(pair<string,bool>(pending, false));
                pending.clear();
            }
            in_tag = true;
        }

        pending += *ch;

        if (closes) {
            tokenized.push_back(pair<string,bool>(pending, true));
            pending.clear();
            in_tag = false;
        }
    }

    // Trailing text (or an unterminated tag) counts as plain text
    if (!pending.empty())
        tokenized.push_back(pair<string,bool>(pending, false));
}

// Returns input with every piece that tokenize_by_tag() classifies as a tag removed.
std::string strip_html_tags(const std::string &input)
{
    // Each entry is (text, is_tag)
    vector<pair<string,bool> > pieces;
    tokenize_by_tag(pieces, input);

    string text_only;
    for (vector<pair<string,bool> >::size_type i = 0; i < pieces.size(); ++i) {
        if (pieces[i].second)
            continue;

        text_only += pieces[i].first;
    }

    return text_only;
}

// Encode HTML entities in the text parts of input while copying every piece
// that tokenize_by_tag() classifies as a tag through unchanged.
string smart_html_entities(const std::string &input)
{
    // Each entry is (text, is_tag)
    vector<pair<string,bool> > pieces;
    tokenize_by_tag(pieces, input);

    string encoded;
    for (vector<pair<string,bool> >::size_type i = 0; i < pieces.size(); ++i) {
        const pair<string,bool> &piece = pieces[i];

        // only plain text goes through the entity encoder
        encoded += piece.second ? piece.first : html_entities(piece.first);
    }

    return encoded;
}

// Replace & " < > as well as the German umlauts and the sharp s by their
// named HTML entities. The umlaut literals below are stored in the encoding
// of this source file (UTF-8 according to the original comment).
string html_entities(std::string str)
{
    // Applied top to bottom. "&" must stay first so that the ampersands of
    // the entities inserted afterwards are not escaped again.
    static const char *const replacements[][2] = {
        // Normal chars
        { "&",  "&amp;"   },
        { "\"", "&quot;"  },
        { "<",  "&lt;"    },
        { ">",  "&gt;"    },
        // Umlauts
        { "ä",  "&auml;"  },
        { "ö",  "&ouml;"  },
        { "ü",  "&uuml;"  },
        { "Ä",  "&Auml;"  },
        { "Ö",  "&Ouml;"  },
        { "Ü",  "&Uuml;"  },
        // Misc
        { "ß",  "&szlig;" }
    };
    const size_t count = sizeof(replacements) / sizeof(replacements[0]);

    for (size_t i = 0; i < count; ++i)
        replace_all (str, replacements[i][0], replacements[i][1]);

    return str;
}

bool replace_all(string &base, const char *ist, const char *soll)
{
    string i=ist;
    string s=soll;
    return replace_all(base,&i,&s);
}

bool replace_all(string &base, const string &ist, const char *soll)
{
    string s=soll;
    return replace_all(base,&ist,&s);
}

// Convenience overload taking search string and replacement by pointer.
// Both pointers are dereferenced without a check.
bool replace_all(string &base, const string *ist, const string *soll)
{
    const string &needle = *ist;
    const string &replacement = *soll;

    return replace_all(base, needle, replacement);
}

bool replace_all(string &base, const char *ist, const string *soll)
{
    string i=ist;
    return replace_all(base,&i,soll);
}

// Replace every occurrence of ist inside base by soll, modifying base in place.
//
// The search continues behind the text that was just inserted, so an
// occurrence of ist inside soll is never replaced again.
// Returns true if at least one replacement was made.
// Throws runtime_error if ist is empty.
bool replace_all(string &base, const string &ist, const string &soll)
{
    if (ist.empty())
        throw runtime_error("replace_all called with empty search string");

    bool replaced = false;

    for (string::size_type hit = base.find(ist, 0);
         hit != string::npos;
         hit = base.find(ist, hit + soll.size()))
    {
        base.replace(hit, ist.size(), soll);
        replaced = true;
    }

    return replaced;
}

// Returns a copy of src in which every character has been passed through
// tolower(), i.e. lowercased according to the current C locale.
string to_lower(const string &src)
{
    string dst = src;

    for (string::iterator ch = dst.begin(); ch != dst.end(); ++ch) {
        // tolower() is only defined for EOF and values representable as
        // unsigned char; a plain char >= 0x80 may be negative, so go through
        // unsigned char first.
        *ch = static_cast<char>(tolower(static_cast<unsigned char>(*ch)));
    }

    return dst;
}

// Returns a copy of src in which every character has been passed through
// toupper(), i.e. uppercased according to the current C locale.
string to_upper(const string &src)
{
    string dst = src;

    for (string::iterator ch = dst.begin(); ch != dst.end(); ++ch) {
        // toupper() is only defined for EOF and values representable as
        // unsigned char; a plain char >= 0x80 may be negative, so go through
        // unsigned char first.
        *ch = static_cast<char>(toupper(static_cast<unsigned char>(*ch)));
    }

    return dst;
}

// Format a byte count for display, e.g. as "<number> KBytes".
//
// The value is divided by 1000 as long as it is greater than 1000 (a value
// of exactly 1000 stays in the smaller unit), rounded to one decimal place
// and printed in fixed notation with two digits after the decimal point,
// followed by the translated unit name.
string nice_unit_format (int input) {
    float value = input;
    int steps = 0;

    for (; value > 1000; ++steps)
        value = value / 1000;

    // Round to one decimal: scale up, add one half, cut off the fraction, scale down
    float tenths = value*10;
    tenths += 0.5;
    tenths = int (tenths);
    value = float(tenths)/float(10);

    ostringstream text;
    text.setf (ios::fixed);
    text.precision(2);
    text << value;

    // The unit strings are kept as literals inside the i18n() calls
    if (steps == 1)
        text << i18n(" KBytes");
    else if (steps == 2)
        text << i18n(" MBytes");
    else if (steps == 3)
        text << i18n(" Gbytes");
    else
        text << i18n(" Bytes");

    return text.str();
}

// Returns s wrapped in double quotes, with the characters that would break
// the quoting rewritten:
//   "  and  \      get a backslash put in front of them
//   carriage return becomes the two characters \r
//   newline         becomes the two characters \n
string escape(const string &s)
{
    string quoted;
    quoted.reserve(s.size() + 2);
    quoted += '"';

    for (string::const_iterator ch = s.begin(); ch != s.end(); ++ch)
    {
        switch (*ch)
        {
        case '"':
        case '\\':
            quoted += '\\';
            quoted += *ch;
            break;
        case '\r':
            quoted += "\\r";
            break;
        case '\n':
            quoted += "\\n";
            break;
        default:
            quoted += *ch;
        }
    }

    quoted += '"';
    return quoted;
}

// Read a double-quoted, backslash-escaped string out of s and return its
// unescaped content.
//
// s:        text containing the quoted string
// startpos: index in s of the opening double quote
// endpos:   receives the index in s of the closing double quote
//
// Inside the quotes \r and \n turn into carriage return and newline. Any
// other backslash is dropped and the character behind it is kept.
// Throws out_of_range if s[startpos] is not a double quote (or startpos lies
// outside of s), and also if a backslash is the very last character of the
// extracted text.
// NOTE(review): if no closing quote exists, everything behind the opening
// quote is taken and endpos ends up equal to startpos - confirm that callers
// cope with this.
string descape(const string &s, int startpos, int &endpos)
{
    if (s.at(startpos) != '"')
        throw out_of_range("value not type escaped string");

    string text = s.substr(startpos+1);

    // Find the terminating quote: the first quote preceded by an even
    // number of backslashes (an odd number means the quote itself is escaped)
    string::size_type closing = 0;
    while ((closing = text.find('"', closing)) != string::npos)
    {
        string::size_type backslashes = 0;
        while (backslashes < closing && text[closing-1-backslashes] == '\\')
            ++backslashes;

        if (backslashes % 2 == 0)
            break;

        ++closing;
    }

    // cut off the closing quote and everything behind it
    text = text.substr(0, closing);

    // report where the string ended, relative to s
    endpos = startpos+closing+1;

    // resolve the backslash sequences; the character produced by a sequence is skipped
    for (string::size_type bs = text.find('\\');
         bs != string::npos;
         bs = text.find('\\', bs+1))
    {
        const char code = text.at(bs+1);

        if (code == 'r')
            text.replace(bs, 2, "\r");
        else if (code == 'n')
            text.replace(bs, 2, "\n");
        else
            text.erase(bs, 1);
    }

    return text;
}

// Wrap input in single quotes for use as one shell argument. Every single
// quote inside input is written as '\'' (close the quoting, escaped quote,
// reopen the quoting). An empty input yields an empty string, not ''.
string escape_shellarg(const string &input)
{
    if (input.empty())
        return "";

    string quoted(1, '\'');

    for (string::size_type i = 0; i < input.size(); ++i) {
        const char ch = input[i];

        if (ch == '\'')
            quoted += "'\\''";
        else
            quoted += ch;
    }

    quoted += '\'';
    return quoted;
}
Commit	Line	Data
e93545dd GE	1	/***************************************************************************
	2	escape.cpp - escaping of strings
	3	-------------------
	4	begin : Sun Nov 14 1999
	5	copyright : (C) 1999 by Intra2net AG
	6	email : info@intra2net.com
	7	***************************************************************************/
	8
	9	#include <iostream>
	10	#include <string>
	11	#include <sstream>
	12	#include <stdexcept>
	13
	14	#include <stdlib.h>
	15	#include <iconv.h>
	16	#include <i18n.h>
	17
	18	#include <stringfunc.hxx>
	19
	20	using namespace std;
	21
	22	std::string iso_to_utf8(const std::string& isostring)
	23	{
	24	string result;
118e216e	25
e93545dd	26	iconv_t i2utf8 = iconv_open ("UTF-8", "ISO-8859-1");
118e216e	27
e93545dd GE	28	if (iso_to_utf8 == (iconv_t)-1)
e93545dd GE	29	throw runtime_error("iconv can't convert from ISO-8859-1 to UTF-8");
118e216e	30
e93545dd GE	31	size_t in_size=isostring.size();
e93545dd GE	32	size_t out_size=in_size*4;
118e216e	33
e93545dd GE	34	char buf = (char )malloc(out_size+1);
	35	if (buf == NULL)
	36	throw runtime_error("out of memory for iconv buffer");
	37
	38	const char *in = isostring.c_str();
	39	char *out = buf;
	40	iconv (i2utf8, &in, &in_size, &out, &out_size);
118e216e	41
e93545dd	42	buf[isostring.size()*4-out_size]=0;
118e216e	43
e93545dd	44	result=buf;
118e216e	45
e93545dd GE	46	free(buf);
e93545dd GE	47	iconv_close (i2utf8);
118e216e	48
e93545dd GE	49	return result;
	50	}
	51
	52	std::string utf8_to_iso(const std::string& utf8string)
	53	{
	54	string result;
118e216e	55
e93545dd	56	iconv_t utf82iso = iconv_open ("ISO-8859-1","UTF-8");
118e216e	57
e93545dd GE	58	if (utf82iso == (iconv_t)-1)
e93545dd GE	59	throw runtime_error("iconv can't convert from UTF-8 to ISO-8859-1");
118e216e	60
e93545dd GE	61	size_t in_size=utf8string.size();
e93545dd GE	62	size_t out_size=in_size;
118e216e	63
e93545dd GE	64	char buf = (char )malloc(out_size+1);
	65	if (buf == NULL)
	66	throw runtime_error("out of memory for iconv buffer");
	67
	68	const char *in = utf8string.c_str();
	69	char *out = buf;
	70	iconv (utf82iso, &in, &in_size, &out, &out_size);
118e216e	71
e93545dd	72	buf[utf8string.size()-out_size]=0;
118e216e	73
e93545dd	74	result=buf;
118e216e	75
e93545dd GE	76	free(buf);
	77	iconv_close (utf82iso);
	78
	79	return result;
	80	}
	81
d116a071 TJ	82	std::string utf7imap_to_iso(const std::string& utf7imapstring)
	83	{
	84	string result;
118e216e	85
d116a071	86	iconv_t utf7imap2iso = iconv_open ("ISO-8859-1","UTF-7-IMAP");
118e216e	87
d116a071 TJ	88	if (utf7imap2iso == (iconv_t)-1)
d116a071 TJ	89	throw runtime_error("iconv can't convert from UTF-7-IMAP to ISO-8859-1");
118e216e	90
d116a071 TJ	91	size_t in_size=utf7imapstring.size();
d116a071 TJ	92	size_t out_size=in_size;
118e216e	93
d116a071 TJ	94	char buf = (char )malloc(out_size+1);
	95	if (buf == NULL)
	96	throw runtime_error("out of memory for iconv buffer");
	97
	98	const char *in = utf7imapstring.c_str();
	99	char *out = buf;
	100	iconv (utf7imap2iso, &in, &in_size, &out, &out_size);
118e216e	101
d116a071	102	buf[utf7imapstring.size()-out_size]=0;
118e216e	103
d116a071	104	result=buf;
118e216e	105
d116a071 TJ	106	free(buf);
	107	iconv_close (utf7imap2iso);
	108
	109	return result;
	110	}
	111
118e216e TJ	112	// DEPRECATED, WILL BE REMOVED TOMORROW!
	113	std::string iso_to_html(const std::string& isostring, bool showerr_bug)
	114	{
	115	string result = isostring;
	116
	117	// TODO: This needs to be removed soon by a proper
	118	// HTML quoted chars engine. Then we can also remove ü from i18n files.
	119	if (!showerr_bug) {
	120	replace_all (result, "&", "&");
	121	replace_all (result, "\"", """);
	122	replace_all (result, "<", "<");
	123	replace_all (result, ">", ">");
	124	}
	125
	126	replace_all (result, utf8_to_iso("ä"), "ä");
	127	replace_all (result, utf8_to_iso("ö"), "ö");
	128	replace_all (result, utf8_to_iso("ü"), "ü");
	129	replace_all (result, utf8_to_iso("Ä"), "Ä");
	130	replace_all (result, utf8_to_iso("Ö"), "Ö");
	131	replace_all (result, utf8_to_iso("Ü"), "Ü");
	132	replace_all (result, utf8_to_iso("ß"), "ß");
	133
	134	return result;
	135	}
	136
	137	// Tokenize string by (html) tags
	138	void tokenize_by_tag(vector<pair<string,bool> > &tokenized, const std::string &input)
	139	{
	140	string::size_type pos, len = input.size();
	141	bool inside_tag = false;
	142	string current;
	143
	144	for (pos = 0; pos < len; pos++) {
	145	if (input[pos] == '<') {
	146	inside_tag = true;
	147
	148	if (!current.empty()) {
	149	tokenized.push_back(make_pair(current, false));
	150	current = "";
	151	}
	152
	153	current += input[pos];
	154	} else if (input[pos] == '>' && inside_tag) {
	155	current += input[pos];
	156	inside_tag = false;
	157	if (!current.empty()) {
	158	tokenized.push_back(make_pair(current, true));
	159	current = "";
	160	}
	161	} else
	162	current += input[pos];
	163	}
	164
	165	// String left over in buffer?
	166	if (!current.empty())
	167	tokenized.push_back(make_pair(current, false));
	168	}
	169
	170	std::string strip_html_tags(const std::string &input)
	171	{
	172	// Pair first: string, second: isTag
	173	vector<pair<string,bool> > tokenized;
	174	tokenize_by_tag(tokenized, input);
	175
176	string output;
177	vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
178	for (token = tokenized.begin(); token != tokens_end; token++)
179	if (!token->second)
180	output += token->first;
181
182	return output;
183	}
184
185	// Smart-encode HTML en
186	string smart_html_entities(const std::string &input)
187	{
188	// Pair first: string, second: isTag
189	vector<pair<string,bool> > tokenized;
190	tokenize_by_tag(tokenized, input);
191
192	string output;
193	vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
194	for (token = tokenized.begin(); token != tokens_end; token++) {
195	// keep HTML tags as they are
196	if (token->second)
197	output += token->first;
198	else
199	output += html_entities(token->first);
200	}
201
202	return output;
203	}
204
205	// encoded UTF-8 chars into HTML entities
206	string html_entities(std::string str)
207	{
208	// Normal chars
209	replace_all (str, "&", "&");
210	replace_all (str, "\"", """);
211	replace_all (str, "<", "<");
212	replace_all (str, ">", ">");
213
214	// Umlauts
215	replace_all (str, "ä", "ä");
216	replace_all (str, "ö", "ö");
217	replace_all (str, "ü", "ü");
218	replace_all (str, "Ä", "Ä");
219	replace_all (str, "Ö", "Ö");
220	replace_all (str, "Ü", "Ü");
221
222	// Misc
223	replace_all (str, "ß", "ß");
224
225	return str;
226	}
227
e93545dd GE	228	bool replace_all(string &base, const char ist, const char soll)
	229	{
	230	string i=ist;
	231	string s=soll;
	232	return replace_all(base,&i,&s);
	233	}
	234
	235	bool replace_all(string &base, const string &ist, const char *soll)
	236	{
	237	string s=soll;
	238	return replace_all(base,&ist,&s);
	239	}
	240
	241	bool replace_all(string &base, const string ist, const string soll)
	242	{
	243	return replace_all(base,ist,soll);
	244	}
	245
	246	bool replace_all(string &base, const char ist, const string soll)
	247	{
	248	string i=ist;
	249	return replace_all(base,&i,soll);
	250	}
	251
	252	bool replace_all(string &base, const string &ist, const string &soll)
	253	{
	254	bool found_ist = false;
	255	string::size_type a=0;
	256
1ec2064e TJ	257	if (ist.empty())
	258	throw runtime_error("replace_all called with empty search string");
	259
e93545dd GE	260	while((a=base.find(ist,a))!=string::npos)
	261	{
	262	base.replace(a,ist.size(),soll);
	263	a=a+soll.size();
	264	found_ist = true;
	265	}
	266
	267	return found_ist;
	268	}
	269
	270	string to_lower(const string &src)
	271	{
	272	string dst = src;
	273
	274	string::size_type pos = 0, end = dst.size();
	275	for (pos = 0; pos < end; pos++)
	276	dst[pos] = tolower(dst[pos]);
	277
	278	return dst;
	279	}
	280
	281	string to_upper(const string &src)
	282	{
	283	string dst = src;
	284
	285	string::size_type pos = 0, end = dst.size();
	286	for (pos = 0; pos < end; pos++)
	287	dst[pos] = toupper(dst[pos]);
	288
	289	return dst;
	290	}
	291
	292	string nice_unit_format (int input) {
	293	float size = input;
	294	int sizecount = 0;
	295
	296	while (size > 1000) {
	297	size = size / 1000;
	298	sizecount++;
	299	}
	300
	301	float tmp; // round
	302	tmp = size*10;
	303	tmp += 0.5;
	304	tmp = int (tmp);
	305	tmp = float(tmp)/float(10);
	306	size = tmp;
	307
	308	ostringstream out;
	309
	310	out.setf (ios::fixed);
	311	out.precision(2);
	312	switch (sizecount) {
	313	case 1:
	314	out << size << i18n(" KBytes");
	315	break;
	316	case 2:
	317	out << size << i18n(" MBytes");
	318	break;
	319	case 3:
	320	out << size << i18n(" Gbytes");
	321	break;
	322	default:
	323	out << size << i18n(" Bytes");
324	break;
325	}
326
327	return out.str();
328	}
329
47c07fba GE	330	string escape(const string &s)
	331	{
	332	string out(s);
	333	string::size_type p;
	334
	335	p=0;
	336	while ((p=out.find_first_of("\"\\",p))!=out.npos)
	337	{
	338	out.insert(p,"\\");
	339	p+=2;
	340	}
	341
	342	p=0;
	343	while ((p=out.find_first_of("\r",p))!=out.npos)
	344	{
	345	out.replace(p,1,"\\r");
	346	p+=2;
	347	}
	348
	349	p=0;
	350	while ((p=out.find_first_of("\n",p))!=out.npos)
	351	{
	352	out.replace(p,1,"\\n");
	353	p+=2;
	354	}
	355
	356	out='"'+out+'"';
	357
	358	return out;
	359	}
	360
	361	string descape(const string &s, int startpos, int &endpos)
	362	{
	363	string out;
	364
	365	if (s.at(startpos) != '"')
	366	throw out_of_range("value not type escaped string");
	367
	368	out=s.substr(startpos+1);
	369	string::size_type p=0;
	370
	371	// search for the end of the string
	372	while((p=out.find("\"",p))!=out.npos)
	373	{
	374	int e=p-1;
	375	bool escaped=false;
	376
	377	// the " might be escaped with a backslash
	378	while(e>=0 && out.at(e)=='\\')
	379	{
	380	if (escaped == false)
	381	escaped=true;
	382	else
	383	escaped=false;
	384
	385	e--;
	386	}
	387
	388	if (escaped==false)
	389	break;
	390	else
	391	p++;
	392	}
	393
394	// we now have the end of the string
395	out=out.substr(0,p);
396
397	// tell calling prog about the endposition
398	endpos=startpos+p+1;
399
400	// descape all \ stuff inside the string now
401	p=0;
402	while((p=out.find_first_of("\\",p))!=out.npos)
403	{
404	switch(out.at(p+1))
405	{
406	case 'r':
407	out.replace(p,2,"\r");
408	break;
409	case 'n':
410	out.replace(p,2,"\n");
411	break;
412	default:
413	out.erase(p,1);
414	}
415	p++;
416	}
417
418	return out;
419	}
e93545dd	420
47c07fba GE	421	string escape_shellarg(const string &input)
	422	{
	423	if (!input.size())
	424	return "";
	425
	426	string output = "'";
	427	string::const_iterator it, it_end = input.end();
	428	for (it = input.begin(); it != it_end; it++) {
	429	if ((*it) == '\'')
	430	output += "'\\'";
	431
	432	output += *it;
	433	}
	434
	435	output += "'";
	436	return output;
	437	}