+/*
+The software in this package is distributed under the GNU General
+Public License version 2 (with a special exception described below).
+
+A copy of GNU General Public License (GPL) is included in this distribution,
+in the file COPYING.GPL.
+
+As a special exception, if other files instantiate templates or use macros
+or inline functions from this file, or you compile this file and link it
+with other works to produce a work based on this file, this file
+does not by itself cause the resulting work to be covered
+by the GNU General Public License.
+
+However the source code for this file must still be made available
+in accordance with section (3) of the GNU General Public License.
+
+This exception does not invalidate any other reasons why a work based
+on this file might be covered by the GNU General Public License.
+*/
/** @file
*
* (c) Copyright 2007-2008 by Intra2net AG
- *
- * info@intra2net.com
*/
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <algorithm>
+#include <cmath> // for round()
+#include <climits>
#include <wchar.h>
#include <stdlib.h>
#include <iconv.h>
#include <i18n.h>
+#include <boost/numeric/conversion/cast.hpp>
+#include <boost/foreach.hpp>
+
+#include <boost/assert.hpp>
+#include <boost/shared_ptr.hpp>
+#include <openssl/bio.h>
+#include <openssl/evp.h>
+
#include <stringfunc.hxx>
using namespace std;
} // eo split_string(const std::string&,std::list< std::string >&,const std::string&,bool,const std::string&)
+/**
+ * @brief splits a string by a delimiter and stores the parts in a vector.
+ *
+ * Delegates the actual work to the std::list overload of split_string and
+ * then hands the parts over to @a result. Any previous content of @a result
+ * is discarded.
+ *
+ * Note: three linear passes are involved (list size, vector resize and the
+ * element-wise swap); the strings themselves are swapped, not copied.
+ *
+ * @param str the string which should be split.
+ * @param[out] result the vector receiving the parts.
+ * @param delimiter passed through to the list overload.
+ * @param omit_empty passed through to the list overload.
+ * @param trim_list passed through to the list overload.
+ */
+void split_string(
+    const std::string& str,
+    std::vector<std::string>& result,
+    const std::string& delimiter,
+    bool omit_empty,
+    const std::string& trim_list
+)
+{
+    std::list<std::string> parts;
+    split_string(str, parts, delimiter, omit_empty, trim_list);
+
+    // make room for exactly one (still empty) string per part
+    result.clear();
+    result.resize(parts.size());
+
+    // swap each part into its slot; this avoids copying the string data
+    std::vector<std::string>::iterator target = result.begin();
+    for (std::list<std::string>::iterator source = parts.begin();
+         source != parts.end();
+         ++source, ++target)
+    {
+        target->swap(*source);
+    }
+}
+
/**
* splits a string by a given delimiter
* @param str the string which should be split.
} // eo split_string(const std::string&,const std::string&,bool,const std::string&)
-/**
- * @brief joins a list of strings into a single string.
- *
- * This funtion is (basically) the reverse operation of @a split_string.
- *
- * @param parts the list of strings.
- * @param delimiter the delimiter which is inserted between the strings.
- * @return the joined string.
- */
-std::string join_string(
- const std::list< std::string >& parts,
+/**
+ * @brief joins a NULL-terminated array of C strings into a single string.
+ *
+ * @param parts array of C strings; iteration stops at the first NULL entry.
+ *              A NULL array pointer or an array whose first entry is NULL
+ *              yields an empty string.
+ * @param delimiter the delimiter which is inserted between the strings.
+ * @return the joined string.
+ */
+std::string join_string (
+ const char *const parts[], /* assumed NULL-terminated */
const std::string& delimiter
)
{
std::string result;
- if (! parts.empty() )
+
+ // a NULL array is treated like an empty one
+ if (parts != NULL)
{
- std::list< std::string >::const_iterator it= parts.begin();
- result = *it;
- while ( ++it != parts.end() )
- {
- result+= delimiter;
- result+= *it;
- }
+ const char *const *cur = parts;
+
+ // the first element is copied without a leading delimiter
+ if (*cur != NULL) {
+ result = std::string (*cur);
+
+ // every further element is preceded by the delimiter
+ while (*++cur != NULL) {
+ result += delimiter;
+ result += std::string (*cur);
+ }
+ }
}
+
return result;
-} // eo join_string(const std::list< std::string >&,const std::string&)
+}
} // eo convert_hex_to_binary(const std::string&)
+/**
+ * @brief returns the template name prefixes whose allocator argument is
+ *        shortened by shorten_stl_types.
+ *
+ * The list is filled on the first call and the same object is returned
+ * on every later call.
+ *
+ * @return reference to the function-local list of prefixes.
+ */
+static list<string>& alloc_template_starts()
+{
+    static list<string> prefixes;
+
+    // already filled by an earlier call
+    if (!prefixes.empty())
+        return prefixes;
+
+    prefixes.push_back("std::list");
+    prefixes.push_back("std::vector");
+    return prefixes;
+}
+
+/**
+ * @brief shortens the textual representation of some STL types.
+ *
+ * Two kinds of replacement are done on a copy of @a input:
+ * - the fully spelled out basic_string<char, ...> text becomes "std::string"
+ * - for every template prefix returned by alloc_template_starts(), an
+ *   allocator argument that matches the first template argument is replaced
+ *   by "_alloc_", e.g. "std::list<int, std::allocator<int> >" becomes
+ *   "std::list<int, _alloc_>"
+ *
+ * Occurrences that do not have the expected form are left untouched.
+ *
+ * @param input the type description to shorten.
+ * @return the shortened copy of @a input.
+ */
+string shorten_stl_types(const string &input)
+{
+ string output = input;
+
+ // first: replace fixed string for std::string
+ replace_all(output, "std::basic_string<char, std::char_traits<char>, std::allocator<char> >",
+ "std::string");
+
+ // loop over list/vector/... that have an allocator, e.g.
+ // std::list< some_type_here, std::allocator<some_type_here> >
+ string::size_type start, comma, end, len, start_text_len;
+ int n_open_brackets;
+ string allocator_text;
+ BOOST_FOREACH(const string &start_text, alloc_template_starts())
+ {
+ start = 0;
+ comma = 0;
+ end = 0;
+ start_text_len = start_text.length();
+ while( (start=output.find(start_text+"<", start)) != string::npos )
+ {
+ len = output.length();
+ start += start_text_len+1; // start next iter and tests here after opening bracket
+
+ // now comes the tricky part: find matching ',' and the closing '>' even if "subtype" is template again
+ // first scan: stop at the ',' that belongs to the outermost bracket level
+ comma = start;
+ n_open_brackets = 1; // the bracket right after start_text counts as first
+ while (comma < len && n_open_brackets > 0)
+ {
+ if (output[comma] == ',' && n_open_brackets == 1)
+ break;
+ else if (output[comma] == '<')
+ ++n_open_brackets;
+ else if (output[comma] == '>')
+ --n_open_brackets;
+ ++comma;
+ }
+ // second scan: continue behind the comma until the outermost bracket is closed
+ end = comma+1;
+ while (end < len && n_open_brackets > 0)
+ {
+ if (output[end] == '<')
+ ++n_open_brackets;
+ else if (output[end] == '>')
+ {
+ --n_open_brackets;
+ if (n_open_brackets == 0)
+ break; // do not increment end
+ }
+ ++end;
+ }
+
+ // check that start < comma < end < len && n_open_brackets == 0
+ if (start >= comma || comma >= end || end >= len || n_open_brackets != 0)
+ continue; // input seems to be of unexpected form
+
+ // check that type in allocator is same as until comma
+ string type = output.substr(start, comma-start);
+ // a nested template type ends with '>', so the expected text has an extra space ("> >")
+ if (type[type.length()-1] == '>')
+ allocator_text = string("std::allocator<") + type + " > ";
+ else
+ allocator_text = string("std::allocator<") + type + "> ";
+ // comma+2 skips the comma and the one character behind it (presumably a space);
+ // the compared text ends right before the closing '>' at index end
+ if (output.substr(comma+2, end-comma-2) == allocator_text)
+ output.replace(comma+2, end-comma-2, "_alloc_");
+ }
+ }
+
+ return output;
+}
+
+typedef boost::shared_ptr<BIO> BIO_Ptr;
+
+/**
+* @brief Converts openssl generic input/output to std::string
+*
+* Code adapted from keymakerd.
+*
+* @param input Openssl's generic input/output; its data is fetched
+*              via BIO_get_mem_data()
+* @return STL string holding a copy of the data; empty string if
+*         BIO_get_mem_data() reports no data
+**/
+static std::string _convert_BIO_to_string(BIO *input)
+{
+ std::string rtn;
+
+ // output is set to point at the data held by the BIO, nothing is copied yet
+ char *output = NULL;
+ long written = BIO_get_mem_data(input, &output);
+ if (written <= 0 || output == NULL)
+ return rtn;
+
+ // copy by explicit length, not up to the first NUL byte
+ rtn.assign(output, written); //lint !e534 !e732
+ return rtn;
+} //lint !e1764
+
+/**
+ * @brief base64 encode a string using OpenSSL base64 functions
+ *
+ * Data size limit is just below 2GB (INT_MAX), because BIO_write()
+ * takes the data length as int.
+ *
+ * @param input String to encode
+ * @param one_line Encode all data as one line, no wrapping with line feeds
+ * @return base64 encoded string; empty string for empty input
+ * @throws std::runtime_error if the input is too large, if a BIO cannot
+ *         be allocated or if encoding fails
+ */
+std::string base64_encode(const std::string &input, bool one_line)
+{
+    // check for empty buffer
+    if (input.empty())
+        return input;
+
+    // BIO_write() takes the length as int: reject everything that would
+    // be truncated by the conversion (LONG_MAX is too lax on LP64 systems)
+    if (input.size() >= INT_MAX)
+        throw runtime_error("base64 encode: Too much data");
+    const int input_size = static_cast<int>(input.size());
+
+    // setup encoder. Note: BIO_free_all frees both BIOs.
+    BIO_Ptr base64_encoder(BIO_new(BIO_f_base64()), BIO_free_all);
+    BIO *encoder_bio = base64_encoder.get();
+    if (encoder_bio == NULL)
+        throw runtime_error("base64 encode: BIO_new() failed");
+    if (one_line)
+        BIO_set_flags(encoder_bio, BIO_FLAGS_BASE64_NO_NL);
+
+    // chain output buffer and encoder together; once pushed, the memory
+    // BIO is owned by the chain and released together with the encoder
+    BIO *encoded_result = BIO_new(BIO_s_mem());
+    if (encoded_result == NULL)
+        throw runtime_error("base64 encode: BIO_new() failed");
+    BIO_push(encoder_bio, encoded_result);
+
+    // encode
+    long written = BIO_write(encoder_bio, input.c_str(), input_size);
+    if (written != input_size)
+    {
+        ostringstream out;
+        out << "base64 encoding failed: input size: "
+            << input.size() << " vs. output size: " << written;
+        throw runtime_error(out.str());
+    }
+    // flush to make the encoder emit the final (padded) block
+    if (BIO_flush(encoder_bio) != 1)
+        throw runtime_error("base64 encode: BIO_flush() failed");
+
+    return _convert_BIO_to_string(encoded_result);
+}
+
+/**
+ * @brief base64 decode a string using OpenSSL base64 functions
+ *
+ * Data size limit is just below 2GB (INT_MAX), because BIO_new_mem_buf()
+ * takes the data length as int.
+ *
+ * @param input String to decode
+ * @param one_line Expect all base64 data in one line. Input with line feeds will fail.
+ * @return base64 decoded string; empty string for empty input
+ * @throws std::runtime_error if the input is too large, if a BIO cannot
+ *         be allocated or if decoding fails
+ */
+std::string base64_decode(const std::string &input, bool one_line)
+{
+    // check for empty buffer
+    if (input.empty())
+        return input;
+
+    // safety check for BIO_new_mem_buf()
+    if (input.size() >= INT_MAX)
+        throw runtime_error("base64 decode: Too much data");
+
+    // setup decoder. Note: BIO_free_all frees both BIOs.
+    BIO_Ptr base64_decoder(BIO_new(BIO_f_base64()), BIO_free_all);
+    BIO *bio_base64 = base64_decoder.get();
+    if (bio_base64 == NULL)
+        throw runtime_error("base64 decode: BIO_new() failed");
+    if (one_line)
+        BIO_set_flags(bio_base64, BIO_FLAGS_BASE64_NO_NL);
+
+    // chain input buffer and decoder together; the memory BIO only reads
+    // from the string, the const_cast is needed for older OpenSSL prototypes
+    BIO *bio_input = BIO_new_mem_buf(const_cast<char*>(input.c_str()),
+                                     static_cast<int>(input.size()));
+    if (bio_input == NULL)
+        throw runtime_error("base64 decode: BIO_new_mem_buf() failed");
+    bio_input = BIO_push(bio_base64, bio_input);
+
+    BIO_Ptr decoded_result(BIO_new(BIO_s_mem()), BIO_free_all);
+    BIO *bio_decoded = decoded_result.get();
+    if (bio_decoded == NULL)
+        throw runtime_error("base64 decode: BIO_new() failed");
+
+    const int convbuf_size = 512;
+    char convbuf[convbuf_size];
+
+    // pump the decoded data chunk by chunk into the result BIO
+    long read_bytes = 0;
+    while((read_bytes = BIO_read(bio_input, convbuf, convbuf_size)) > 0)
+    {
+        BOOST_ASSERT(read_bytes <= convbuf_size);
+        long written_bytes = BIO_write(bio_decoded, convbuf, read_bytes);
+        if (written_bytes != read_bytes)
+        {
+            ostringstream out;
+            out << "base64 decoding failed: read_bytes: "
+                << read_bytes << " vs. written_bytes: " << written_bytes;
+            throw runtime_error(out.str());
+        }
+    }
+    // zero means end of data, any negative value is an error
+    if (read_bytes < 0)
+        throw runtime_error("base64 decode: Error during decoding");
+
+    return _convert_BIO_to_string(bio_decoded);
+}
+
} // eo namespace I2n
char *in = (char *)utf8string.c_str();
char *out = (char*) buf;
- if (iconv(utf82wstr, &in, &in_size, &out, &out_size) == -1)
+ if (iconv(utf82wstr, &in, &in_size, &out, &out_size) == (size_t)-1)
throw runtime_error("error converting char encodings");
buf[ ( (utf8string.size()+1)*sizeof(wchar_t)-out_size) /sizeof(wchar_t) ]=0;
string output;
vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
- for (token = tokenized.begin(); token != tokens_end; token++)
+ for (token = tokenized.begin(); token != tokens_end; ++token)
if (!token->second)
output += token->first;
string output;
vector<pair<string,bool> >::const_iterator token, tokens_end = tokenized.end();
- for (token = tokenized.begin(); token != tokens_end; token++)
+ for (token = tokenized.begin(); token != tokens_end; ++token)
{
// keep HTML tags as they are
if (token->second)
return str;
} // eo html_entities(std::string)
+/**
+ * @brief convert HTML entities to something that can be viewed on a basic
+ *        text console (restricted to ASCII-7)
+ *
+ * German umlauts and the sharp s are transliterated ("ae", "oe", ..., "ss").
+ * The ampersand entity is decoded last, so that an escaped entity like
+ * "&amp;lt;" ends up as the text "&lt;" and is not decoded a second time.
+ *
+ * NOTE(review): the entity spellings for the apostrophe and the slash
+ * ("&#x27;" / "&#x2F;") are assumed to mirror what html_entities() emits;
+ * please confirm.
+ *
+ * @param str the string containing HTML entities
+ * @return the string with the known entities replaced
+ */
+string html_entities_to_console(std::string str)
+{
+    // Normal chars
+    replace_all (str, "&lt;", "<");
+    replace_all (str, "&gt;", ">");
+    replace_all (str, "&quot;", "\"");
+    replace_all (str, "&#x27;", "'");
+    replace_all (str, "&#x2F;", "/");
+
+    // Umlauts
+    replace_all (str, "&auml;", "ae");
+    replace_all (str, "&ouml;", "oe");
+    replace_all (str, "&uuml;", "ue");
+    replace_all (str, "&Auml;", "Ae");
+    replace_all (str, "&Ouml;", "Oe");
+    replace_all (str, "&Uuml;", "Ue");
+
+    // Misc
+    replace_all (str, "&szlig;", "ss");
+
+    // must stay the last replacement, see above
+    replace_all (str, "&amp;", "&");
+
+    return str;
+}
+
+// convenience overload: locate all comments with find_html_comments and
+// remove them from str in one go
+void remove_html_comments(string &str)
+{
+    remove_html_comments(str, find_html_comments(str));
+}
+
+// Locate all html comments in str, coping with nested comments.
+// Every zone runs from the first character of the start tag to the first
+// index behind the closing tag. Zones are appended in the order in which
+// their closing tags are found, so an enclosing comment follows the comments
+// nested inside it.
+// Invalid input is reported instead of rejected: a closing tag without a start
+// tag gives a zone whose start is string::npos, a start tag without a closing
+// tag gives a zone whose end is string::npos (these are appended last,
+// innermost first).
+vector<CommentZone> find_html_comments(const std::string &str)
+{
+    static const string OPEN_TAG = "<!--";
+    static const string CLOSE_TAG = "-->";
+
+    vector<CommentZone> zones;
+
+    // positions of start tags that still wait for their closing tag
+    vector<string::size_type> open_stack;
+
+    const string::size_type total = str.length();
+    string::size_type cursor = 0;
+
+    while (cursor < total)
+    {
+        const string::size_type open_at = str.find(OPEN_TAG, cursor);
+        const string::size_type close_at = str.find(CLOSE_TAG, cursor);
+
+        // no tag of either kind left: done
+        if ( (open_at == string::npos) && (close_at == string::npos) )
+            break;
+
+        const bool closing_comes_first =
+            (open_at == string::npos) || (close_at < open_at);
+
+        if (closing_comes_first)
+        {
+            // closing tag: pair it with the innermost pending start tag, if any
+            const string::size_type zone_end = close_at + CLOSE_TAG.length();
+            string::size_type zone_start = string::npos;
+            if (!open_stack.empty())
+            {
+                zone_start = open_stack.back();
+                open_stack.pop_back();
+            }
+            zones.push_back(CommentZone(zone_start, zone_end));
+            cursor = zone_end;
+        }
+        else
+        {
+            // start tag: remember it and continue behind it
+            open_stack.push_back(open_at);
+            cursor = open_at + OPEN_TAG.length();
+        }
+    }
+
+    // start tags that never got closed, from back to front
+    // (the order matters for remove_html_comments)
+    for (vector<string::size_type>::const_reverse_iterator pending = open_stack.rbegin();
+         pending != open_stack.rend();
+         ++pending)
+    {
+        zones.push_back(CommentZone(*pending, string::npos));
+    }
+
+    return zones;
+}
+
+// remove all html comments found by find_html_comments
+void remove_html_comments(std::string &str, const vector<CommentZone> &comments)
+{
+    // start index of the most recent erasure
+    string::size_type erased_from = str.length();
+
+    // Walk the zones from back to front so that the indices of the zones
+    // still to come stay valid. This relies on enclosing comments being
+    // listed AFTER the comments nested inside them (i.e. zones ordered by
+    // closing tag, not by start tag), which is how find_html_comments
+    // reports them.
+    typedef vector<CommentZone>::const_reverse_iterator ZoneIterator;
+    for (ZoneIterator zone = comments.rbegin(); zone != comments.rend(); ++zone)
+    {
+        if (zone->first == string::npos)
+        {
+            // comment starts "before" str --> delete from start
+            str.erase(0, zone->second);
+            break; // there can be no more
+        }
+
+        // zone lies inside a comment that has been removed already
+        if (zone->first >= erased_from)
+            continue;
+
+        if (zone->second == string::npos)
+            str.erase(zone->first); // comment ends "after" str --> delete until end
+        else
+            str.erase(zone->first, zone->second - zone->first);
+
+        erased_from = zone->first;
+    }
+}
bool replace_all(string &base, const char *ist, const char *soll)
{
return found_ist;
}
+/**
+ * @brief replaces all characters that could be problematic or impose a security risk when being logged
+ * @param str the original string
+ * @param replace_with the character to replace the unsafe chars with
+ * @return a copy of @a str that is safe to send to syslog or other logfiles
+ *
+ * Only the chars from 0x20 (space) up to and including 0x7E (~) are kept,
+ * everything else is replaced.
+ * See e.g. RFC 5424, section 8.2 or the posix character class "printable".
+ * This eliminates all possible problems with NUL, control characters, 8 bit chars, UTF8.
+ */
+std::string sanitize_for_logging(const std::string &str, const char replace_with)
+{
+    std::string cleaned(str);
+
+    for (std::string::iterator ch = cleaned.begin(); ch != cleaned.end(); ++ch)
+    {
+        const bool printable = (*ch >= 0x20) && (*ch <= 0x7E);
+        if (!printable)
+            *ch = replace_with;
+    }
+
+    return cleaned;
+}
+
#if 0
string to_lower(const string &src)
{
}
#endif
-const int MAX_UNIT_FORMAT_SYMBOLS = 9;
+const int MAX_UNIT_FORMAT_SYMBOLS = 6;
const string shortUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
" B",
" MB",
" GB",
" TB",
- " PB",
- " EB",
- " ZB",
- " YB"
+ " PB"
};
const string longUnitFormatSymbols[MAX_UNIT_FORMAT_SYMBOLS] = {
i18n_noop(" MBytes"),
i18n_noop(" GBytes"),
i18n_noop(" TBytes"),
- i18n_noop(" PBytes"),
- i18n_noop(" EBytes"),
- i18n_noop(" ZBytes"),
- i18n_noop(" YBytes")
+ i18n_noop(" PBytes")
};
-long double rounding_upwards(
- long double number,
+static long double rounding_upwards(
+ const long double number,
const int rounding_multiplier
)
{
string nice_unit_format(
const int64_t input,
- const UnitBase base,
- const UnitFormat format
+ const UnitFormat format,
+ const UnitBase base
)
{
// select the system of units (decimal or binary)
{
size = size / multiple;
sizecount++;
+
+ // rollback to the previous values and stop the loop when cannot
+ // represent the number length.
+ if (sizecount >= MAX_UNIT_FORMAT_SYMBOLS)
+ {
+ size = size * multiple;
+ sizecount--;
+ break;
+ }
}
// round the input number "half up" to multiples of 10
} // eo nice_unit_format(int input)
+/**
+ * @brief floating point front end of nice_unit_format.
+ *
+ * The value is rounded to the nearest integer and handed over to the
+ * int64_t overload.
+ *
+ * @param input the size value to format.
+ * @param format passed through to the int64_t overload.
+ * @param base passed through to the int64_t overload.
+ * @return the result of the int64_t overload for the rounded value.
+ */
+string nice_unit_format(
+    const double input,
+    const UnitFormat format,
+    const UnitBase base
+)
+{
+    // Round while the value is still a double, then convert with
+    // numeric_cast: the cast raised an overflow error near the max val of
+    // int64_t (~9.2e18, see unittest).
+    const int64_t rounded_input = boost::numeric_cast<int64_t>( round(input) );
+
+    return nice_unit_format( rounded_input, format, base );
+} // eo nice_unit_format(double input)
+
+
string escape(const string &s)
{
string out(s);
{
string output = "'";
string::const_iterator it, it_end = input.end();
- for (it = input.begin(); it != it_end; it++)
+ for (it = input.begin(); it != it_end; ++it)
{
if ( (*it) == '\'')
output += "'\\'";