// ---------------------------------------------------------------------------
// Commit header and diff context, preserved as extracted.
//
// From: Juliana Rodrigueiro
// Date: Wed, 8 Aug 2018 12:40:24 +0000 (+0200)
// Subject: Implement percent url encoder and decoder
// X-Git-Tag: v2.10~2
// X-Git-Url: http://developer.intra2net.com/git/?p=libi2ncommon;a=commitdiff_plain;h=a93685ca7c6c9e7ba75ff2144eb8fa429ec27716
//
// Implement percent url encoder and decoder
// ---
// diff --git a/src/restricted_html.cpp b/src/restricted_html.cpp
// index a157564..d59bcff 100644
// --- a/src/restricted_html.cpp
// +++ b/src/restricted_html.cpp
// @@ -25,6 +25,8 @@ on this file might be covered by the GNU General Public License. */
// #include +#include +#include #include #include
// @@ -37,4 +39,129 @@ namespace I2n
//
// NOTE(review): the header names of the #include lines above were lost when
// the page was extracted. The includes below are the standard headers the
// visible code needs; confirm against the real file.
// ---------------------------------------------------------------------------

#include <cctype>
#include <iomanip>
#include <sstream>
#include <string>

namespace I2n
{

/**
 * @brief Replace all "+" characters found in s with spaces (" ").
 *
 * @param s string that will be modified in place.
 */
static void unescape_space(std::string &s)
{
    // Continue each search just past the previous hit instead of rescanning
    // the already-processed prefix.
    for (std::string::size_type pos = s.find('+');
         pos != std::string::npos;
         pos = s.find('+', pos + 1))
    {
        s[pos] = ' ';
    }
}

/**
 * @brief Converts a hexadecimal sequence to its respective character.
 *
 * The caller must pass exactly two hexadecimal digits (0-9, a-f, A-F); no
 * validation is done here.
 *
 * @param s string of size 2. Example: "77"
 * @return respective character represented by the hex sequence.
 */
static char x2c(const std::string& s)
{
    // "& 0xdf" clears the lowercase bit, so 'a'-'f' are handled like 'A'-'F'.
    const int high = (s[0] >= 'A' ? ((s[0] & 0xdf) - 'A') + 10 : (s[0] - '0'));
    const int low = (s[1] >= 'A' ? ((s[1] & 0xdf) - 'A') + 10 : (s[1] - '0'));
    return static_cast<char>(high * 16 + low);
}

/**
 * @brief Scan a string to find escaped hex chars in the format "%HH" and
 * replace them with their respective character.
 * Example: "www%2E" becomes "www."
 *
 * A "%" that is not followed by two hexadecimal digits is copied unchanged.
 * Decoded characters are not scanned again, so "%2541" becomes "%41".
 *
 * @param s String that will be modified.
 */
static void unescape_hex(std::string& s)
{
    const char hex_escape = '%';

    std::string decoded;
    decoded.reserve(s.size());

    for (std::string::size_type i = 0; i < s.size(); ++i)
    {
        // The <cctype> classifiers require a value representable as
        // unsigned char, hence the casts. isxdigit (rather than isalnum)
        // keeps sequences such as "%zz" from being turned into garbage.
        if (s[i] == hex_escape
            && i + 2 < s.size()
            && std::isxdigit(static_cast<unsigned char>(s[i + 1]))
            && std::isxdigit(static_cast<unsigned char>(s[i + 2])))
        {
            decoded += x2c(s.substr(i + 1, 2));
            i += 2; // skip the two digits that were just consumed
        }
        else
        {
            decoded += s[i];
        }
    }

    s.swap(decoded);
}

/**
 * @brief Decode url that contains percent-encoding. Replace "+" with space " ".
 * Example: "%77%77%77%2E" becomes "www."
 *
 * The "+" replacement runs before the hex decoding, so an encoded plus sign
 * ("%2B") is decoded to "+" and not turned into a space.
 *
 * @param s url string.
 * @return the decoded string.
 */
std::string decode_url(std::string s)
{
    unescape_space(s);
    unescape_hex(s);
    return s;
}

/**
 * @brief Verify if the parameter character requires encoding. Only ASCII
 * letters, ASCII digits, "_" and "-" are left unencoded.
 *
 * @param c character to be verified.
 * @return true if the character should be encoded.
 */
bool needs_encoding(const char &c)
{
    // some valid ascii signs
    if (c == '_' || c == '-')
        return false;

    if (c >= '0' && c <= '9')
        return false;

    if (c >= 'A' && c <= 'Z')
        return false;

    if (c >= 'a' && c <= 'z')
        return false;

    // Everything else, including bytes outside the ASCII range, is encoded.
    return true;
}

/**
 * @brief Encode url with percent-encoding. Any non-alphanumeric character is
 * converted to its hex value with the percent character (%) as prefix, except
 * "_" and "-". Replace space " " with "+".
 *
 * @param s url string.
 * @return the encoded url string.
 */
std::string encode_url(std::string s)
{
    std::ostringstream out;
    // These flags persist on the stream; only the field width has to be set
    // again before every number.
    out << std::uppercase << std::hex << std::setfill('0');

    for (std::string::const_iterator it = s.begin(); it != s.end(); ++it)
    {
        const char c = *it;

        if (c == ' ')
            out << '+';
        else if (needs_encoding(c))
            // Go through unsigned char so bytes >= 0x80 print as two hex
            // digits instead of a sign-extended value.
            out << '%' << std::setw(2)
                << static_cast<int>(static_cast<unsigned char>(c));
        else
            out << c;
    }

    return out.str();
}

} // eo namespace I2n

// ---------------------------------------------------------------------------
// Remainder of the commit (other files of the patch), preserved as extracted.
//
// diff --git a/src/restricted_html.hpp b/src/restricted_html.hpp
// index 43bf2e4..09b3d14 100644
// --- a/src/restricted_html.hpp
// +++ b/src/restricted_html.hpp
// @@ -33,6 +33,10 @@ namespace I2n
//  {
// + std::string decode_url(std::string s);
// +
// + std::string encode_url(std::string s);
// +
//  } // eo namespace I2n
//  #endif
//
// diff --git a/test/test_restricted_html.cpp b/test/test_restricted_html.cpp
// index c3971ce..d744ed2 100644
// --- a/test/test_restricted_html.cpp
// +++ b/test/test_restricted_html.cpp
// @@ -34,4 +34,31 @@ using namespace I2n;
//  BOOST_AUTO_TEST_SUITE(test_restricted_html)
// +BOOST_AUTO_TEST_CASE(DecodeStringURL)
// +{
// + string output = decode_url("%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D");
// + BOOST_CHECK_EQUAL(string("www.google.com"), output);
// +}
// +
// +BOOST_AUTO_TEST_CASE(DecodeStringURL2)
// +{
// + string output = decode_url("%3Cscript%3Ealert%28document.cookie%29%3C%2Fscr"
// + "ipt%3E");
// + BOOST_CHECK_EQUAL(string(""), output);
// +}
// NOTE(review): the expected string above is empty in the extracted text,
// presumably because its angle-bracket content was stripped along with the
// #include targets. The input decodes to a script tag; confirm against the
// real test file.
// +
// +BOOST_AUTO_TEST_CASE(EncodeStringURL)
// +{
// + string output = encode_url("http://www.domain.com/params?param=b'ar:!~/");
// + BOOST_CHECK_EQUAL(string("http%3A%2F%2Fwww%2Edomain%2Ecom%2Fparams%3Fparam%"
// + "3Db%27ar%3A%21%7E%2F"), output);
// +}
// +
// +BOOST_AUTO_TEST_CASE(EncodeStringURL2)
// +{
// + string output = encode_url("http://www.google.com/
// (the extracted text ends here, in the middle of the string literal)
// ---------------------------------------------------------------------------