Fix typos and adapt tests as well.
INCLUDE_DIRECTORIES(${OPENSSL_INCLUDE_DIRS})
LINK_DIRECTORIES(${OPENSSL_LIBRARY_DIRS})
+pkg_check_modules(PCRECPP REQUIRED libpcrecpp)
+include_directories(${PCRECPP_INCLUDE_DIRS})
+link_directories(${PCRECPP_LIBRARY_DIRS})
+
# pkgconfig output
set(prefix ${CMAKE_INSTALL_PREFIX})
set(exec_prefix ${CMAKE_INSTALL_PREFIX}/bin)
${Boost_IOSTREAMS_LIBRARIES}
${Boost_THREAD_LIBRARIES}
${ICONV_LIBRARIES}
- ${OPENSSL_LIBRARIES})
+ ${OPENSSL_LIBRARIES}
+ ${PCRECPP_LIBRARIES})
set_target_properties(i2ncommon PROPERTIES VERSION ${VERSION} SOVERSION 7)
set_target_properties(i2ncommon PROPERTIES OUTPUT_NAME i2ncommon CLEAN_DIRECT_OUTPUT 1)
#include <iomanip>
#include <vector>
+#include <pcrecpp.h>
#include <boost/foreach.hpp>
#include <stringfunc.hxx>
typedef pair<string,bool> TOKEN;
-const vector<string> ALLOWED_PROTOCOLS = {"http://", "https://"};
const vector<string> ALLOWED_TAGS = {"h1", "h2", "h3", "h4", "h5", "h6",
"a", "p", "br", "i", "ul", "li",
"table", "tr", "th", "td"
};
const string AHREF = "<a href=";
-const string SAFE_URL_CHARS = "$-_.+!*'(),;/?:@=&abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
const string REDIRECT_PREFIX = "/arnie?form=redirect&url=";
const string TARGET_BLANK = "target=_blank";
+const pcrecpp::RE SAFE_URL("^http(s)?:\\/\\/[\\w.-]+(?:\\.[\\w\\.-]+)+"
+ "[\\w\\-\\._~:/?#@!\\$&'\\(\\)\\*\\+,;=.]+$");
namespace I2n
{
- /**
- * @brief Compare protocol found in link against a whitelist.
- *
- * @param link url that will be checked.
- * @return true if link has an allowed protocol, false otherwise.
- */
- bool is_protocol_allowed(const string &link)
-{
- BOOST_FOREACH(const string &protocol, ALLOWED_PROTOCOLS)
- {
- if (has_prefix(link, protocol))
- return true;
- }
- return false;
-}
-
/**
* @brief Verifies if a html "a" tag has a valid link and sanitize it if necessary.
- * Modify tag nd add redirector prefix if link has a valid protocol.
+ * Modify tag and add redirector prefix if link has a valid protocol.
* Example: <a href="http://somelink.com">
* returns <a href="/arnie?form=redirect&url=http://somelink.com" target=_blank>
*
{
// tag = <a href="somelink">
string link = tag.substr(AHREF.size());
- if (link.find_first_of("\"\'") == 0)
+ if (link.find_first_of("\"") == 0)
{
- size_t pos = link.find_first_of("\"\'", 1);
+ size_t pos = link.find_first_of("\"", 1);
if (pos == string::npos)
return false; // Quotation mark never closes.
string end(link, pos+1);
if (end.compare(" >") != 0 && end.compare(">") != 0)
- return false; //Probably extra attributes.
+ return false; //Probably extra attributes. Or " inside the link (invalid).
link = link.substr(1, pos -1);
}
link = link.substr(0, space);
}
- if (is_protocol_allowed(link))
- tag = AHREF + "\"" + REDIRECT_PREFIX + link + "\" " + TARGET_BLANK + ">";
- else if (link[0] != '/')
+ if (has_prefix(link, REDIRECT_PREFIX))
+ link = remove_prefix(link, REDIRECT_PREFIX);
+
+ link = decode_url(link);
+
+ if (!SAFE_URL.FullMatch(link))
return false;
- if (link.find_first_not_of(SAFE_URL_CHARS) != string::npos)
+ if (link.find("javascript:") != string::npos)
return false;
+ tag = AHREF + "\"" + REDIRECT_PREFIX + link + "\" " + TARGET_BLANK + ">";
+
return true;
}
using namespace I2n;
BOOST_AUTO_TEST_SUITE(test_restricted_html)
-/**
- * TODO Create more tests for:
- * html comments removed
- * Test the transformation from non asccii to html_entities
- */
+
BOOST_AUTO_TEST_CASE(BasicTest)
{
string output = restrict_html("<h1>Table</h1><table> <tr><th>Month</th> <th>Savings</th> </tr> <tr> <td>January</td> <td>$100</td> </tr></table> <p>Paragraph with a <a href=\"https://example.de\"><i>Acceptable Link</i></a>.</p> <ul> <li>Coffee</li> <li>Tea</li> <li>Milk</li></ul>");
BOOST_CHECK_EQUAL(string("<scri<script>pt>alert(1)"), output);
}
-BOOST_AUTO_TEST_CASE(AhrefLink)
+BOOST_AUTO_TEST_CASE(ExtraAttribute)
{
- string output = restrict_html("<a onclick=\"evil\" href=\"form\">test</a>");
+ string output = restrict_html("<a onclick=\"evil\" href=\"http://i2n.de/\">test</a>");
BOOST_CHECK_EQUAL(string("test"), output);
}
-BOOST_AUTO_TEST_CASE(AhrefLink2)
-{
- string output = restrict_html("<a href=\"http://i2n.de/\" >test</a>");
- BOOST_CHECK_EQUAL(string("<a href=\"/arnie?form=redirect&url=http://i2n.de/\" target=_blank>test</a>"), output);
-}
-
-BOOST_AUTO_TEST_CASE(AhrefLink3)
+BOOST_AUTO_TEST_CASE(ExtraAttribute2)
{
string output = restrict_html("<a href=\"http://site.com/dir\" onclick=\"evil\">test</a>");
BOOST_CHECK_EQUAL(string("test"), output);
}
-BOOST_AUTO_TEST_CASE(AhrefLink4)
+BOOST_AUTO_TEST_CASE(ExtraAttribute3)
{
string output = restrict_html("<a href=\"http://site.com/dir\"onclick=\"evil\">test</a>");
BOOST_CHECK_EQUAL(string("test"), output);
}
-BOOST_AUTO_TEST_CASE(AhrefLink5)
+BOOST_AUTO_TEST_CASE(AhrefLink)
{
- string output = restrict_html("\"<a href=\"http://\"onclick=\"\\u0061\"> Test Me</a>");
- BOOST_CHECK_EQUAL(string("" Test Me"), output);
+ string output = restrict_html("<a href=\"http://i2n.de/\" >test</a>");
+ BOOST_CHECK_EQUAL(string("<a href=\"/arnie?form=redirect&url=http://i2n.de/\" target=_blank>test</a>"), output);
+}
+
+BOOST_AUTO_TEST_CASE(AhrefLink2)
+{
+ string output = restrict_html("<a href=http://;href=javascript:alert(String.fromCharCode(88,83,83)) target=_blank>test</a>");
+ BOOST_CHECK_EQUAL(string("test"), output);
}
BOOST_AUTO_TEST_CASE(AhrefProtocol)
{
- string output = restrict_html("<a href=\"/foo\">foo</a>");
- BOOST_CHECK_EQUAL(string("<a href=\"/foo\">foo</a>"), output);
+ string output = restrict_html("<a href=\"http://www.foo.com\">foo</a>");
+ BOOST_CHECK_EQUAL(string("<a href=\"/arnie?form=redirect&url=http://www.foo.com\" target=_blank>foo</a>"), output);
}
BOOST_AUTO_TEST_CASE(AhrefWrongProtocol)
{
- string output = restrict_html("<a href=\"ftp://foo\">foo</a>");
+ string output = restrict_html("<a href=\"ftp://foo.com\">foo</a>");
BOOST_CHECK_EQUAL(string("foo"), output);
}
BOOST_AUTO_TEST_CASE(UnsafeURLChars2)
{
- string output = restrict_html("<a href=/http://aa.com\nonclick=\\u0061> Test Me!!</a>");
+ string output = restrict_html("<a href=http://aa.com\nonclick=\\u0061> Test Me!!</a>");
BOOST_CHECK_EQUAL(string(" Test Me!!"), output);
}
string output = encode_url("http://www.google.com/<script>");
BOOST_CHECK_EQUAL(string("http%3A%2F%2Fwww%2Egoogle%2Ecom%2F%3Cscript%3E"),
output);
+
}
BOOST_AUTO_TEST_SUITE_END()