add latin-1 wrapper for html_entities
[libi2ncommon] / test / stringfunc.cpp
index fe50d4c..90bd63a 100644 (file)
@@ -30,6 +30,7 @@ on this file might be covered by the GNU General Public License.
 
 #define BOOST_TEST_DYN_LINK
 #include <boost/test/unit_test.hpp>
+#include <boost/foreach.hpp>
 #include <boost/numeric/conversion/cast.hpp>
 
 #include <stringfunc.hxx>
@@ -127,6 +128,12 @@ BOOST_AUTO_TEST_CASE(html_entities3)
     BOOST_CHECK_EQUAL(string("&#270; &#3889; &#32904;"), output);
 }
 
+BOOST_AUTO_TEST_CASE(entities_latin1)
+{   /* html_entities_iso: plain ASCII is returned unchanged, bytes 0xe4 / 0xc4 become &auml; / &Auml; */
+    BOOST_CHECK_EQUAL((std::string)"noop", html_entities_iso ("noop"));
+    BOOST_CHECK_EQUAL((std::string)"t&auml;st", html_entities_iso ("t\xe4st"));
+    BOOST_CHECK_EQUAL((std::string)"T&Auml;ST", html_entities_iso ("T\xc4ST"));
+}
 
 
 BOOST_AUTO_TEST_CASE(nice_unit_format1)
@@ -598,13 +605,32 @@ BOOST_AUTO_TEST_CASE(SplitStringDelimiterOnly)
 } // eo SplitStringDelimiterOnly
 
 
+BOOST_AUTO_TEST_CASE(SplitToVector)
+{   /* split_string into a std::vector: ten ';'-separated fields must yield ten elements */
+    std::string line("0;1;2;3;4;5;6;7;8;9");
+    std::vector<std::string> result;
+    split_string(line, result, ";");
+    BOOST_REQUIRE_EQUAL(result.size(), 10);   // REQUIRE: abort before indexing if the size is wrong
+    BOOST_CHECK_EQUAL(result[0], "0");
+    BOOST_CHECK_EQUAL(result[4], "4");
+    BOOST_CHECK_EQUAL(result[9], "9");
+}
+
 
 BOOST_AUTO_TEST_CASE(JoinString1)
 {
     std::list< std::string > parts;
-    get_push_back_filler(parts)("1")("2")("drei");
-
     std::string joined_string= join_string(parts,"/");
+    BOOST_CHECK_EQUAL( std::string("") , joined_string );
+
+    parts.push_back ("1");
+    joined_string= join_string(parts,"/");
+    // a single element must come back without any delimiter:
+    BOOST_CHECK_EQUAL( std::string("1") , joined_string );
+
+    get_push_back_filler(parts)("2")("drei");
+
+    joined_string= join_string(parts,"/");
     // we should have slashes between the strings:
     BOOST_CHECK_EQUAL( std::string("1/2/drei") , joined_string );
 
@@ -621,6 +647,142 @@ BOOST_AUTO_TEST_CASE(JoinString1)
 } // eo JoinString1
 
 
+BOOST_AUTO_TEST_CASE(JoinStringVector)
+{   /* join_string on a std::vector, including empty strings as last and first element */
+    std::vector< std::string > parts;
+    get_push_back_filler(parts)("1")("2")("drei");
+
+    std::string joined_string= join_string(parts,"/");
+    // we should have slashes between the strings:
+    BOOST_CHECK_EQUAL( std::string("1/2/drei") , joined_string );
+
+    parts.push_back( std::string() );
+    joined_string= join_string(parts,"/");
+    // now we should have an additional trailing slash:
+    BOOST_CHECK_EQUAL( std::string("1/2/drei/") , joined_string );
+
+    parts.insert(parts.begin(), "");
+    joined_string= join_string(parts,"/");
+    // now we should have an additional leading slash:
+    BOOST_CHECK_EQUAL( std::string("/1/2/drei/") , joined_string );
+
+} // eo JoinStringVector
+
+
+BOOST_AUTO_TEST_CASE(JoinStringSet)
+{   /* join_string on a std::set: elements are joined in the set's sorted order */
+    std::set< std::string > parts;
+
+    std::string joined_string= join_string(parts,"/");
+    BOOST_CHECK_EQUAL( std::string() , joined_string );
+
+    parts.insert ("foo");
+    joined_string= join_string(parts,"/");
+    BOOST_CHECK_EQUAL( std::string("foo") , joined_string );
+
+    parts.insert ("bar");
+    parts.insert ("baz");
+
+    joined_string= join_string(parts,"/");
+    // we should have slashes between the strings:
+    BOOST_CHECK_EQUAL( std::string("bar/baz/foo") , joined_string );
+
+    parts.insert( std::string() );
+    joined_string= join_string(parts,"/");
+    // the empty string sorts first, so we should have an additional leading slash:
+    BOOST_CHECK_EQUAL( std::string("/bar/baz/foo") , joined_string );
+} // eo JoinStringSet
+
+
+BOOST_AUTO_TEST_CASE(JoinStringIterSet_Empty)
+{   /* iterator-pair overload, with and without an explicit delimiter */
+    std::set< std::string > parts;
+
+    // empty sequence → empty string
+    BOOST_CHECK_EQUAL(join_string (parts.begin (), parts.end ()     ), "");
+    BOOST_CHECK_EQUAL(join_string (parts.begin (), parts.end (), "/"), "");
+} // eo JoinStringIterSet_Empty
+
+BOOST_AUTO_TEST_CASE(JoinStringIterSet_One)
+{   /* iterator-pair overload on a single-element set */
+    std::set< std::string > parts;
+
+    parts.insert ("foo");
+
+    // cardinality == 1 → no delimiter
+    BOOST_CHECK_EQUAL(join_string (parts.begin (), parts.end ()     ), "foo");
+    BOOST_CHECK_EQUAL(join_string (parts.begin (), parts.end (), "/"), "foo");
+} // eo JoinStringIterSet_One
+
+BOOST_AUTO_TEST_CASE(JoinStringIterSet)
+{   /* iterator-pair overload on a multi-element set: joined in sorted order */
+    std::set< std::string > parts;
+
+    parts.insert ("foo");
+    parts.insert ("bar");
+    parts.insert ("baz");
+
+    std::string joined_string= join_string(parts.begin (), parts.end (), "/");
+    // we should have slashes between the strings:
+    BOOST_CHECK_EQUAL( std::string("bar/baz/foo") , joined_string );
+
+    parts.insert( std::string() );
+    joined_string= join_string(parts.begin (), parts.end (),"/");
+    // the empty string sorts first, so we should have an additional leading slash:
+    BOOST_CHECK_EQUAL( std::string("/bar/baz/foo") , joined_string );
+} // eo JoinStringIterSet
+
+
+BOOST_AUTO_TEST_CASE(JoinStringIterSet_Default)
+{   /* default delimiter is newline, for both the iterator-pair and the container call */
+    std::set< std::string > parts;
+
+    parts.insert ("foo");
+    parts.insert ("bar");
+    parts.insert ("baz");
+
+    BOOST_CHECK_EQUAL(join_string (parts.begin (), parts.end ()), "bar\nbaz\nfoo");
+    BOOST_CHECK_EQUAL(join_string (parts                       ), "bar\nbaz\nfoo");
+
+} // eo JoinStringIterSet_Default
+
+
+BOOST_AUTO_TEST_CASE(JoinStringArray_Empty)
+{   /* C string array holding only NULL; NOTE(review): array call presumably stops at the NULL entry - confirm */
+    const char *const parts [] = { NULL };
+
+    BOOST_CHECK_EQUAL(join_string(&parts [0], &parts [0], "/"), "");
+    BOOST_CHECK_EQUAL(join_string(parts                 , "/"), "");
+
+    BOOST_CHECK_EQUAL(join_string(&parts [0], &parts [0]), "");
+    BOOST_CHECK_EQUAL(join_string(parts                 ), "");
+
+} // eo JoinStringArray_Empty
+
+BOOST_AUTO_TEST_CASE(JoinStringArray_One)
+{   /* one C string followed by NULL: pointer range excludes the NULL entry, no delimiter expected */
+    const char *const parts [] = { "one", NULL };
+
+    BOOST_CHECK_EQUAL(join_string(&parts [0], &parts [1], "/"), "one");
+    BOOST_CHECK_EQUAL(join_string(parts                 , "/"), "one");
+
+    BOOST_CHECK_EQUAL(join_string(&parts [0], &parts [1]), "one");
+    BOOST_CHECK_EQUAL(join_string(parts                 ), "one");
+
+} // eo JoinStringArray_One
+
+BOOST_AUTO_TEST_CASE(JoinStringArray_Many)
+{   /* four C strings followed by NULL: explicit "/" delimiter and the default newline delimiter */
+    const char *const parts [5] = { "one", "two", "three", "many", NULL };
+
+    BOOST_CHECK_EQUAL(join_string(&parts [0], &parts [4], "/"), "one/two/three/many");
+    BOOST_CHECK_EQUAL(join_string(parts                 , "/"), "one/two/three/many");
+
+    BOOST_CHECK_EQUAL(join_string(&parts [0], &parts [4]), "one\ntwo\nthree\nmany");
+    BOOST_CHECK_EQUAL(join_string(parts                 ), "one\ntwo\nthree\nmany");
+
+} // eo JoinStringArray_Many
+
 
 BOOST_AUTO_TEST_CASE(ConversionStringInt)
 {
@@ -702,4 +864,199 @@ BOOST_AUTO_TEST_CASE(sanitize_for_logging3)
     BOOST_CHECK_EQUAL(string("l??uter ??mlaute utf8"), output);
 }
 
+BOOST_AUTO_TEST_CASE(find_html_comments_test)
+{   /* every zone in `expect` must appear in the result; sizes must match, order is not checked */
+    string text = "bla-->"  // ==> (npos, 6)
+               //  0     6
+                  "bla<!--bla<!--bla-->bla-->"  // ==> (16, 26), (9, 32)
+               //  6  9     16        26     32
+                  "bla<!--bla-->"  // ==> (35, 45)
+               // 32 35        45
+                  "--><!----><!--"    // ==> (npos, 48), (48, 55), (55, npos)
+               // 45 48      55 59
+                  "bla<!--bla-->";  // ==> (62, 72)
+               // 59 62        72
+    BOOST_REQUIRE_EQUAL(text.length(), 72);   // guards the hand-computed offsets above
+    vector<CommentZone> expect;
+    expect.push_back(CommentZone(string::npos, 6));
+    expect.push_back(CommentZone(16, 26));
+    expect.push_back(CommentZone( 9, 32));
+    expect.push_back(CommentZone(35, 45));
+    expect.push_back(CommentZone(string::npos, 48));
+    expect.push_back(CommentZone(48, 55));
+    expect.push_back(CommentZone(55, string::npos));
+    expect.push_back(CommentZone(62, 72));
+    vector<CommentZone> result = find_html_comments(text);
+    //BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(),   not working, requires ...
+    //                              expect.begin(), expect.end());  ... operator<<(CommentZone)
+    BOOST_CHECK_EQUAL(result.size(), expect.size());
+    BOOST_FOREACH(const CommentZone &comment, expect)
+        BOOST_CHECK_MESSAGE(find(result.begin(), result.end(), comment) != result.end(),
+                            "Find (" << comment.first << "-" << comment.second << ")");
+}
+
+BOOST_AUTO_TEST_CASE(remove_html_comments_test)
+{   /* remove_html_comments edits the string in place and must cope with nested comments */
+    const string original = "First line outside\n"
+                            "text <!--FOO\n"
+                            "Inside foo\n"
+                            "<!--BAR\n"
+                            "foo bar, what a surprise.\n"
+                            "<!-- short tag-less comment -->\n"
+                            " Html cannot handle this, thinks that FOO ended above\n"
+                            "BAR-->\n"
+                            "This, neither. No nested comments\n"
+                            "some text <!--BAZ more text\n"
+                            "Aaarggh!"
+                            "more text BAZ--> even more\n"
+                            "FOO--> text\n"
+                            "second line outside\n"
+                            "<!-- second comment -->";
+    string text = original;
+    string expect = "First line outside\n"
+                    "text  text\n"
+                    "second line outside\n";
+    remove_html_comments(text);
+    BOOST_CHECK_EQUAL(text, expect);
+    remove_html_comments(text);   // should not have any effect
+    BOOST_CHECK_EQUAL(text, expect);
+
+    text = string("test<!--") + original;   // unterminated opener: everything after it is dropped
+    remove_html_comments(text);
+    BOOST_CHECK_EQUAL(text, "test");
+
+    text = original + "-->test";   // stray closer: everything before it is dropped
+    remove_html_comments(text);
+    BOOST_CHECK_EQUAL(text, "test");
+}
+
+BOOST_AUTO_TEST_CASE(shorten_stl_types_string)
+{   /* the fully expanded std::basic_string<char, ...> spelling collapses to std::string */
+    BOOST_CHECK_EQUAL(shorten_stl_types("std::basic_string<char, std::char_traits<char>, std::allocator<char> >"),
+                      "std::string");
+}
+
+BOOST_AUTO_TEST_CASE(shorten_stl_types_simple)
+{   /* a std::allocator<...> template argument is replaced by the placeholder _alloc_ */
+    BOOST_CHECK_EQUAL(shorten_stl_types("std::list<some_type, std::allocator<some_type> >"),
+                      "std::list<some_type, _alloc_>");
+}
+
+BOOST_AUTO_TEST_CASE(shorten_stl_types_multiple)
+{   /* several shortenings (string, list, vector) within one function signature */
+    BOOST_CHECK_EQUAL(shorten_stl_types("std::basic_string<char, std::char_traits<char>, std::allocator<char> > my_func(std::list<some_type, std::allocator<some_type> >, std::vector<int, std::allocator<int> >)"),
+                      "std::string my_func(std::list<some_type, _alloc_>, std::vector<int, _alloc_>)");
+}
+
+BOOST_AUTO_TEST_CASE(shorten_stl_types_complex)
+{   /* the element type is itself a template (boost::shared_ptr<some_type>) */
+    BOOST_CHECK_EQUAL(shorten_stl_types("std::list<boost::shared_ptr<some_type>, std::allocator<boost::shared_ptr<some_type> > >"),
+                      "std::list<boost::shared_ptr<some_type>, _alloc_>");
+}
+
+BOOST_AUTO_TEST_CASE(shorten_stl_types_nested)
+{   /* list of lists: the allocator argument is shortened at both nesting levels */
+              //"std::list<int, std::allocator<int> >"
+    //"std::list<std::list<int, std::allocator<int> >, std::allocator<std::list<int, std::allocator<int> > > >"
+    BOOST_CHECK_EQUAL(shorten_stl_types("std::list<std::list<int, std::allocator<int> >, std::allocator<std::list<int, std::allocator<int> > > >"),
+                      "std::list<std::list<int, _alloc_>, _alloc_>");
+}
+
+BOOST_AUTO_TEST_CASE(shorten_stl_types_nothing)
+{   /* inputs that shorten_stl_types must return unchanged */
+    string text = "";
+    BOOST_CHECK_EQUAL(shorten_stl_types(text), text);
+    text = "int f(void)";
+    BOOST_CHECK_EQUAL(shorten_stl_types(text), text);
+    text = "std::cout << \"Test\" << std::endl;";
+    BOOST_CHECK_EQUAL(shorten_stl_types(text), text);
+    text = "bla<blubb>";
+    BOOST_CHECK_EQUAL(shorten_stl_types(text), text);
+    text = "std::list<> is a class template";
+    BOOST_CHECK_EQUAL(shorten_stl_types(text), text);
+    text = "std::list<int, std::allocator<int>\n>";   // newline before the closing '>' : left untouched
+    BOOST_CHECK_EQUAL(shorten_stl_types(text), text);
+}
+
+BOOST_AUTO_TEST_CASE(base64_encode_decode)
+{   /* short string: compare against a known encoding, then decode back */
+    string text = "Hello World\n";
+
+    string encoded = base64_encode(text);
+
+    BOOST_CHECK_EQUAL("SGVsbG8gV29ybGQK", encoded);
+    BOOST_CHECK_EQUAL(text, base64_decode(encoded));
+}
+
+BOOST_AUTO_TEST_CASE(base64_empty_string)
+{   /* empty input encodes to an empty string and decodes back to empty */
+    string text = "";
+    string encoded = base64_encode(text);
+
+    BOOST_CHECK_EQUAL("", encoded);
+    BOOST_CHECK_EQUAL(text, base64_decode(encoded));
+}
+
+BOOST_AUTO_TEST_CASE(base64_large_string_with_zero)
+{   /* round trip of a large buffer consisting solely of NUL bytes */
+    // 10 MB data
+    int data_size = 1024 * 1024 * 10;
+
+    string large_binary_data(data_size, 0);
+    BOOST_CHECK_EQUAL(data_size, large_binary_data.size());   // embedded NULs must not truncate the string
+
+    string encoded = base64_encode(large_binary_data);
+
+    string decoded = base64_decode(encoded);
+    BOOST_CHECK_EQUAL(large_binary_data, decoded);
+}
+
+BOOST_AUTO_TEST_CASE(base64_large_string_with_zero_encode_linefeeds)
+{   /* same NUL-byte round trip, with one_line_mode disabled for both encode and decode */
+    // 10 MB data
+    int data_size = 1024 * 1024 * 10;
+
+    string large_binary_data(data_size, 0);
+    BOOST_CHECK_EQUAL(data_size, large_binary_data.size());
+
+    const bool one_line_mode = false;
+    string encoded = base64_encode(large_binary_data, one_line_mode);
+
+    string decoded = base64_decode(encoded, one_line_mode);
+    BOOST_CHECK_EQUAL(large_binary_data, decoded);
+}
+
+BOOST_AUTO_TEST_CASE(base64_decode_garbage)
+{   /* decoding text that is not base64 must yield an empty result */
+    std::string data = "Hello World, this is unencoded data";
+    string decoded = base64_decode(data);
+
+    // garbage turns out to be an empty string
+    BOOST_CHECK_EQUAL(0, decoded.size());
+}
+
+BOOST_AUTO_TEST_CASE(base64_encode_with_linefeeds)
+{   /* one-line mode off: the expected output is wrapped, its first line holding 64 characters */
+    const string data = string("Hello World\n")
+                       + "Hello World\n"
+                       + "Hello World\n"
+                       + "Hello World\n"
+                       + "Hello World\n"
+                       + "Hello World\n"
+                       + "Hello World\n";
+
+    const string encoded = base64_encode(data, false);
+
+    const std::string expected = string("SGVsbG8gV29ybGQKSGVsbG8gV29ybGQKSGVsbG8gV29ybGQKSGVsbG8gV29ybGQK\n")
+                                 + "SGVsbG8gV29ybGQKSGVsbG8gV29ybGQKSGVsbG8gV29ybGQK\n";
+    BOOST_CHECK_EQUAL(expected, encoded);
+
+    // decode and compare
+    BOOST_CHECK_EQUAL(data, base64_decode(encoded, false));
+
+    // decoding the multi-line output in single-line mode is expected to
+    // yield an empty string (openssl is very strict about this)
+    BOOST_CHECK_EQUAL("", base64_decode(encoded, true));
+}
+
 BOOST_AUTO_TEST_SUITE_END()