From 823ad66f073b14f7b407a43d47f50e9df2bf7f60 Mon Sep 17 00:00:00 2001 From: Gabriel Braga Date: Mon, 15 Apr 2024 17:50:11 +0200 Subject: [PATCH] Adding support to netstring (#8788) This commit adds support for netstring encoding and decoding, plus a buffer that automatically extracts the valid data encapsulated in netstrings. Unit tests for this functionality were also implemented. --- src/CMakeLists.txt | 2 + src/exception.hxx | 7 + src/netstring.cpp | 298 +++++++++++++++++++++++++++++++++++++++++++++++ src/netstring.hpp | 105 +++++++++++++++++ test/CMakeLists.txt | 1 + test/test_netstring.cpp | 286 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 699 insertions(+), 0 deletions(-) create mode 100644 src/netstring.cpp create mode 100644 src/netstring.hpp create mode 100644 test/test_netstring.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a1894cc..80af086 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -27,6 +27,7 @@ SET(cpp_sources tribool.cpp userfunc.cpp week.cpp + netstring.cpp ) SET(cpp_headers @@ -58,6 +59,7 @@ SET(cpp_headers tribool.hpp userfunc.hpp week.hpp + netstring.hpp ) add_library(i2ncommon SHARED ${cpp_sources} ${cpp_headers}) diff --git a/src/exception.hxx b/src/exception.hxx index b372977..db5a669 100644 --- a/src/exception.hxx +++ b/src/exception.hxx @@ -216,4 +216,11 @@ class variable_error : public logic_error_src : logic_error_src(__arg,l,f,t) {} }; +class netstring_error : public std::runtime_error +{ + public: + netstring_error(const std::string& _what) + : runtime_error(_what) {} +}; + #endif diff --git a/src/netstring.cpp b/src/netstring.cpp new file mode 100644 index 0000000..5a53dde --- /dev/null +++ b/src/netstring.cpp @@ -0,0 +1,298 @@ +/* +The software in this package is distributed under the GNU General +Public License version 2 (with a special exception described below). + +A copy of GNU General Public License (GPL) is included in this distribution, +in the file COPYING.GPL. 
+ +As a special exception, if other files instantiate templates or use macros +or inline functions from this file, or you compile this file and link it +with other works to produce a work based on this file, this file +does not by itself cause the resulting work to be covered +by the GNU General Public License. + +However, the source code for this file must still be made available +in accordance with section (3) of the GNU General Public License. + +This exception does not invalidate any other reasons why a work based +on this file might be covered by the GNU General Public License. +*/ +/** @file + * @brief netstring related functions implementations. + * + * @copyright Copyright © 2024 by Intra2net AG + * + */ + +#include +#include +#include +#include "exception.hxx" +#include "boost/lexical_cast.hpp" + +using namespace I2n; + +/** + * @brief Constructor for NetstringBuffer + */ +NetstringBuffer::NetstringBuffer() : + SizeLimit (1073741824), //1gb + ExpectedBytes(0), + NumBytesRead (0), + LengthBeforeColon (0), + BuffCursor(0), + CurrentState(ReadLength) + {} + +/** + * @brief Constructor for NetstringBuffer + * @param size_limit Maximum package size (recommended = 1073741824) + */ +NetstringBuffer::NetstringBuffer(size_t size_limit) : + SizeLimit (size_limit), + ExpectedBytes(0), + NumBytesRead (0), + LengthBeforeColon (0), + BuffCursor(0), + CurrentState(ReadLength) + {} + +/** + * @brief Extract a full and validated netstring from the buffer + * + * If this function throws an exception, the object may be in an unclean state, + * so should not be used any more. 
+ *
+ * @note Assumes that there is a validated netstring on the buffer
+ *       Should only be called by the append function
+ * @return string decoded from the buffer
+ * @throws netstring_error if the buffer is empty or contains invalid data
+ */
+std::string NetstringBuffer::get_next_from_buffer()
+{
+    if (this->InputBuffer.empty())
+    {
+        throw netstring_error("ERROR: trying to decode string from empty buffer");
+    }
+
+    return decode_string(this->InputBuffer);
+}
+
+/**
+ * @brief Try to read the length of a netstring
+ *
+ * While no colon has been received yet, the buffered text only has to convert
+ * cleanly to a number that does not exceed SizeLimit; the cursor is moved to the
+ * end of the buffer so that append_to_buf() waits for more input.
+ * Once the colon is there, ExpectedBytes is set from the length prefix and the
+ * state machine switches to ReadNBytes.
+ *
+ * If this function throws an exception, the object may be in an unclean state,
+ * so should not be used any more.
+ *
+ * @note Should be only called by append_to_buf()
+ * @throws netstring_error - input had an invalid format; do not use object afterwards.
+ */
+void NetstringBuffer::parse_read_length()
+{
+    // we can assume here, that the buffer starts with a new encoded netstring,
+    // there can be no left-overs from previous netstrings at the start of the buffer
+    const string::size_type colon_i = this->InputBuffer.find(':');
+
+    if (colon_i != string::npos)
+    {
+        // found colon, read length and start reading bytes
+        try
+        {
+            const string length_part = this->InputBuffer.substr(0, colon_i);
+            this->ExpectedBytes = boost::lexical_cast<size_t>(length_part);
+        }
+        // didn't find the number of bytes (invalid)
+        catch (const boost::bad_lexical_cast &)
+        {
+            throw netstring_error("ERROR: invalid netstring, format for length should be '%d:%s,'");
+        }
+
+        this->CurrentState = ReadNBytes;
+        this->LengthBeforeColon = colon_i + 1;  // length of the prefix including the ':'
+        this->BuffCursor = this->LengthBeforeColon;
+
+        if (this->ExpectedBytes > this->SizeLimit)
+        {
+            throw netstring_error("ERROR: data package to big");
+        }
+    }
+    // no colon: verify that what we have so far is a number
+    else
+    {
+        size_t num = 0;
+        try
+        {
+            num = boost::lexical_cast<size_t>(this->InputBuffer);
+        }
+        catch (const boost::bad_lexical_cast &)
+        {
+            throw netstring_error("ERROR: invalid netstring, should only have digits before colon");
+        }
+
+        if (num > this->SizeLimit)
+        {
+            throw netstring_error("ERROR: data package to big");
+        }
+
+        // continue waiting for colon and receiving only digits
+        this->BuffCursor = this->InputBuffer.length();
+    }
+}
+
+/**
+ * @brief Try to read the bytes part of the netstring
+ *
+ * Consumes as many payload bytes as are available, but never more than the
+ * current netstring still needs. Afterwards NumBytesRead never exceeds
+ * ExpectedBytes, and once the payload is complete BuffCursor points at the
+ * position where the terminating ',' has to be.
+ *
+ * @note Should be only called by append_to_buf()
+ */
+void NetstringBuffer::parse_read_n_bytes()
+{
+    // bytes in the buffer that have not been looked at yet
+    const size_t available = this->InputBuffer.length() - this->BuffCursor;
+    // payload bytes of the current netstring that are still outstanding;
+    // part of the payload may already have been consumed by an earlier call
+    const size_t missing = this->ExpectedBytes - this->NumBytesRead;
+    const size_t consumed = (available < missing) ? available : missing;
+
+    this->NumBytesRead += consumed;
+    this->add_cursor(consumed);
+
+    // finished reading everything
+    if (this->NumBytesRead >= this->ExpectedBytes)
+    {
+        this->CurrentState = ReadTerminatingChar;
+    }
+}
+
+/**
+ * @brief Try to read the terminating char (comma) ','
+ *
+ * If this function throws an exception, the object may be in an unclean state,
+ * so should not be used any more.
+ *
+ * Removes one complete encoded netstring from the buffer, decodes it and pushes it
+ * onto the ValidNetstrings.
+ *
+ * @note Should be only called by append_to_buf()
+ * @throws netstring_error - input had an invalid format; do not use object afterwards.
+ */
+void NetstringBuffer::parse_read_terminating_char()
+{
+    // Index at which the terminating ',' has to sit: directly behind the
+    // length prefix (including its ':') and the payload.
+    const size_t terminator_pos = this->LengthBeforeColon + this->ExpectedBytes;
+
+    // The terminator has not arrived yet: wait for the next append_to_buf().
+    // Do not search the buffer for a comma here - the payload itself may
+    // contain commas, and those must not be mistaken for the terminator.
+    if (this->InputBuffer.length() <= terminator_pos)
+    {
+        return;
+    }
+
+    if (this->InputBuffer[terminator_pos] != ',')
+    {
+        throw netstring_error("ERROR: invalid netstring, should always end with comma");
+    }
+
+    // decode while the encoded netstring is still at the front of the buffer ...
+    this->ValidNetstrings.push_back(this->get_next_from_buffer());
+
+    // ... then drop prefix, payload and terminator in one go
+    this->InputBuffer.erase(0, terminator_pos + 1);
+
+    // whatever is left in the buffer starts with the next netstring
+    this->reset_state_machine();
+}
+
+/**
+ * @brief Append a string(bytes) to buffer and check if there is a
+ *        valid netstring already, if so will extract it and put it
+ *        on to the deque for valid data.
+ *
+ * If this function throws an exception, the object may be in an unclean state,
+ * so should not be used any more.
+ *
+ * @param str - new bytes to append
+ * @throws netstring_error - input had an invalid format; do not use object afterwards.
+ */
+void NetstringBuffer::append_to_buf(const string& str)
+{
+    if (str.empty())
+    {
+        return;
+    }
+    this->InputBuffer.append(str);
+
+    // One pass can walk through several states; keep going as long as there
+    // are bytes in the buffer that the state machine has not looked at yet.
+    do {
+        if (this->CurrentState == ReadLength) {
+            parse_read_length();
+        }
+        if (this->CurrentState == ReadNBytes) {
+            parse_read_n_bytes();
+        }
+        if (this->CurrentState == ReadTerminatingChar) {
+            parse_read_terminating_char();
+        }
+    } while (this->InputBuffer.length() > this->BuffCursor);
+}
+
+/**
+ * @brief Encode string in the netstring format:
+ *        '<length>:<data>,'
+ *
+ * @param original - string to be encoded
+ * @return string encoded in netstring format
+ */
+string NetstringBuffer::encode_string(const string& original)
+{
+    // Use the full size_type: narrowing the length to unsigned int would
+    // write a wrong length prefix for very large payloads.
+    return to_string(original.length()) + ":" + original + ",";
+}
+
+/**
+ * @brief Decodes a string encoded in the netstring format
+ *
+ * @note Only the length prefix is validated here; the terminating comma is
+ *       checked by the caller before this function is reached.
+ * @param original - string encoded in netstring format
+ * @return string decoded
+ * @throws netstring_error if the input is not valid netstring format
+ */
+string NetstringBuffer::decode_string(const string &original)
+{
+    const string::size_type colon_i = original.find(':');
+    if (colon_i == string::npos)
+        throw netstring_error("ERROR: invalid netstring format, impossible to decode");
+
+    // Parse the length as size_t, the type it is stored in everywhere else
+    // (an int could neither hold every allowed length nor be compared safely with npos).
+    size_t len = 0;
+    try
+    {
+        len = boost::lexical_cast<size_t>(original.substr(0, colon_i));
+    }
+    catch (const boost::bad_lexical_cast &)
+    {
+        throw netstring_error("ERROR: invalid netstring format, impossible to decode");
+    }
+
+    // substr() clamps to the end of the string if fewer than len bytes follow
+    return original.substr(colon_i + 1, len);
+}
+
+/**
+ * @brief Reset state machine to read another netstring from buffer
+ */
+void NetstringBuffer::reset_state_machine()
+{
+    this->CurrentState = ReadLength;
+    this->NumBytesRead = 0;
+    this->ExpectedBytes = 0;  // unsigned member: use the same initial value as the constructors
+    this->LengthBeforeColon = 0;
+    this->BuffCursor = 0;
+}
+
+/**
+ * @brief Returns validated data from the netstring buffer
+ *
+ * @return string - data extracted from netstring buffer
+ *                  or empty string if there are no more strings
+ *                  in the buffer
+ */
+string NetstringBuffer::pop_data()
+{
+    if (ValidNetstrings.empty())
+        return "";
+
+    string str = ValidNetstrings.front();
+    ValidNetstrings.pop_front();
+    return str;
+}
\ No newline at end of file
diff --git a/src/netstring.hpp b/src/netstring.hpp
new file mode 100644
index 0000000..1f76a94
--- /dev/null
+++ b/src/netstring.hpp
@@ -0,0 +1,105 @@
+/*
+The software in this package is distributed under the GNU General
+Public License version 2 (with a special exception described below).
+
+A copy of GNU General Public License (GPL) is included in this distribution,
+in the file COPYING.GPL.
+
+As a special exception, if other files instantiate templates or use macros
+or inline functions from this file, or you compile this file and link it
+with other works to produce a work based on this file, this file
+does not by itself cause the resulting work to be covered
+by the GNU General Public License.
+
+However the source code for this file must still be made available
+in accordance with section (3) of the GNU General Public License.
+
+This exception does not invalidate any other reasons why a work based
+on this file might be covered by the GNU General Public License.
+*/
+/** @file
+ * @brief netstring related functions and classes.
+ *
+ * @copyright Intra2net AG 2024
+ */
+
+#ifndef LIBI2NCOMMON_NETSTRING_H
+#define LIBI2NCOMMON_NETSTRING_H
+
+#include <string>
+#include <deque>
+
+using namespace std;
+
+namespace I2n {
+
+/**
+ * @brief Small state machine:
+ *    .-> 1 - ReadLength -v
+ *    '   2 - ReadNBytes <' --v
+ *    '-- 3 - ReadTerminatingChar ',' -> move to ValidNetstrings, remove from InputBuffer via
+ *            .append(), switch to ReadLength state.
+ */
+enum NetstringStates
+{
+    ReadLength = 0,
+    ReadNBytes = 1,
+    ReadTerminatingChar = 2,
+};
+
+/**
+ * @brief Class to control netstring buffering and extraction
+ *
+ * Bytes are fed in with append_to_buf(); every complete netstring found in the
+ * input is decoded and queued, and can be fetched with pop_data().
+ */
+class NetstringBuffer {
+
+    private:
+        size_t SizeLimit;           // largest payload length accepted in a length prefix
+        size_t ExpectedBytes;       // payload length announced by the current length prefix
+        size_t NumBytesRead;        // payload bytes of the current netstring consumed so far
+        size_t LengthBeforeColon;   // length of the length prefix including its ':'
+        size_t BuffCursor;          // position in InputBuffer up to which input was processed
+        NetstringStates CurrentState;
+
+        deque<string> ValidNetstrings;  // decoded payloads waiting for pop_data()
+        string InputBuffer;             // raw, not yet decoded input
+
+        string get_next_from_buffer();
+
+        void add_cursor(const size_t size)
+        {
+            BuffCursor+=size;
+        }
+
+        static string decode_string(const string &str);
+
+        void parse_read_length();
+
+        void parse_read_n_bytes();
+
+        void parse_read_terminating_char();
+
+    public:
+        NetstringBuffer();
+
+        NetstringBuffer(size_t size_limit);
+
+
+        // number of decoded netstrings that have not been popped yet
+        size_t get_amount_of_valid_strings() const
+        {
+            return ValidNetstrings.size();
+        }
+
+        string pop_data();
+
+        void append_to_buf(const string& str);
+
+        static string encode_string(const string &original);
+
+        void reset_state_machine();
+
+};
+
+} // namespace I2n
+
+#endif //LIBI2NCOMMON_NETSTRING_H
\ No newline at end of file
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 6694bff..d8fe2fd 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -22,6 +22,7 @@ SET(cpp_sources
     test_tmpfstream.cpp
     test_tribool.cpp
     test_crypto.cpp
+    test_netstring.cpp
 )
 if (IMAP_UTF7_SUPPORT)
     SET(cpp_sources stringfunc_imaputf7.cpp ${cpp_sources})
diff --git a/test/test_netstring.cpp b/test/test_netstring.cpp
new file mode 100644
index 0000000..8ebac4e
--- /dev/null
+++ b/test/test_netstring.cpp
@@ -0,0 +1,286 @@
+/*
+The software in this package is distributed under the GNU General
+Public License version 2 (with a special exception described below).
+
+A copy of GNU General Public License (GPL) is included in this distribution,
+in the file COPYING.GPL.
+ +As a special exception, if other files instantiate templates or use macros +or inline functions from this file, or you compile this file and link it +with other works to produce a work based on this file, this file +does not by itself cause the resulting work to be covered +by the GNU General Public License. + +However, the source code for this file must still be made available +in accordance with section (3) of the GNU General Public License. + +This exception does not invalidate any other reasons why a work based +on this file might be covered by the GNU General Public License. +*/ +/** @file + * @brief unit test for the netstring functionalities. + * + * @copyright © Copyright 2024 Intra2net AG + * + */ +#include +#include +#include + +#include "netstring.hpp" +#include "stringfunc.hxx" +#include "exception.hxx" + +using namespace I2n; + +BOOST_AUTO_TEST_SUITE(TestNetstring) + +BOOST_AUTO_TEST_CASE(TestEnconding) +{ + string test1 = "hallo?_this!^#&$(-=+*&^%$#@!/>?<_is a test:{><>?P}"; + + BOOST_CHECK_EQUAL("50:hallo?_this!^#&$(-=+*&^%$#@!/>?<_is a test:{><>?P},", NetstringBuffer::encode_string(test1)); +} + +BOOST_AUTO_TEST_CASE(TestEncondingEmpty) +{ + string test1 = ""; + + BOOST_CHECK_EQUAL(I2n::to_string(test1.length())+":"+test1+",", NetstringBuffer::encode_string(test1)); + BOOST_CHECK_EQUAL(0, atoi(&(NetstringBuffer::encode_string(test1)[0]))); + BOOST_CHECK_EQUAL(3,NetstringBuffer::encode_string(test1).length()); + BOOST_CHECK_EQUAL("0:,",NetstringBuffer::encode_string(test1)); +} + +BOOST_AUTO_TEST_CASE(TestAppendingToBuffer) +{ + NetstringBuffer netbuff = NetstringBuffer(); + + netbuff.append_to_buf("10:testtest12,"); + + BOOST_CHECK_EQUAL(1,netbuff.get_amount_of_valid_strings()); +} + +BOOST_AUTO_TEST_CASE(TestAutoDecoding) +{ + string test = "testtest12"; + + NetstringBuffer netbuff = NetstringBuffer(); + + netbuff.append_to_buf(NetstringBuffer::encode_string(test)); + + BOOST_CHECK_EQUAL("testtest12", netbuff.pop_data()); +} + 
+BOOST_AUTO_TEST_CASE(TestAutoDecodingMoreThenOne)
+{
+    string test = "testtest12";
+    string test2 = "123teste123";
+
+    NetstringBuffer netbuff = NetstringBuffer();
+
+    netbuff.append_to_buf(NetstringBuffer::encode_string(test));
+    netbuff.append_to_buf(NetstringBuffer::encode_string(test2));
+
+    BOOST_CHECK_EQUAL(test, netbuff.pop_data());
+    BOOST_CHECK_EQUAL(test2, netbuff.pop_data());
+}
+
+BOOST_AUTO_TEST_CASE(TestAutoDecodingThree)
+{
+    NetstringBuffer netbuff = NetstringBuffer();
+
+    string test = "testtest12";
+    string test2 = "123teste123";
+    string test3 = "hallo";
+
+    netbuff.append_to_buf(NetstringBuffer::encode_string(test));
+    netbuff.append_to_buf(NetstringBuffer::encode_string(test2));
+    netbuff.append_to_buf(NetstringBuffer::encode_string(test3));
+
+    BOOST_CHECK_EQUAL(test, netbuff.pop_data());
+    BOOST_CHECK_EQUAL(test2, netbuff.pop_data());
+    BOOST_CHECK_EQUAL(test3, netbuff.pop_data());
+}
+
+BOOST_AUTO_TEST_CASE(TestSingleAppend)
+{
+    NetstringBuffer netbuff = NetstringBuffer();
+
+    netbuff.append_to_buf("10:testtest12,10:othertest1,");
+
+    BOOST_CHECK_EQUAL("testtest12", netbuff.pop_data());
+    BOOST_CHECK_EQUAL("othertest1", netbuff.pop_data());
+}
+
+BOOST_AUTO_TEST_CASE(TestHalfNetstring)
+{
+    NetstringBuffer netbuff = NetstringBuffer();
+
+    netbuff.append_to_buf("10:test");
+    netbuff.append_to_buf("test12,");
+
+    BOOST_CHECK_EQUAL("testtest12", netbuff.pop_data());
+}
+
+BOOST_AUTO_TEST_CASE(TestFormat_Comma)
+{
+    NetstringBuffer netbuff = NetstringBuffer();
+
+    netbuff.append_to_buf("10:testtest12");
+
+    string test2 = "10:othertest1,";
+    BOOST_CHECK_THROW(netbuff.append_to_buf(test2), netstring_error)
+}
+
+BOOST_AUTO_TEST_CASE(TestFormat_CommaBreak)
+{
+    NetstringBuffer netbuff = NetstringBuffer();
+
+    netbuff.append_to_buf("10:test");
+    netbuff.append_to_buf("test12");
+
+    string test2 = "10:othertest1,";
+    BOOST_CHECK_THROW(netbuff.append_to_buf(test2), netstring_error)
+}
+
+BOOST_AUTO_TEST_CASE(TestFormat_Colon)
+{
+    string test = "10test";
+    NetstringBuffer netbuff = NetstringBuffer();
+    // remember whether the exception was raised: without this the test
+    // would also pass if append_to_buf() accepted the invalid input
+    bool thrown = false;
+    try
+    {
+        netbuff.append_to_buf(test);
+    }
+    catch (netstring_error &e )
+    {
+        thrown = true;
+        BOOST_CHECK_EQUAL(e.what(),"ERROR: invalid netstring, should only have digits before colon");
+    }
+    BOOST_CHECK(thrown);
+}
+
+BOOST_AUTO_TEST_CASE(TestFormat_NoNumber)
+{
+    string test = ":test";
+    NetstringBuffer netbuff = NetstringBuffer();
+    // see TestFormat_Colon: make sure a missing exception fails the test
+    bool thrown = false;
+    try
+    {
+        netbuff.append_to_buf(test);
+    }
+    catch (netstring_error &e )
+    {
+        thrown = true;
+        BOOST_CHECK_EQUAL(e.what(),"ERROR: invalid netstring, format for length should be '%d:%s,'");
+    }
+    BOOST_CHECK(thrown);
+}
+
+BOOST_AUTO_TEST_CASE(TestSendByteByByte)
+{
+    NetstringBuffer netbuff = NetstringBuffer();
+
+    netbuff.append_to_buf("5");
+    netbuff.append_to_buf(":");
+    netbuff.append_to_buf("H");
+    netbuff.append_to_buf("a");
+    netbuff.append_to_buf("l");
+    netbuff.append_to_buf("l");
+    netbuff.append_to_buf("o");
+    netbuff.append_to_buf(",");
+
+    BOOST_CHECK_EQUAL("Hallo", netbuff.pop_data());
+}
+
+BOOST_AUTO_TEST_CASE(TestSendByteByByteTwo)
+{
+    NetstringBuffer netbuff = NetstringBuffer();
+    string test = "5:Hallo,3:Ola,";
+
+    for (size_t i = 0; i < test.length(); ++i)
+    {
+        netbuff.append_to_buf(I2n::to_string(test[i]));
+    }
+
+    BOOST_CHECK_EQUAL("Hallo", netbuff.pop_data());
+    BOOST_CHECK_EQUAL("Ola", netbuff.pop_data());
+}
+
+BOOST_AUTO_TEST_CASE(TestPkgOverflow)
+{
+    NetstringBuffer netbuff = NetstringBuffer();
+    string test = "1073741826:"; // bigger then 1073741824
+    BOOST_CHECK_THROW(netbuff.append_to_buf(test), netstring_error)
+}
+
+BOOST_AUTO_TEST_CASE(TestAppendingEmptyBuffer)
+{
+    NetstringBuffer netbuff = NetstringBuffer();
+    netbuff.append_to_buf("");
+    BOOST_CHECK(true);
+}
+
+BOOST_AUTO_TEST_CASE(TestAppendingEmptyBufferAndNetstrings)
+{
+    NetstringBuffer netbuff = NetstringBuffer();
+    netbuff.append_to_buf("");
+    netbuff.append_to_buf("5:Hallo,3:Ola,");
+    BOOST_CHECK_EQUAL("Hallo", netbuff.pop_data());
+    BOOST_CHECK_EQUAL("Ola", netbuff.pop_data());
+}
+
+BOOST_AUTO_TEST_CASE(TestAppendingHalfWithOtherNetstring)
+{
+    NetstringBuffer netbuff = NetstringBuffer();
+    netbuff.append_to_buf("5:Ha");
+    netbuff.append_to_buf("llo,3:Ola,");
+    BOOST_CHECK_EQUAL("Hallo", netbuff.pop_data());
+    BOOST_CHECK_EQUAL("Ola", netbuff.pop_data());
+}
+
+BOOST_AUTO_TEST_CASE(TestAppendingHalfWithInvalidNetstring)
+{
+    NetstringBuffer netbuff = NetstringBuffer();
+    netbuff.append_to_buf("5:Ha");
+
+    BOOST_CHECK_THROW(netbuff.append_to_buf("llo3:Ola,"), netstring_error);
+}
+
+BOOST_AUTO_TEST_CASE(TestNestedNetstring)
+{
+    NetstringBuffer netbuff = NetstringBuffer();
+    netbuff.append_to_buf("6:3:Ola,,");
+
+    BOOST_CHECK_EQUAL("3:Ola,", netbuff.pop_data());
+
+}
+
+BOOST_AUTO_TEST_CASE(TestAllSplits)
+{
+    NetstringBuffer netbuff = NetstringBuffer();
+    netbuff.append_to_buf("10:abcdefghij");
+    netbuff.append_to_buf(",10:abcdefghi");
+    BOOST_CHECK_EQUAL("abcdefghij", netbuff.pop_data());
+    netbuff.append_to_buf("j,10:abcdefgh");
+    BOOST_CHECK_EQUAL("abcdefghij", netbuff.pop_data());
+    netbuff.append_to_buf("ij,10:abcdefg");
+    BOOST_CHECK_EQUAL("abcdefghij", netbuff.pop_data());
+    netbuff.append_to_buf("hij,10:abcdef");
+    BOOST_CHECK_EQUAL("abcdefghij", netbuff.pop_data());
+    netbuff.append_to_buf("ghij,10:abcde");
+    BOOST_CHECK_EQUAL("abcdefghij", netbuff.pop_data());
+    netbuff.append_to_buf("fghij,10:abcd");
+    BOOST_CHECK_EQUAL("abcdefghij", netbuff.pop_data());
+    netbuff.append_to_buf("efghij,10:abc");
+    BOOST_CHECK_EQUAL("abcdefghij", netbuff.pop_data());
+    netbuff.append_to_buf("defghij,10:ab");
+    BOOST_CHECK_EQUAL("abcdefghij", netbuff.pop_data());
+    netbuff.append_to_buf("cdefghij,10:a");
+    BOOST_CHECK_EQUAL("abcdefghij", netbuff.pop_data());
+    netbuff.append_to_buf("bcdefghij,10:");
+    BOOST_CHECK_EQUAL("abcdefghij", netbuff.pop_data());
+    netbuff.append_to_buf("abcdefghij,10");
+    BOOST_CHECK_EQUAL("abcdefghij", netbuff.pop_data());
+    netbuff.append_to_buf(":abcdefghij,1");
+    BOOST_CHECK_EQUAL("abcdefghij", netbuff.pop_data());
+    netbuff.append_to_buf("0:abcdefghij,");
+    BOOST_CHECK_EQUAL("abcdefghij", netbuff.pop_data());
+}
+
+BOOST_AUTO_TEST_SUITE_END()
-- 
1.7.1