diff --git a/src/BencodeParser.cc b/src/BencodeParser.cc
new file mode 100644
index 00000000..63585784
--- /dev/null
+++ b/src/BencodeParser.cc
@@ -0,0 +1,407 @@
+/* */
+#include "BencodeParser.h"
+
+#include <algorithm>
+#include <cassert>
+
+#include "StructParserStateMachine.h"
+#include "util.h"
+
+namespace aria2 {
+
+namespace bittorrent {
+
+namespace {
+// States of the incremental bencode parser. The same values are also
+// pushed onto the state stack to remember the enclosing container.
+enum {
+  // The top-level value is complete; parseUpdate() consumes nothing more.
+  BENCODE_FINISH,
+  // A syntax error was recorded in lastError_; parsing cannot continue.
+  BENCODE_ERROR,
+  // Nothing has been parsed yet.
+  BENCODE_INITIAL,
+  // The first character of a value is expected.
+  BENCODE_VALUE,
+  // Inside a dict: a key (string) or the closing 'e' is expected.
+  BENCODE_DICT_KEY,
+  // Only used on the state stack: the value of a dict entry is in progress.
+  BENCODE_DICT_VAL,
+  // Inside a list: an element or the closing 'e' is expected.
+  BENCODE_LIST,
+  // Reading the decimal length prefix of a string, up to ':'.
+  BENCODE_STRING_LEN,
+  // Reading the remaining strLength_ payload bytes of a string.
+  BENCODE_STRING,
+  // Just after 'i': a sign or the first digit is expected.
+  BENCODE_NUMBER_SIGN,
+  // Reading the digits of an integer, up to 'e'.
+  BENCODE_NUMBER
+};
+} // namespace
+
+// |psm| receives all begin/end/characters/number callbacks. The parser
+// only stores the pointer (the destructor does not delete it).
+BencodeParser::BencodeParser(StructParserStateMachine* psm)
+  : psm_(psm),
+    currentState_(BENCODE_INITIAL),
+    // Initialised so that no code path can ever read an indeterminate
+    // string length.
+    strLength_(0),
+    numberSign_(1),
+    number_(0),
+    numConsumed_(0),
+    lastError_(0)
+{
+  // Sentinel: once the outermost value ends, onValueEnd() finds this on
+  // top of the stack and moves the parser to BENCODE_FINISH.
+  stateStack_.push(BENCODE_FINISH);
+}
+
+BencodeParser::~BencodeParser()
+{}
+
+// Parses |size| bytes of |data|, which may be any fragment of the
+// encoded stream; state is kept between calls.
+//
+// Returns the number of bytes processed. This is less than |size| when
+// the top-level value finished before the end of |data|. Returns 0 if
+// the parser already finished, and the (negative) recorded error code if
+// a previous call failed. On a new error, the negative BencodeError code
+// is recorded and returned.
+ssize_t BencodeParser::parseUpdate(const char* data, size_t size)
+{
+  size_t i;
+  if(currentState_ == BENCODE_FINISH) {
+    return 0;
+  } else if(currentState_ == BENCODE_ERROR) {
+    return lastError_;
+  }
+  for(i = 0; i < size && currentState_ != BENCODE_FINISH; ++i) {
+    char c = data[i];
+    switch(currentState_) {
+    case BENCODE_LIST:
+      if(c == 'e') {
+        onListEnd();
+        break;
+      } else {
+        int rv = pushState(currentState_);
+        if(rv < 0) {
+          // Latch the error so that later calls keep reporting it
+          // instead of resuming in a half-updated state.
+          currentState_ = BENCODE_ERROR;
+          return lastError_ = rv;
+        }
+        currentState_ = BENCODE_VALUE;
+        runBeginCallback(STRUCT_ARRAY_DATA_T);
+      }
+      // Fall through: |c| is the first character of the list element.
+    case BENCODE_INITIAL:
+    case BENCODE_VALUE:
+      switch(c) {
+      case 'd': {
+        currentState_ = BENCODE_DICT_KEY;
+        runBeginCallback(STRUCT_DICT_T);
+        break;
+      }
+      case 'l':
+        currentState_ = BENCODE_LIST;
+        runBeginCallback(STRUCT_ARRAY_T);
+        break;
+      case 'i':
+        number_ = 0;
+        numberSign_ = 1;
+        numConsumed_ = 0;
+        currentState_ = BENCODE_NUMBER_SIGN;
+        runBeginCallback(STRUCT_NUMBER_T);
+        break;
+      default:
+        if(util::isDigit(c)) {
+          // First digit of a string length prefix.
+          strLength_ = c - '0';
+          numConsumed_ = 1;
+          currentState_ = BENCODE_STRING_LEN;
+          runBeginCallback(STRUCT_STRING_T);
+          break;
+        } else {
+          currentState_ = BENCODE_ERROR;
+          return lastError_ = ERR_UNEXPECTED_CHAR_BEFORE_VAL;
+        }
+      }
+      break;
+    case BENCODE_DICT_KEY: {
+      if(c == 'e') {
+        onDictEnd();
+        break;
+      }
+      int rv = pushState(currentState_);
+      if(rv < 0) {
+        currentState_ = BENCODE_ERROR;
+        return lastError_ = rv;
+      }
+      // A key is a string whose length prefix starts at |c|. Start from
+      // a clean slate: leftovers from the previous string or number
+      // must not leak into this key's length or digit count.
+      strLength_ = 0;
+      numConsumed_ = 0;
+      runBeginCallback(STRUCT_DICT_KEY_T);
+      // Switch state now so that a length prefix split across two
+      // parseUpdate() calls resumes in BENCODE_STRING_LEN rather than
+      // re-entering this case and pushing the state a second time.
+      currentState_ = BENCODE_STRING_LEN;
+      // Fall through
+    }
+    case BENCODE_STRING_LEN: {
+      size_t j;
+      for(j = i; j < size && in(data[j], '0', '9'); ++j) {
+        // Reject before strLength_ * 10 + digit could exceed INT64_MAX.
+        if((INT64_MAX - (data[j] - '0'))/ 10 < strLength_) {
+          currentState_ = BENCODE_ERROR;
+          return lastError_ = ERR_STRING_LENGTH_OUT_OF_RANGE;
+        }
+        strLength_ *= 10;
+        strLength_ += data[j] - '0';
+      }
+      numConsumed_ += j - i;
+      if(j != size) {
+        // The length must consist of at least one digit followed by ':'.
+        if(data[j] != ':' || numConsumed_ == 0) {
+          currentState_ = BENCODE_ERROR;
+          return lastError_ = ERR_INVALID_STRING_LENGTH;
+        }
+        i = j;
+        currentState_ = BENCODE_STRING;
+        if(strLength_ == 0) {
+          // Empty string: there is no payload byte to wait for.
+          runCharactersCallback(0, 0);
+          onStringEnd();
+        }
+      } else {
+        // Ran out of input inside the length; the loop's ++i makes the
+        // return value equal to |size|.
+        i = j - 1;
+      }
+      break;
+    }
+    case BENCODE_STRING: {
+      // strLength_ > 0 here and at least one byte is available, so
+      // nread >= 1.
+      size_t nread = std::min(static_cast<int64_t>(size - i), strLength_);
+      runCharactersCallback(&data[i], nread);
+      strLength_ -= nread;
+      i += nread - 1;
+      if(strLength_ == 0) {
+        onStringEnd();
+      }
+      break;
+    }
+    case BENCODE_NUMBER_SIGN: {
+      switch(c) {
+      case '+':
+        numberSign_ = 1;
+        currentState_ = BENCODE_NUMBER;
+        break;
+      case '-':
+        numberSign_ = -1;
+        currentState_ = BENCODE_NUMBER;
+        break;
+      default:
+        if(util::isDigit(c)) {
+          number_ = c - '0';
+          numConsumed_ = 1;
+          currentState_ = BENCODE_NUMBER;
+        } else {
+          // Anything else directly after 'i' is malformed. Without this
+          // branch the character would be skipped silently and input
+          // such as "ix5e" would be accepted as 5.
+          currentState_ = BENCODE_ERROR;
+          return lastError_ = ERR_INVALID_NUMBER;
+        }
+      }
+      break;
+    }
+    case BENCODE_NUMBER: {
+      size_t j;
+      for(j = i; j < size && in(data[j], '0', '9'); ++j) {
+        // Reject before number_ * 10 + digit could exceed INT64_MAX.
+        if((INT64_MAX - (data[j] - '0'))/ 10 < number_) {
+          currentState_ = BENCODE_ERROR;
+          return lastError_ = ERR_NUMBER_OUT_OF_RANGE;
+        }
+        number_ *= 10;
+        number_ += data[j] - '0';
+      }
+      numConsumed_ += j - i;
+      if(j != size) {
+        // The number must have at least one digit and end with 'e'.
+        if(data[j] != 'e' || numConsumed_ == 0) {
+          currentState_ = BENCODE_ERROR;
+          return lastError_ = ERR_INVALID_NUMBER;
+        }
+        i = j;
+        onNumberEnd();
+      } else {
+        i = j - 1;
+      }
+      break;
+    }
+    }
+  }
+  return i;
+}
+
+ssize_t
BencodeParser::parseFinal(const char* data, size_t len)
+{
+  // Run the last chunk through the incremental path, then verify that
+  // the stream did not stop in the middle of a value.
+  const ssize_t consumed = parseUpdate(data, len);
+  if(consumed < 0) {
+    return consumed;
+  }
+  const bool atValueBoundary =
+    currentState_ == BENCODE_FINISH || currentState_ == BENCODE_INITIAL;
+  if(!atValueBoundary) {
+    return ERR_PREMATURE_DATA;
+  }
+  return consumed;
+}
+
+// Resets the state machine and returns the parser to its initial state
+// with only the BENCODE_FINISH sentinel on the state stack.
+void BencodeParser::reset()
+{
+  psm_->reset();
+  lastError_ = 0;
+  currentState_ = BENCODE_INITIAL;
+  for(; !stateStack_.empty(); stateStack_.pop()) {
+  }
+  stateStack_.push(BENCODE_FINISH);
+}
+
+// A string ended. It is reported as a dict key when the enclosing state
+// is BENCODE_DICT_KEY and as a plain string otherwise.
+void BencodeParser::onStringEnd()
+{
+  if(stateTop() == BENCODE_DICT_KEY) {
+    runEndCallback(STRUCT_DICT_KEY_T);
+  } else {
+    runEndCallback(STRUCT_STRING_T);
+  }
+  onValueEnd();
+}
+
+// An integer ended: report the signed value, then close the element.
+void BencodeParser::onNumberEnd()
+{
+  const int64_t value = numberSign_ * number_;
+  runNumberCallback(value);
+  runEndCallback(STRUCT_NUMBER_T);
+  onValueEnd();
+}
+
+// The closing 'e' of a dict was read.
+void BencodeParser::onDictEnd()
+{
+  runEndCallback(STRUCT_DICT_T);
+  onValueEnd();
+}
+
+// The closing 'e' of a list was read.
+void BencodeParser::onListEnd()
+{
+  runEndCallback(STRUCT_ARRAY_T);
+  onValueEnd();
+}
+
+// Called whenever any value is complete. Chooses the next state from
+// the enclosing context found on top of the state stack.
+void BencodeParser::onValueEnd()
+{
+  const int enclosing = stateTop();
+  if(enclosing == BENCODE_DICT_KEY) {
+    // The value just finished was a dict key; its value comes next.
+    popState();
+    pushState(BENCODE_DICT_VAL);
+    currentState_ = BENCODE_VALUE;
+    runBeginCallback(STRUCT_DICT_DATA_T);
+  } else if(enclosing == BENCODE_DICT_VAL) {
+    // A dict entry is complete; expect the next key or 'e'.
+    runEndCallback(STRUCT_DICT_DATA_T);
+    popState();
+    currentState_ = BENCODE_DICT_KEY;
+  } else if(enclosing == BENCODE_LIST) {
+    // A list element is complete; expect the next element or 'e'.
+    runEndCallback(STRUCT_ARRAY_DATA_T);
+    popState();
+    currentState_ = BENCODE_LIST;
+  } else {
+    // Only the sentinel remains: the top-level value is done.
+    assert(stateTop() == BENCODE_FINISH);
+    currentState_ = stateTop();
+  }
+}
+
+// Pushes |state| unless the stack already holds 50 entries. Returns 0
+// on success and ERR_STRUCTURE_TOO_DEEP otherwise.
+int BencodeParser::pushState(int state)
+{
+  static const size_t MAX_STACK_DEPTH = 50;
+  if(stateStack_.size() < MAX_STACK_DEPTH) {
+    stateStack_.push(state);
+    return 0;
+  }
+  return ERR_STRUCTURE_TOO_DEEP;
+}
+
+// Returns the state on top of the stack without removing it.
+int BencodeParser::stateTop() const
+{
+  return stateStack_.top();
+}
+
+// Removes the state on top of the stack and returns it.
+int BencodeParser::popState()
+{
+  const int top = stateStack_.top();
+  stateStack_.pop();
+  return top;
+}
+
+// Tells the state machine that an element of |elementType| (one of the
+// STRUCT_*_T constants) begins.
+void BencodeParser::runBeginCallback(int elementType)
+{
+  psm_->beginElement(elementType);
+}
+
+// Tells the state machine that an element of |elementType| (one of the
+// STRUCT_*_T constants) ends.
+void BencodeParser::runEndCallback(int elementType)
+{
+  psm_->endElement(elementType);
+}
+
+// Forwards |len| bytes of string payload starting at |data|.
+void BencodeParser::runCharactersCallback(const char* data, size_t len)
+{
+  psm_->charactersCallback(data, len);
+}
+
+// Forwards a parsed integer; the two trailing arguments of
+// numberCallback() are always passed as 0 here.
+void BencodeParser::runNumberCallback(int64_t number)
+{
+  psm_->numberCallback(number, 0, 0);
+}
+
+} // namespace bittorrent
+
+} // namespace aria2
diff --git a/src/BencodeParser.h
b/src/BencodeParser.h
new file mode 100644
index 00000000..eb1c4d67
--- /dev/null
+++ b/src/BencodeParser.h
@@ -0,0 +1,104 @@
+/* */
+#ifndef D_BENCODE_PARSER_H
+#define D_BENCODE_PARSER_H
+
+#include "common.h"
+
+#include <stack>
+
+namespace aria2 {
+
+class StructParserStateMachine;
+
+namespace bittorrent {
+
+// Error codes returned by BencodeParser::parseUpdate() and
+// BencodeParser::parseFinal(). All of them are negative so that they
+// cannot be mistaken for a byte count.
+enum BencodeError {
+  // A character that cannot start a value was found where a value was
+  // expected.
+  ERR_UNEXPECTED_CHAR_BEFORE_VAL = -1,
+  // An integer has no digits or is not terminated by 'e'.
+  ERR_INVALID_NUMBER = -2,
+  // An integer does not fit in the supported range (up to INT64_MAX in
+  // magnitude).
+  ERR_NUMBER_OUT_OF_RANGE = -3,
+  // parseFinal() was called but the data ended in the middle of a value.
+  ERR_PREMATURE_DATA = -4,
+  // Lists/dicts are nested deeper than the parser's state stack allows.
+  ERR_STRUCTURE_TOO_DEEP = -5,
+  // A string length has no digits or is not followed by ':'.
+  ERR_INVALID_STRING_LENGTH = -6,
+  // A string length exceeds INT64_MAX.
+  ERR_STRING_LENGTH_OUT_OF_RANGE = -7
+};
+
+// Incremental (push) parser for bencoded data. Input may be supplied in
+// arbitrary fragments; parse events are reported to a
+// StructParserStateMachine.
+class BencodeParser {
+public:
+  // |psm| receives the parse events. The parser stores the pointer
+  // only, so |psm| must stay valid while the parser is in use.
+  BencodeParser(StructParserStateMachine* psm);
+  ~BencodeParser();
+  // Parses |size| bytes of data |data| and returns the number of
+  // bytes processed. On error, one of the negative error codes is
+  // returned.
+  ssize_t parseUpdate(const char* data, size_t size);
+  // Parses |size| bytes of data |data| and returns the number of
+  // bytes processed. On error, one of the negative error codes is
+  // returned. Call this function to signal the parser that this is
+  // the last piece of data; ERR_PREMATURE_DATA is returned if the data
+  // ends inside an unfinished value. This function does NOT reset the
+  // internal state.
+  ssize_t parseFinal(const char* data, size_t size);
+  // Resets the internal state of the parser and makes it ready for
+  // reuse.
+  void reset();
+private:
+  // State stack helpers. pushState() returns 0 on success or
+  // ERR_STRUCTURE_TOO_DEEP when the depth limit is reached.
+  int pushState(int state);
+  int stateTop() const;
+  int popState();
+  // Thin wrappers forwarding parse events to psm_.
+  void runBeginCallback(int elementType);
+  void runEndCallback(int elementType);
+  void runCharactersCallback(const char* data, size_t len);
+  void runNumberCallback(int64_t number);
+
+  // Invoked when the corresponding element has been completely read.
+  void onStringEnd();
+  void onNumberEnd();
+  void onDictEnd();
+  void onListEnd();
+  void onValueEnd();
+
+  // Receiver of parse events.
+  StructParserStateMachine* psm_;
+  // States of the enclosing containers, innermost on top.
+  std::stack<int> stateStack_;
+  int currentState_;
+  // Number of payload bytes still to read for the current string.
+  int64_t strLength_;
+  // +1 or -1, sign of the integer being parsed.
+  int numberSign_;
+  // Magnitude of the integer being parsed.
+  int64_t number_;
+  // Number of digits read so far for the current length or integer.
+  size_t numConsumed_;
+  // Error code recorded when the parser entered its error state.
+  int lastError_;
+};
+
+} // namespace bittorrent
+
+} // namespace aria2
+
+#endif // D_BENCODE_PARSER_H
diff --git a/src/Makefile.am b/src/Makefile.am
index b56090c2..25ec3723 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -508,7 +508,9 @@ SRCS += PeerAbstractCommand.cc PeerAbstractCommand.h\
 	LpdMessage.cc LpdMessage.h\
 	LpdReceiveMessageCommand.cc LpdReceiveMessageCommand.h\
 	LpdDispatchMessageCommand.cc LpdDispatchMessageCommand.h\
-	bencode2.cc bencode2.h
+	bencode2.cc bencode2.h\
+	BencodeParser.cc BencodeParser.h\
+	ValueBaseBencodeParser.h
 endif # ENABLE_BITTORRENT
 
 if ENABLE_METALINK
diff --git a/src/ValueBaseBencodeParser.h b/src/ValueBaseBencodeParser.h
new file mode 100644
index 00000000..ffa109e4
--- /dev/null
+++ b/src/ValueBaseBencodeParser.h
@@ -0,0 +1,53 @@
+/* */
+#ifndef D_VALUE_BASE_BENCODE_PARSER_H
+#define D_VALUE_BASE_BENCODE_PARSER_H
+
+#include "GenericParser.h"
+#include "BencodeParser.h"
+#include "ValueBaseStructParserStateMachine.h"
+
+namespace aria2 {
+
+namespace bittorrent {
+
+// Bencode parser that builds a ValueBase tree from the parsed input.
+typedef GenericParser<BencodeParser, ValueBaseStructParserStateMachine>
+ValueBaseBencodeParser;
+
+} // namespace bittorrent
+
+} // namespace aria2
+
+#endif // D_VALUE_BASE_BENCODE_PARSER_H
diff --git a/test/Makefile.am b/test/Makefile.am
index 34670584..eb06f595 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -206,7 +206,8 @@ aria2c_SOURCES += BtAllowedFastMessageTest.cc\
 	LpdMessageDispatcherTest.cc\
 	LpdMessageReceiverTest.cc\
 	Bencode2Test.cc\
-
PeerConnectionTest.cc
+	PeerConnectionTest.cc\
+	ValueBaseBencodeParserTest.cc
 endif # ENABLE_BITTORRENT
 
 if ENABLE_METALINK
diff --git a/test/ValueBaseBencodeParserTest.cc b/test/ValueBaseBencodeParserTest.cc
new file mode 100644
index 00000000..677f1dce
--- /dev/null
+++ b/test/ValueBaseBencodeParserTest.cc
@@ -0,0 +1,200 @@
+#include "ValueBaseBencodeParser.h"
+
+#include <string>
+
+#include <cppunit/extensions/HelperMacros.h>
+
+#include "ValueBase.h"
+
+namespace aria2 {
+
+class ValueBaseBencodeParserTest:public CppUnit::TestFixture {
+
+  CPPUNIT_TEST_SUITE(ValueBaseBencodeParserTest);
+  CPPUNIT_TEST(testParseUpdate);
+  CPPUNIT_TEST_SUITE_END();
+public:
+  void testParseUpdate();
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION( ValueBaseBencodeParserTest );
+
+namespace {
+// Parses |src| with a fresh parser and checks that it is rejected: no
+// result is produced and a negative error code is reported.
+void checkDecodeError(const std::string& src)
+{
+  bittorrent::ValueBaseBencodeParser parser;
+  ssize_t error;
+  SharedHandle<ValueBase> r = parser.parseFinal(src.c_str(), src.size(),
+                                                error);
+  CPPUNIT_ASSERT(!r);
+  CPPUNIT_ASSERT(error < 0);
+}
+} // namespace
+
+// Exercises well-formed values of every type, nesting, malformed input,
+// the depth limit and trailing garbage. One parser instance is reused
+// for all well-formed cases.
+void ValueBaseBencodeParserTest::testParseUpdate()
+{
+  bittorrent::ValueBaseBencodeParser parser;
+  ssize_t error;
+  {
+    // empty string
+    std::string src = "0:";
+    SharedHandle<ValueBase> s = parser.parseFinal(src.c_str(), src.size(),
+                                                  error);
+    CPPUNIT_ASSERT_EQUAL(std::string(""), downcast<String>(s)->s());
+  }
+  {
+    // integer 0
+    std::string src = "i0e";
+    SharedHandle<ValueBase> s = parser.parseFinal(src.c_str(), src.size(),
+                                                  error);
+    CPPUNIT_ASSERT_EQUAL(static_cast<int64_t>(0), downcast<Integer>(s)->i());
+  }
+  {
+    // empty dict
+    std::string src = "de";
+    SharedHandle<ValueBase> d = parser.parseFinal(src.c_str(), src.size(),
+                                                  error);
+    CPPUNIT_ASSERT(downcast<Dict>(d)->empty());
+  }
+  {
+    // empty list
+    std::string src = "le";
+    SharedHandle<ValueBase> l = parser.parseFinal(src.c_str(), src.size(),
+                                                  error);
+    CPPUNIT_ASSERT(downcast<List>(l)->empty());
+  }
+  {
+    // string
+    std::string src = "3:foo";
+    SharedHandle<ValueBase> s = parser.parseFinal(src.c_str(), src.size(),
+                                                  error);
+    CPPUNIT_ASSERT_EQUAL(std::string("foo"), downcast<String>(s)->s());
+  }
+  {
+    // integer (largest supported value)
+    std::string src = "i9223372036854775807e";
+    SharedHandle<ValueBase> s = parser.parseFinal(src.c_str(), src.size(),
+                                                  error);
+    CPPUNIT_ASSERT_EQUAL(static_cast<int64_t>(9223372036854775807LL),
+                         downcast<Integer>(s)->i());
+  }
+  {
+    // dict, size 1
+    std::string src = "d3:fooi123ee";
+    SharedHandle<ValueBase> d = parser.parseFinal(src.c_str(), src.size(),
+                                                  error);
+    Dict* dict = downcast<Dict>(d);
+    CPPUNIT_ASSERT(dict);
+    CPPUNIT_ASSERT(dict->get("foo"));
+    CPPUNIT_ASSERT_EQUAL(static_cast<int64_t>(123),
+                         downcast<Integer>(dict->get("foo"))->i());
+  }
+  {
+    // dict, size 2
+    std::string src = "d3:fooi123e3:bar1:ee";
+    SharedHandle<ValueBase> d = parser.parseFinal(src.c_str(), src.size(),
+                                                  error);
+    Dict* dict = downcast<Dict>(d);
+    CPPUNIT_ASSERT(dict);
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), dict->size());
+    CPPUNIT_ASSERT(dict->get("foo"));
+    CPPUNIT_ASSERT_EQUAL(static_cast<int64_t>(123),
+                         downcast<Integer>(dict->get("foo"))->i());
+    CPPUNIT_ASSERT(dict->get("bar"));
+    CPPUNIT_ASSERT_EQUAL(std::string("e"),
+                         downcast<String>(dict->get("bar"))->s());
+  }
+  {
+    // list, size 1
+    std::string src = "l3:fooe";
+    SharedHandle<ValueBase> l = parser.parseFinal(src.c_str(), src.size(),
+                                                  error);
+    List* list = downcast<List>(l);
+    CPPUNIT_ASSERT(list);
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), list->size());
+    CPPUNIT_ASSERT_EQUAL(std::string("foo"),
+                         downcast<String>(list->get(0))->s());
+  }
+  {
+    // list, size 2
+    std::string src = "l3:fooi123ee";
+    SharedHandle<ValueBase> l = parser.parseFinal(src.c_str(), src.size(),
+                                                  error);
+    List* list = downcast<List>(l);
+    CPPUNIT_ASSERT(list);
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), list->size());
+    CPPUNIT_ASSERT_EQUAL(std::string("foo"),
+                         downcast<String>(list->get(0))->s());
+    CPPUNIT_ASSERT_EQUAL(static_cast<int64_t>(123),
+                         downcast<Integer>(list->get(1))->i());
+  }
+  {
+    // string, integer and list in dict
+    std::string src = "d4:name5:aria24:sizei12345678900e5:filesl3:bin3:docee";
+    SharedHandle<ValueBase> r = parser.parseFinal(src.c_str(), src.size(),
+                                                  error);
+    const Dict* dict = downcast<Dict>(r);
+    CPPUNIT_ASSERT(dict);
+    CPPUNIT_ASSERT_EQUAL(std::string("aria2"),
+                         downcast<String>(dict->get("name"))->s());
+    CPPUNIT_ASSERT_EQUAL(static_cast<int64_t>(12345678900LL),
+                         downcast<Integer>(dict->get("size"))->i());
+    const List* list = downcast<List>(dict->get("files"));
+    CPPUNIT_ASSERT(list);
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), list->size());
+    CPPUNIT_ASSERT_EQUAL(std::string("bin"),
+                         downcast<String>(list->get(0))->s());
+    CPPUNIT_ASSERT_EQUAL(std::string("doc"),
+                         downcast<String>(list->get(1))->s());
+  }
+  {
+    // dict in list
+    std::string src = "ld1:ki123eee";
+    SharedHandle<ValueBase> r = parser.parseFinal(src.c_str(), src.size(),
+                                                  error);
+    const List* list = downcast<List>(r);
+    CPPUNIT_ASSERT(list);
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), list->size());
+    const Dict* dict = downcast<Dict>(list->get(0));
+    CPPUNIT_ASSERT(dict);
+    CPPUNIT_ASSERT_EQUAL(static_cast<int64_t>(123),
+                         downcast<Integer>(dict->get("k"))->i());
+  }
+  {
+    // empty key is allowed: the input must be accepted and yield a dict
+    std::string src = "d0:1:ve";
+    SharedHandle<ValueBase> s = parser.parseFinal(src.c_str(), src.size(),
+                                                  error);
+    CPPUNIT_ASSERT(s);
+    CPPUNIT_ASSERT(downcast<Dict>(s));
+  }
+  {
+    // empty encoded data
+    std::string src = "";
+    SharedHandle<ValueBase> s = parser.parseFinal(src.c_str(), src.size(),
+                                                  error);
+    CPPUNIT_ASSERT(!s);
+  }
+  // integer, without ending 'e'
+  checkDecodeError("i3");
+  // dict, without ending 'e'
+  checkDecodeError("d");
+  // list, without ending 'e'
+  checkDecodeError("l");
+  // string, less than the specified length.
+  checkDecodeError("3:ab");
+  // string, but length is invalid
+  checkDecodeError("x:abc");
+  // string with minus length
+  checkDecodeError("-1:a");
+  // too deep structure
+  checkDecodeError(std::string(51, 'l')+std::string(51,'e'));
+  checkDecodeError(std::string(50, 'l')+"d3:fooi100ee"+std::string(50,'e'));
+  {
+    // ignore trailing garbage at the end of the input.
+    std::string src = "5:aria2trail";
+    SharedHandle<ValueBase> s = parser.parseFinal(src.c_str(), src.size(),
+                                                  error);
+    CPPUNIT_ASSERT_EQUAL(std::string("aria2"), downcast<String>(s)->s());
+    // Get trailing garbage position
+    CPPUNIT_ASSERT_EQUAL(static_cast<ssize_t>(7), error);
+  }
+}
+
+} // namespace aria2