Added stream Bencode parser

pull/25/merge
Tatsuhiro Tsujikawa 2012-07-24 23:50:06 +09:00
parent 9620bb0a6d
commit 8311d6ef87
6 changed files with 769 additions and 2 deletions

407
src/BencodeParser.cc Normal file
View File

@ -0,0 +1,407 @@
/* <!-- copyright */
/*
* aria2 - The high speed download utility
*
* Copyright (C) 2012 Tatsuhiro Tsujikawa
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* In addition, as a special exception, the copyright holders give
* permission to link the code of portions of this program with the
* OpenSSL library under certain conditions as described in each
* individual source file, and distribute linked combinations
* including the two.
* You must obey the GNU General Public License in all respects
* for all of the code used other than OpenSSL. If you modify
* file(s) with this exception, you may extend this exception to your
* version of the file(s), but you are not obligated to do so. If you
* do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source
* files in the program, then also delete it here.
*/
/* copyright --> */
#include "BencodeParser.h"

#include <cassert>
#include <algorithm>

#include "StructParserStateMachine.h"
#include "util.h"
namespace aria2 {
namespace bittorrent {
namespace {
// Parser states. A value is used either as the parser's current state
// or as an entry saved on the state stack while a nested value is
// being parsed.
enum {
  // The top-level value is complete; also the sentinel at the bottom
  // of the state stack.
  BENCODE_FINISH = 0,
  // A parse error occurred.
  BENCODE_ERROR = 1,
  // Nothing consumed yet; the first byte of the top-level value is
  // expected.
  BENCODE_INITIAL = 2,
  // The first byte of a value is expected.
  BENCODE_VALUE = 3,
  // Inside a dict: a key or the closing 'e' is expected.
  BENCODE_DICT_KEY = 4,
  // Saved on the stack while the value of a dict entry is parsed.
  BENCODE_DICT_VAL = 5,
  // Inside a list: an element or the closing 'e' is expected.
  BENCODE_LIST = 6,
  // Reading the decimal length prefix of a string.
  BENCODE_STRING_LEN = 7,
  // Reading the bytes of a string.
  BENCODE_STRING = 8,
  // Right after 'i': a sign or the first digit is expected.
  BENCODE_NUMBER_SIGN = 9,
  // Reading the digits of an integer up to the closing 'e'.
  BENCODE_NUMBER = 10
};
} // namespace
// Creates a parser which reports parse events to |psm|. The parser
// stores the pointer only; it does not take ownership of |psm|.
BencodeParser::BencodeParser(StructParserStateMachine* psm)
  : psm_(psm),
    currentState_(BENCODE_INITIAL),
    // strLength_ is accumulated digit by digit while a string length
    // is parsed, so it must never start out indeterminate.
    strLength_(0),
    numberSign_(1),
    number_(0),
    numConsumed_(0),
    lastError_(0)
{
  // BENCODE_FINISH sits at the bottom of the stack so that finishing
  // the top-level value moves the parser into the finished state.
  stateStack_.push(BENCODE_FINISH);
}
// Nothing to release explicitly: the destructor body is empty.
BencodeParser::~BencodeParser() {}
// Feeds |size| bytes of |data| to the parser and reports begin/end,
// character and number events to psm_ as elements are recognized.
// Input may be split across calls at any byte boundary.
//
// Returns the number of bytes processed. Parsing stops as soon as the
// top-level value is complete, so the return value may be less than
// |size|; the remaining bytes are trailing data. If the parser has
// already finished, 0 is returned without looking at |data|.
//
// On error, one of the negative BencodeError codes is returned and the
// parser enters the error state; every subsequent call returns the
// same code until reset() is called.
ssize_t BencodeParser::parseUpdate(const char* data, size_t size)
{
  size_t i;
  if(currentState_ == BENCODE_FINISH) {
    return 0;
  } else if(currentState_ == BENCODE_ERROR) {
    return lastError_;
  }
  for(i = 0; i < size && currentState_ != BENCODE_FINISH; ++i) {
    char c = data[i];
    switch(currentState_) {
    case BENCODE_LIST:
      if(c == 'e') {
        onListEnd();
        break;
      } else {
        // A new list element starts here. Remember that we are inside
        // a list, then parse |c| as the first byte of a value.
        int rv = pushState(currentState_);
        if(rv < 0) {
          // Make the failure sticky like every other parse error.
          currentState_ = BENCODE_ERROR;
          return lastError_ = rv;
        }
        currentState_ = BENCODE_VALUE;
        runBeginCallback(STRUCT_ARRAY_DATA_T);
      }
      // Fall through
    case BENCODE_INITIAL:
    case BENCODE_VALUE:
      switch(c) {
      case 'd':
        currentState_ = BENCODE_DICT_KEY;
        runBeginCallback(STRUCT_DICT_T);
        break;
      case 'l':
        currentState_ = BENCODE_LIST;
        runBeginCallback(STRUCT_ARRAY_T);
        break;
      case 'i':
        number_ = 0;
        numberSign_ = 1;
        numConsumed_ = 0;
        currentState_ = BENCODE_NUMBER_SIGN;
        runBeginCallback(STRUCT_NUMBER_T);
        break;
      default:
        if(util::isDigit(c)) {
          // First digit of a string length.
          strLength_ = c - '0';
          numConsumed_ = 1;
          currentState_ = BENCODE_STRING_LEN;
          runBeginCallback(STRUCT_STRING_T);
        } else {
          currentState_ = BENCODE_ERROR;
          return lastError_ = ERR_UNEXPECTED_CHAR_BEFORE_VAL;
        }
      }
      break;
    case BENCODE_DICT_KEY: {
      if(c == 'e') {
        onDictEnd();
        break;
      }
      int rv = pushState(currentState_);
      if(rv < 0) {
        // Make the failure sticky like every other parse error.
        currentState_ = BENCODE_ERROR;
        return lastError_ = rv;
      }
      // A key is a string whose length starts at |c|. Start from a
      // clean length and digit count: values left over from an earlier
      // string or number must not leak into this key, otherwise a key
      // without any digit (":") could pass the check below.
      strLength_ = 0;
      numConsumed_ = 0;
      runBeginCallback(STRUCT_DICT_KEY_T);
      // Switch state now so that, if the input ends in the middle of
      // the length, the next call continues the length instead of
      // starting another key. The fact that this string is a key is
      // kept on the state stack.
      currentState_ = BENCODE_STRING_LEN;
      // Fall through
    }
    case BENCODE_STRING_LEN: {
      size_t j;
      for(j = i; j < size && in(data[j], '0', '9'); ++j) {
        // Reject the digit if strLength_*10+digit would exceed
        // INT64_MAX.
        if((INT64_MAX - (data[j] - '0'))/ 10 < strLength_) {
          currentState_ = BENCODE_ERROR;
          return lastError_ = ERR_STRING_LENGTH_OUT_OF_RANGE;
        }
        strLength_ *= 10;
        strLength_ += data[j] - '0';
      }
      numConsumed_ += j - i;
      if(j != size) {
        // The length must consist of at least one digit and be
        // terminated by ':'.
        if(data[j] != ':' || numConsumed_ == 0) {
          currentState_ = BENCODE_ERROR;
          return lastError_ = ERR_INVALID_STRING_LENGTH;
        }
        // Leave i on ':'; the loop increment steps past it.
        i = j;
        currentState_ = BENCODE_STRING;
        if(strLength_ == 0) {
          // Empty string: there are no bytes to wait for.
          runCharactersCallback(0, 0);
          onStringEnd();
        }
      } else {
        // Input exhausted inside the length; continue on next call.
        i = j - 1;
      }
      break;
    }
    case BENCODE_STRING: {
      // Both operands are at least 1 here, so nread >= 1.
      size_t nread = std::min(static_cast<int64_t>(size - i), strLength_);
      runCharactersCallback(&data[i], nread);
      strLength_ -= nread;
      // Leave i on the last consumed byte; the loop increment steps
      // past it.
      i += nread - 1;
      if(strLength_ == 0) {
        onStringEnd();
      }
      break;
    }
    case BENCODE_NUMBER_SIGN:
      if(c == '+') {
        numberSign_ = 1;
        currentState_ = BENCODE_NUMBER;
      } else if(c == '-') {
        numberSign_ = -1;
        currentState_ = BENCODE_NUMBER;
      } else if(util::isDigit(c)) {
        number_ = c - '0';
        numConsumed_ = 1;
        currentState_ = BENCODE_NUMBER;
      } else {
        // Anything else right after 'i' is malformed. Do not skip it
        // silently: "ixx5e" must not be accepted as 5.
        currentState_ = BENCODE_ERROR;
        return lastError_ = ERR_INVALID_NUMBER;
      }
      break;
    case BENCODE_NUMBER: {
      size_t j;
      for(j = i; j < size && in(data[j], '0', '9'); ++j) {
        // Reject the digit if number_*10+digit would exceed INT64_MAX.
        // The sign is applied only at the end, so the magnitude of a
        // negative number is limited to INT64_MAX as well.
        if((INT64_MAX - (data[j] - '0'))/ 10 < number_) {
          currentState_ = BENCODE_ERROR;
          return lastError_ = ERR_NUMBER_OUT_OF_RANGE;
        }
        number_ *= 10;
        number_ += data[j] - '0';
      }
      numConsumed_ += j - i;
      if(j != size) {
        // The number must have at least one digit and end with 'e'.
        if(data[j] != 'e' || numConsumed_ == 0) {
          currentState_ = BENCODE_ERROR;
          return lastError_ = ERR_INVALID_NUMBER;
        }
        // Leave i on 'e'; the loop increment steps past it.
        i = j;
        onNumberEnd();
      } else {
        // Input exhausted inside the digits; continue on next call.
        i = j - 1;
      }
      break;
    }
    }
  }
  return i;
}
// Parses the last piece of input. Works like parseUpdate(), but if the
// data was consumed without error and the parser is neither finished
// nor still in its initial state, the input ended in the middle of a
// value and ERR_PREMATURE_DATA is returned instead of the byte count.
// The internal state is not reset.
ssize_t BencodeParser::parseFinal(const char* data, size_t len)
{
  ssize_t processed = parseUpdate(data, len);
  if(processed < 0) {
    // parseUpdate() already reported an error; pass it through.
    return processed;
  }
  if(currentState_ == BENCODE_FINISH || currentState_ == BENCODE_INITIAL) {
    return processed;
  }
  processed = ERR_PREMATURE_DATA;
  return processed;
}
void BencodeParser::reset()
{
psm_->reset();
currentState_ = BENCODE_INITIAL;
lastError_ = 0;
while(!stateStack_.empty()) {
stateStack_.pop();
}
stateStack_.push(BENCODE_FINISH);
}
// Called when all bytes of a string have been delivered. The state on
// top of the stack tells whether the string was a dict key or an
// ordinary string value; the matching end event is reported before the
// value is closed.
void BencodeParser::onStringEnd()
{
  int finishedType = STRUCT_STRING_T;
  if(stateTop() == BENCODE_DICT_KEY) {
    finishedType = STRUCT_DICT_KEY_T;
  }
  runEndCallback(finishedType);
  onValueEnd();
}
// Called when the closing 'e' of an integer has been seen. Reports the
// signed value, then the end of the number element, then closes the
// value.
void BencodeParser::onNumberEnd()
{
  const int64_t signedValue = numberSign_ * number_;
  runNumberCallback(signedValue);
  runEndCallback(STRUCT_NUMBER_T);
  onValueEnd();
}
// Called when the closing 'e' of a dict has been seen: reports the end
// of the dict element and closes the value.
void BencodeParser::onDictEnd()
{
  const int finishedType = STRUCT_DICT_T;
  runEndCallback(finishedType);
  onValueEnd();
}
// Called when the closing 'e' of a list has been seen: reports the end
// of the array element and closes the value.
void BencodeParser::onListEnd()
{
  const int finishedType = STRUCT_ARRAY_T;
  runEndCallback(finishedType);
  onValueEnd();
}
// Called whenever a complete value (string, number, dict or list) has
// been parsed. The state on top of the stack says what the value was
// part of and therefore what the parser expects next.
void BencodeParser::onValueEnd()
{
  const int enclosing = stateTop();
  if(enclosing == BENCODE_DICT_KEY) {
    // The value was a dict key; the entry's value comes next.
    popState();
    pushState(BENCODE_DICT_VAL);
    currentState_ = BENCODE_VALUE;
    runBeginCallback(STRUCT_DICT_DATA_T);
  } else if(enclosing == BENCODE_DICT_VAL) {
    // The value was a dict entry's value; expect the next key or 'e'.
    runEndCallback(STRUCT_DICT_DATA_T);
    popState();
    currentState_ = BENCODE_DICT_KEY;
  } else if(enclosing == BENCODE_LIST) {
    // The value was a list element; expect the next element or 'e'.
    runEndCallback(STRUCT_ARRAY_DATA_T);
    popState();
    currentState_ = BENCODE_LIST;
  } else {
    // Only the bottom-of-stack sentinel is left: the top-level value
    // is complete.
    assert(enclosing == BENCODE_FINISH);
    currentState_ = enclosing;
  }
}
// Saves |state| on the state stack. Returns 0 on success, or
// ERR_STRUCTURE_TOO_DEEP without pushing when the stack already holds
// 50 entries.
int BencodeParser::pushState(int state)
{
  const size_t maxStackEntries = 50;
  if(stateStack_.size() < maxStackEntries) {
    stateStack_.push(state);
    return 0;
  }
  return ERR_STRUCTURE_TOO_DEEP;
}
// Returns the most recently saved state without removing it. The stack
// must not be empty.
int BencodeParser::stateTop() const
{
  const int saved = stateStack_.top();
  return saved;
}
// Removes the most recently saved state from the stack and returns it.
// The stack must not be empty.
int BencodeParser::popState()
{
  const int saved = stateStack_.top();
  stateStack_.pop();
  return saved;
}
// Tells the state machine that an element of type |elementType| (one
// of the STRUCT_*_T constants) begins.
void BencodeParser::runBeginCallback(int elementType)
{
  psm_->beginElement(elementType);
}
// Tells the state machine that the element of type |elementType| (one
// of the STRUCT_*_T constants) ends.
void BencodeParser::runEndCallback(int elementType)
{
  psm_->endElement(elementType);
}
// Hands |len| bytes of string content starting at |data| to the state
// machine. A string may be delivered in several pieces; an empty
// string is delivered as (0, 0).
void BencodeParser::runCharactersCallback(const char* data, size_t len)
{
  psm_->charactersCallback(data, len);
}
// Hands a parsed integer to the state machine. The two trailing
// arguments of numberCallback() are always passed as 0 here; their
// meaning is defined by StructParserStateMachine (presumably they
// carry number parts that bencode does not have -- confirm there).
void BencodeParser::runNumberCallback(int64_t number)
{
  psm_->numberCallback(number, 0, 0);
}
} // namespace bittorrent
} // namespace aria2

104
src/BencodeParser.h Normal file
View File

@ -0,0 +1,104 @@
/* <!-- copyright */
/*
* aria2 - The high speed download utility
*
* Copyright (C) 2012 Tatsuhiro Tsujikawa
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* In addition, as a special exception, the copyright holders give
* permission to link the code of portions of this program with the
* OpenSSL library under certain conditions as described in each
* individual source file, and distribute linked combinations
* including the two.
* You must obey the GNU General Public License in all respects
* for all of the code used other than OpenSSL. If you modify
* file(s) with this exception, you may extend this exception to your
* version of the file(s), but you are not obligated to do so. If you
* do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source
* files in the program, then also delete it here.
*/
/* copyright --> */
#ifndef D_BENCODE_PARSER_H
#define D_BENCODE_PARSER_H
#include "common.h"
#include <stack>
namespace aria2 {
class StructParserStateMachine;
namespace bittorrent {
// Error codes returned by BencodeParser::parseUpdate() and
// parseFinal(). All of them are negative so that they cannot be
// confused with a processed-byte count.
enum BencodeError {
  // A byte which cannot start a value was found where a value was
  // expected.
  ERR_UNEXPECTED_CHAR_BEFORE_VAL = -1,
  // An integer is malformed (e.g. no digits, or not terminated by 'e').
  ERR_INVALID_NUMBER = -2,
  // An integer does not fit into the supported 64 bit range.
  ERR_NUMBER_OUT_OF_RANGE = -3,
  // The input ended in the middle of a value.
  ERR_PREMATURE_DATA = -4,
  // Lists/dicts are nested deeper than the parser allows.
  ERR_STRUCTURE_TOO_DEEP = -5,
  // A string length is malformed (e.g. no digits, or not terminated by
  // ':').
  ERR_INVALID_STRING_LENGTH = -6,
  // A string length does not fit into the supported 64 bit range.
  ERR_STRING_LENGTH_OUT_OF_RANGE = -7
};
// Incremental (streaming) parser for bencoded data. Input may be fed
// in arbitrary pieces; parse events are reported to a
// StructParserStateMachine.
class BencodeParser {
public:
  // |psm| receives the parse events. The parser keeps the pointer but
  // does not own the object.
  BencodeParser(StructParserStateMachine* psm);
  ~BencodeParser();

  // Parses |size| bytes of |data| and returns the number of bytes
  // processed, which may be less than |size| when the top-level value
  // ends before the end of the data. Returns one of the negative
  // BencodeError codes on error.
  ssize_t parseUpdate(const char* data, size_t size);

  // Same as parseUpdate(), but also signals that this is the last
  // piece of data: if the input ends in the middle of a value,
  // ERR_PREMATURE_DATA is returned. The internal state is NOT reset by
  // this function.
  ssize_t parseFinal(const char* data, size_t size);

  // Puts the parser (and the attached state machine) back into its
  // initial state so that it can be reused.
  void reset();

private:
  // State stack helpers.
  int pushState(int state);
  int popState();
  int stateTop() const;

  // Invoked when the corresponding kind of value has been completed.
  void onStringEnd();
  void onNumberEnd();
  void onDictEnd();
  void onListEnd();
  void onValueEnd();

  // Thin wrappers which forward events to psm_.
  void runBeginCallback(int elementType);
  void runEndCallback(int elementType);
  void runCharactersCallback(const char* data, size_t len);
  void runNumberCallback(int64_t number);

  // Receiver of the parse events; not owned.
  StructParserStateMachine* psm_;
  // States of the enclosing containers which are still open.
  std::stack<int> stateStack_;
  // What the parser expects next.
  int currentState_;
  // While a string length is read: the length so far. While string
  // content is read: the number of bytes still to come.
  int64_t strLength_;
  // Sign (1 or -1) of the integer being parsed.
  int numberSign_;
  // Magnitude of the integer being parsed.
  int64_t number_;
  // Number of digits consumed for the current string length or
  // integer.
  size_t numConsumed_;
  // Error code reported by the last failed parse, 0 if none.
  int lastError_;
};
} // namespace bittorrent
} // namespace aria2
#endif // D_BENCODE_PARSER_H

View File

@ -508,7 +508,9 @@ SRCS += PeerAbstractCommand.cc PeerAbstractCommand.h\
LpdMessage.cc LpdMessage.h\
LpdReceiveMessageCommand.cc LpdReceiveMessageCommand.h\
LpdDispatchMessageCommand.cc LpdDispatchMessageCommand.h\
bencode2.cc bencode2.h
bencode2.cc bencode2.h\
BencodeParser.cc BencodeParser.h\
ValueBaseBencodeParser.h
endif # ENABLE_BITTORRENT
if ENABLE_METALINK

View File

@ -0,0 +1,53 @@
/* <!-- copyright */
/*
* aria2 - The high speed download utility
*
* Copyright (C) 2012 Tatsuhiro Tsujikawa
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* In addition, as a special exception, the copyright holders give
* permission to link the code of portions of this program with the
* OpenSSL library under certain conditions as described in each
* individual source file, and distribute linked combinations
* including the two.
* You must obey the GNU General Public License in all respects
* for all of the code used other than OpenSSL. If you modify
* file(s) with this exception, you may extend this exception to your
* version of the file(s), but you are not obligated to do so. If you
* do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source
* files in the program, then also delete it here.
*/
/* copyright --> */
#ifndef D_VALUE_BASE_BENCODE_PARSER_H
#define D_VALUE_BASE_BENCODE_PARSER_H
#include "GenericParser.h"
#include "BencodeParser.h"
#include "ValueBaseStructParserStateMachine.h"
namespace aria2 {
namespace bittorrent {
// Convenience name for the GenericParser instantiation which combines
// BencodeParser with ValueBaseStructParserStateMachine.
// NOTE(review): presumably this is the parser that turns bencoded data
// into a ValueBase tree -- confirm against GenericParser.h.
typedef GenericParser<BencodeParser, ValueBaseStructParserStateMachine>
ValueBaseBencodeParser;
} // namespace bittorrent
} // namespace aria2
#endif // D_VALUE_BASE_BENCODE_PARSER_H

View File

@ -206,7 +206,8 @@ aria2c_SOURCES += BtAllowedFastMessageTest.cc\
LpdMessageDispatcherTest.cc\
LpdMessageReceiverTest.cc\
Bencode2Test.cc\
PeerConnectionTest.cc
PeerConnectionTest.cc\
ValueBaseBencodeParserTest.cc
endif # ENABLE_BITTORRENT
if ENABLE_METALINK

View File

@ -0,0 +1,200 @@
#include "ValueBaseBencodeParser.h"
#include <cppunit/extensions/HelperMacros.h>
#include "ValueBase.h"
namespace aria2 {
// CppUnit fixture for bittorrent::ValueBaseBencodeParser.
class ValueBaseBencodeParserTest:public CppUnit::TestFixture {
// Declares the suite and lists the test methods which belong to it.
CPPUNIT_TEST_SUITE(ValueBaseBencodeParserTest);
CPPUNIT_TEST(testParseUpdate);
CPPUNIT_TEST_SUITE_END();
public:
// Parses a series of valid and invalid bencoded documents and checks
// the results.
void testParseUpdate();
};
// Registers the suite with CppUnit's test factory registry.
CPPUNIT_TEST_SUITE_REGISTRATION( ValueBaseBencodeParserTest );
namespace {
void checkDecodeError(const std::string& src)
{
bittorrent::ValueBaseBencodeParser parser;
ssize_t error;
SharedHandle<ValueBase> r = parser.parseFinal(src.c_str(), src.size(),
error);
CPPUNIT_ASSERT(!r);
CPPUNIT_ASSERT(error < 0);
}
} // namespace
// Exercises parseFinal() with valid documents of every bencode type,
// nested structures, malformed input and trailing garbage.
//
// Note that all successful cases below share one parser instance, so
// each case also checks that the parser is usable again after the
// previous parseFinal() call. The error cases go through
// checkDecodeError(), which uses a fresh parser each time.
void ValueBaseBencodeParserTest::testParseUpdate()
{
bittorrent::ValueBaseBencodeParser parser;
// Receives the status reported by parseFinal(). It is only inspected
// in the last case.
ssize_t error;
{
// empty string
std::string src = "0:";
SharedHandle<ValueBase> s = parser.parseFinal(src.c_str(), src.size(),
error);
CPPUNIT_ASSERT_EQUAL(std::string(""), downcast<String>(s)->s());
}
{
// integer 0
std::string src = "i0e";
SharedHandle<ValueBase> s = parser.parseFinal(src.c_str(), src.size(),
error);
CPPUNIT_ASSERT_EQUAL((int64_t)0, downcast<Integer>(s)->i());
}
{
// empty dict
std::string src = "de";
SharedHandle<ValueBase> d = parser.parseFinal(src.c_str(), src.size(),
error);
CPPUNIT_ASSERT(downcast<Dict>(d)->empty());
}
{
// empty list
std::string src = "le";
SharedHandle<ValueBase> l = parser.parseFinal(src.c_str(), src.size(),
error);
CPPUNIT_ASSERT(downcast<List>(l)->empty());
}
{
// string
std::string src = "3:foo";
SharedHandle<ValueBase> s = parser.parseFinal(src.c_str(), src.size(),
error);
CPPUNIT_ASSERT_EQUAL(std::string("foo"), downcast<String>(s)->s());
}
{
// integer, largest value representable in int64_t
std::string src = "i9223372036854775807e";
SharedHandle<ValueBase> s = parser.parseFinal(src.c_str(), src.size(),
error);
CPPUNIT_ASSERT_EQUAL((int64_t)9223372036854775807LL,
downcast<Integer>(s)->i());
}
{
// dict, size 1
std::string src = "d3:fooi123ee";
SharedHandle<ValueBase> d = parser.parseFinal(src.c_str(), src.size(),
error);
Dict* dict = downcast<Dict>(d);
CPPUNIT_ASSERT(dict);
CPPUNIT_ASSERT(dict->get("foo"));
CPPUNIT_ASSERT_EQUAL((int64_t)123,
downcast<Integer>(dict->get("foo"))->i());
}
{
// dict, size 2. The value of "bar" is the 1 byte string "e", which
// must not be mistaken for the end of the dict.
std::string src = "d3:fooi123e3:bar1:ee";
SharedHandle<ValueBase> d = parser.parseFinal(src.c_str(), src.size(),
error);
Dict* dict = downcast<Dict>(d);
CPPUNIT_ASSERT(dict);
CPPUNIT_ASSERT_EQUAL((size_t)2, dict->size());
CPPUNIT_ASSERT(dict->get("foo"));
CPPUNIT_ASSERT_EQUAL((int64_t)123,
downcast<Integer>(dict->get("foo"))->i());
CPPUNIT_ASSERT(dict->get("bar"));
CPPUNIT_ASSERT_EQUAL(std::string("e"),
downcast<String>(dict->get("bar"))->s());
}
{
// list, size 1
std::string src = "l3:fooe";
SharedHandle<ValueBase> l = parser.parseFinal(src.c_str(), src.size(),
error);
List* list = downcast<List>(l);
CPPUNIT_ASSERT(list);
CPPUNIT_ASSERT_EQUAL((size_t)1, list->size());
CPPUNIT_ASSERT_EQUAL(std::string("foo"),
downcast<String>(list->get(0))->s());
}
{
// list, size 2
std::string src = "l3:fooi123ee";
SharedHandle<ValueBase> l = parser.parseFinal(src.c_str(), src.size(),
error);
List* list = downcast<List>(l);
CPPUNIT_ASSERT(list);
CPPUNIT_ASSERT_EQUAL((size_t)2, list->size());
CPPUNIT_ASSERT_EQUAL(std::string("foo"),
downcast<String>(list->get(0))->s());
CPPUNIT_ASSERT_EQUAL((int64_t)123,
downcast<Integer>(list->get(1))->i());
}
{
// string, integer and list in dict
std::string src = "d4:name5:aria24:sizei12345678900e5:filesl3:bin3:docee";
SharedHandle<ValueBase> r = parser.parseFinal(src.c_str(), src.size(),
error);
const Dict* dict = downcast<Dict>(r);
CPPUNIT_ASSERT(dict);
CPPUNIT_ASSERT_EQUAL(std::string("aria2"),
downcast<String>(dict->get("name"))->s());
CPPUNIT_ASSERT_EQUAL(static_cast<Integer::ValueType>(12345678900LL),
downcast<Integer>(dict->get("size"))->i());
const List* list = downcast<List>(dict->get("files"));
CPPUNIT_ASSERT(list);
CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), list->size());
CPPUNIT_ASSERT_EQUAL(std::string("bin"),
downcast<String>(list->get(0))->s());
CPPUNIT_ASSERT_EQUAL(std::string("doc"),
downcast<String>(list->get(1))->s());
}
{
// dict in list
std::string src = "ld1:ki123eee";
SharedHandle<ValueBase> r = parser.parseFinal(src.c_str(), src.size(),
error);
const List* list = downcast<List>(r);
CPPUNIT_ASSERT(list);
CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), list->size());
const Dict* dict = downcast<Dict>(list->get(0));
CPPUNIT_ASSERT(dict);
CPPUNIT_ASSERT_EQUAL(static_cast<Integer::ValueType>(123),
downcast<Integer>(dict->get("k"))->i());
}
{
// empty key is allowed.
// NOTE(review): nothing is asserted here; this case only checks that
// parsing such input does not crash. Consider asserting the result.
std::string src = "d0:1:ve";
SharedHandle<ValueBase> s = parser.parseFinal(src.c_str(), src.size(),
error);
}
{
// empty encoded data yields no value
std::string src = "";
SharedHandle<ValueBase> s = parser.parseFinal(src.c_str(), src.size(),
error);
CPPUNIT_ASSERT(!s);
}
// integer, without ending 'e'
checkDecodeError("i3");
// dict, without ending 'e'
checkDecodeError("d");
// list, without ending 'e'
checkDecodeError("l");
// string, shorter than the specified length.
checkDecodeError("3:ab");
// string, but length is invalid
checkDecodeError("x:abc");
// string with negative length
checkDecodeError("-1:a");
// structure nested too deeply: lists only, then a dict inside lists
checkDecodeError(std::string(51, 'l')+std::string(51,'e'));
checkDecodeError(std::string(50, 'l')+"d3:fooi100ee"+std::string(50,'e'));
{
// trailing garbage after the top-level value is ignored.
std::string src = "5:aria2trail";
SharedHandle<ValueBase> s = parser.parseFinal(src.c_str(), src.size(),
error);
CPPUNIT_ASSERT_EQUAL(std::string("aria2"), downcast<String>(s)->s());
// On success |error| holds the number of bytes processed, i.e. the
// position where the trailing garbage starts ("5:aria2" is 7 bytes).
CPPUNIT_ASSERT_EQUAL((ssize_t)7, error);
}
}
} // namespace aria2