From 5fc1b1ad297e78de9e0d596146780f05e0d643ab Mon Sep 17 00:00:00 2001 From: Tatsuhiro Tsujikawa Date: Sat, 23 Jun 2012 17:34:20 +0900 Subject: [PATCH] Rewritten HTTP header parser --- src/HttpConnection.cc | 20 +- src/HttpHeader.cc | 57 +---- src/HttpHeader.h | 13 +- src/HttpHeaderProcessor.cc | 392 ++++++++++++++++++++++++-------- src/HttpHeaderProcessor.h | 58 ++--- src/HttpServer.cc | 20 +- src/LpdMessageReceiver.cc | 7 +- src/util.cc | 9 + src/util.h | 4 + test/HttpHeaderProcessorTest.cc | 193 ++++++++-------- test/HttpHeaderTest.cc | 27 --- 11 files changed, 477 insertions(+), 323 deletions(-) diff --git a/src/HttpConnection.cc b/src/HttpConnection.cc index edf582aa..a452c956 100644 --- a/src/HttpConnection.cc +++ b/src/HttpConnection.cc @@ -64,7 +64,7 @@ namespace aria2 { HttpRequestEntry::HttpRequestEntry (const SharedHandle& httpRequest) : httpRequest_(httpRequest), - proc_(new HttpHeaderProcessor()) + proc_(new HttpHeaderProcessor(HttpHeaderProcessor::CLIENT_PARSER)) {} HttpRequestEntry::~HttpRequestEntry() {} @@ -130,34 +130,28 @@ SharedHandle HttpConnection::receiveResponse() throw DL_ABORT_EX(EX_NO_HTTP_REQUEST_ENTRY_FOUND); } HttpRequestEntryHandle entry = outstandingHttpRequests_.front(); - HttpHeaderProcessorHandle proc = entry->getHttpHeaderProcessor(); + const SharedHandle& proc = + entry->getHttpHeaderProcessor(); if(socketRecvBuffer_->bufferEmpty()) { if(socketRecvBuffer_->recv() == 0 && !socket_->wantRead() && !socket_->wantWrite()) { throw DL_RETRY_EX(EX_GOT_EOF); } } - proc->update(socketRecvBuffer_->getBuffer(), - socketRecvBuffer_->getBufferLength()); SharedHandle httpResponse; - size_t shiftBufferLength; - if(proc->eoh()) { - SharedHandle httpHeader = proc->getHttpResponseHeader(); - size_t putbackDataLength = proc->getPutBackDataLength(); + if(proc->parse(socketRecvBuffer_->getBuffer(), + socketRecvBuffer_->getBufferLength())) { + const SharedHandle& httpHeader = proc->getResult(); A2_LOG_INFO(fmt(MSG_RECEIVE_RESPONSE, cuid_, 
proc->getHeaderString().c_str())); - assert(socketRecvBuffer_->getBufferLength() >= putbackDataLength); - shiftBufferLength = socketRecvBuffer_->getBufferLength()-putbackDataLength; httpResponse.reset(new HttpResponse()); httpResponse->setCuid(cuid_); httpResponse->setHttpHeader(httpHeader); httpResponse->setHttpRequest(entry->getHttpRequest()); outstandingHttpRequests_.pop_front(); - } else { - shiftBufferLength = socketRecvBuffer_->getBufferLength(); } - socketRecvBuffer_->shiftBuffer(shiftBufferLength); + socketRecvBuffer_->shiftBuffer(proc->getLastBytesProcessed()); return httpResponse; } diff --git a/src/HttpHeader.cc b/src/HttpHeader.cc index 1bba0ad2..6f86d934 100644 --- a/src/HttpHeader.cc +++ b/src/HttpHeader.cc @@ -216,53 +216,6 @@ void HttpHeader::setRequestPath(const std::string& requestPath) requestPath_ = requestPath; } -void HttpHeader::fill -(std::string::const_iterator first, - std::string::const_iterator last) -{ - std::string name; - std::string value; - while(first != last) { - std::string::const_iterator j = first; - while(j != last && *j != '\r' && *j != '\n') { - ++j; - } - if(first != j) { - std::string::const_iterator sep = std::find(first, j, ':'); - if(sep == j) { - // multiline header? 
- if(*first == ' ' || *first == '\t') { - std::pair p = util::stripIter(first, j); - if(!name.empty() && p.first != p.second) { - if(!value.empty()) { - value += " "; - } - value.append(p.first, p.second); - } - } - } else { - if(!name.empty()) { - put(name, value); - } - std::pair p = util::stripIter(first, sep); - name.assign(p.first, p.second); - util::lowercase(name); - p = util::stripIter(sep+1, j); - value.assign(p.first, p.second); - } - } - while(j != last && (*j == '\r' || *j == '\n')) { - ++j; - } - first = j; - } - if(!name.empty()) { - put(name, value); - } -} - void HttpHeader::clearField() { table_.clear(); @@ -293,6 +246,16 @@ const std::string& HttpHeader::getRequestPath() const return requestPath_; } +const std::string& HttpHeader::getReasonPhrase() const +{ + return reasonPhrase_; +} + +void HttpHeader::setReasonPhrase(const std::string& reasonPhrase) +{ + reasonPhrase_ = reasonPhrase; +} + bool HttpHeader::fieldContains(const std::string& name, const std::string& value) { diff --git a/src/HttpHeader.h b/src/HttpHeader.h index 8791a4c5..ec04aa32 100644 --- a/src/HttpHeader.h +++ b/src/HttpHeader.h @@ -54,6 +54,9 @@ private: // HTTP status code, e.g. 200 int statusCode_; + // The reason-phrase for the response + std::string reasonPhrase_; + // HTTP version, e.g. HTTP/1.1 std::string version_; @@ -83,6 +86,10 @@ public: void setStatusCode(int code); + const std::string& getReasonPhrase() const; + + void setReasonPhrase(const std::string& reasonPhrase); + const std::string& getVersion() const; void setVersion(const std::string& version); @@ -113,12 +120,6 @@ public: requestPath_.assign(first, last); } - // Parses header fields in [first, last). Field name is stored in - // lowercase. - void fill - (std::string::const_iterator first, - std::string::const_iterator last); - // Clears table_. responseStatus_ and version_ are unchanged. 
void clearField(); diff --git a/src/HttpHeaderProcessor.cc b/src/HttpHeaderProcessor.cc index c48bac61..1b027c5b 100644 --- a/src/HttpHeaderProcessor.cc +++ b/src/HttpHeaderProcessor.cc @@ -2,7 +2,7 @@ /* * aria2 - The high speed download utility * - * Copyright (C) 2006 Tatsuhiro Tsujikawa + * Copyright (C) 2012 Tatsuhiro Tsujikawa * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -46,128 +46,330 @@ namespace aria2 { -HttpHeaderProcessor::HttpHeaderProcessor(): - limit_(21/*lines*/*8190/*per line*/) {} -// The above values come from Apache's documentation -// http://httpd.apache.org/docs/2.2/en/mod/core.html: See -// LimitRequestFieldSize and LimitRequestLine directive. Also the -// page states that the number of request fields rarely exceeds 20. -// aria2 uses this class in both client and server side. +namespace { +enum { + // Server mode + PREV_METHOD, + METHOD, + PREV_PATH, + PATH, + PREV_REQ_VERSION, + REQ_VERSION, + // Client mode, + PREV_RES_VERSION, + RES_VERSION, + PREV_STATUS_CODE, + STATUS_CODE, + PREV_REASON_PHRASE, + REASON_PHRASE, + // name/value header fields + PREV_EOL, + PREV_FIELD_NAME, + FIELD_NAME, + PREV_FIELD_VALUE, + FIELD_VALUE, + // End of header + PREV_EOH, + HEADERS_COMPLETE +}; +} // namespace + +HttpHeaderProcessor::HttpHeaderProcessor(ParserMode mode) + : mode_(mode), + state_(mode == CLIENT_PARSER ? 
PREV_RES_VERSION : PREV_METHOD), + lastBytesProcessed_(0), + result_(new HttpHeader()) +{} HttpHeaderProcessor::~HttpHeaderProcessor() {} -void HttpHeaderProcessor::update(const unsigned char* data, size_t length) +namespace { +size_t getToken(std::string& buf, + const unsigned char* data, size_t length, size_t off) { - checkHeaderLimit(length); - buf_.append(&data[0], &data[length]); + size_t j; + for(j = off; j < length && !util::isLws(data[j]) && !util::isCRLF(data[j]); + ++j); + buf.append(&data[off], &data[j]); + return j-1; } +} // namespace -void HttpHeaderProcessor::update(const std::string& data) +namespace { +size_t getFieldNameToken(std::string& buf, + const unsigned char* data, size_t length, size_t off) { - checkHeaderLimit(data.size()); - buf_ += data; + size_t j; + for(j = off; j < length && data[j] != ':' && + !util::isLws(data[j]) && !util::isCRLF(data[j]); ++j); + buf.append(&data[off], &data[j]); + return j-1; } +} // namespace -void HttpHeaderProcessor::checkHeaderLimit(size_t incomingLength) +namespace { +size_t getText(std::string& buf, + const unsigned char* data, size_t length, size_t off) { - if(buf_.size()+incomingLength > limit_) { - throw DL_ABORT_EX2("Too large http header", - error_code::HTTP_PROTOCOL_ERROR); + size_t j; + for(j = off; j < length && !util::isCRLF(data[j]); ++j); + buf.append(&data[off], &data[j]); + return j-1; +} +} // namespace + +bool HttpHeaderProcessor::parse(const unsigned char* data, size_t length) +{ + size_t i; + lastBytesProcessed_ = 0; + for(i = 0; i < length; ++i) { + unsigned char c = data[i]; + switch(state_) { + case PREV_METHOD: + if(util::isLws(c) || util::isCRLF(c)) { + throw DL_ABORT_EX("Bad Request-Line: missing method"); + } else { + i = getToken(buf_, data, length, i); + state_ = METHOD; + } + break; + case METHOD: + if(util::isLws(c)) { + result_->setMethod(buf_); + buf_.clear(); + state_ = PREV_PATH; + } else if(util::isCRLF(c)) { + throw DL_ABORT_EX("Bad Request-Line: missing request-target"); 
+ } else { + i = getToken(buf_, data, length, i); + } + break; + case PREV_PATH: + if(util::isCRLF(c)) { + throw DL_ABORT_EX("Bad Request-Line: missing request-target"); + } else if(!util::isLws(c)) { + i = getToken(buf_, data, length, i); + state_ = PATH; + } + break; + case PATH: + if(util::isLws(c)) { + result_->setRequestPath(buf_); + buf_.clear(); + state_ = PREV_REQ_VERSION; + } else if(util::isCRLF(c)) { + throw DL_ABORT_EX("Bad Request-Line: missing HTTP-version"); + } else { + i = getToken(buf_, data, length, i); + } + break; + case PREV_REQ_VERSION: + if(util::isCRLF(c)) { + throw DL_ABORT_EX("Bad Request-Line: missing HTTP-version"); + } else if(!util::isLws(c)) { + i = getToken(buf_, data, length, i); + state_ = REQ_VERSION; + } + break; + case REQ_VERSION: + if(util::isCRLF(c)) { + result_->setVersion(buf_); + buf_.clear(); + if(c == '\n') { + state_ = PREV_FIELD_NAME; + } else { + state_ = PREV_EOL; + } + } else if(util::isLws(c)) { + throw DL_ABORT_EX("Bad Request-Line: LWS after HTTP-version"); + } else { + i = getToken(buf_, data, length, i); + } + break; + case PREV_RES_VERSION: + if(util::isLws(c) || util::isCRLF(c)) { + throw DL_ABORT_EX("Bad Status-Line: missing HTTP-version"); + } else { + i = getToken(buf_, data, length, i); + state_ = RES_VERSION; + } + break; + case RES_VERSION: + if(util::isLws(c)) { + result_->setVersion(buf_); + buf_.clear(); + state_ = PREV_STATUS_CODE; + } else if(util::isCRLF(c)) { + throw DL_ABORT_EX("Bad Status-Line: missing status-code"); + } + break; + case PREV_STATUS_CODE: + if(util::isCRLF(c)) { + throw DL_ABORT_EX("Bad Status-Line: missing status-code"); + } else if(!util::isLws(c)) { + state_ = STATUS_CODE; + i = getToken(buf_, data, length, i); + } + break; + case STATUS_CODE: + if(util::isLws(c) || util::isCRLF(c)) { + int statusCode = -1; + if(buf_.size() == 3 && util::isNumber(buf_.begin(), buf_.end())) { + statusCode = (buf_[0]-'0')*100 + (buf_[1]-'0')*10 + (buf_[2]-'0'); + } + if(statusCode >= 100) { + 
result_->setStatusCode(statusCode); + buf_.clear(); + } else { + throw DL_ABORT_EX("Bad status code: bad status-code"); + } + if(c == '\r') { + state_ = PREV_EOL; + } else if(c == '\n') { + state_ = PREV_FIELD_NAME; + } else { + state_ = PREV_REASON_PHRASE; + } + } else { + i = getToken(buf_, data, length, i); + } + break; + case PREV_REASON_PHRASE: + if(util::isCRLF(c)) { + // The reason-phrase is completely optional. + if(c == '\n') { + state_ = PREV_FIELD_NAME; + } else { + state_ = PREV_EOL; + } + } else if(!util::isLws(c)) { + state_ = REASON_PHRASE; + i = getText(buf_, data, length, i); + } + break; + case REASON_PHRASE: + if(util::isCRLF(c)) { + result_->setReasonPhrase(buf_); + buf_.clear(); + if(c == '\n') { + state_ = PREV_FIELD_NAME; + } else { + state_ = PREV_EOL; + } + } else { + i = getText(buf_, data, length, i); + } + break; + case PREV_EOL: + if(c == '\n') { + state_ = PREV_FIELD_NAME; + } else { + throw DL_ABORT_EX("Bad HTTP header: missing LF"); + } + break; + case PREV_FIELD_NAME: + if(util::isLws(c)) { + // Evil Multi-line header field + state_ = FIELD_VALUE; + } else { + if(!lastFieldName_.empty()) { + util::lowercase(lastFieldName_); + result_->put(lastFieldName_, util::strip(buf_)); + lastFieldName_.clear(); + buf_.clear(); + } + if(c == '\n') { + state_ = HEADERS_COMPLETE; + } else if(c == '\r') { + state_ = PREV_EOH; + } else { + state_ = FIELD_NAME; + i = getFieldNameToken(lastFieldName_, data, length, i); + } + } + break; + case FIELD_NAME: + if(util::isLws(c) || util::isCRLF(c)) { + throw DL_ABORT_EX("Bad HTTP header: missing ':'"); + } else if(c == ':') { + state_ = PREV_FIELD_VALUE; + } else { + i = getFieldNameToken(lastFieldName_, data, length, i); + } + break; + case PREV_FIELD_VALUE: + if(c == '\r') { + state_ = PREV_EOL; + } else if(c == '\n') { + state_ = PREV_FIELD_NAME; + } else if(!util::isLws(c)) { + state_ = FIELD_VALUE; + i = getText(buf_, data, length, i); + } + break; + case FIELD_VALUE: + if(c == '\r') { + state_ = 
PREV_EOL; + } else if(c == '\n') { + state_ = PREV_FIELD_NAME; + } else { + i = getText(buf_, data, length, i); + } + break; + case PREV_EOH: + if(c == '\n') { + state_ = HEADERS_COMPLETE; + } else { + throw DL_ABORT_EX("Bad HTTP header: " + "missing LF at the end of the header"); + } + break; + case HEADERS_COMPLETE: + goto fin; + } } + fin: + // See Apache's documentation + // http://httpd.apache.org/docs/2.2/en/mod/core.html about size + // limit of HTTP headers. The page states that the number of request + // fields rarely exceeds 20. + if(lastFieldName_.size() > 1024 || buf_.size() > 8192) { + throw DL_ABORT_EX("Too large HTTP header"); + } + lastBytesProcessed_ = i; + headers_.append(&data[0], &data[i]); + return state_ == HEADERS_COMPLETE; } -bool HttpHeaderProcessor::eoh() const +bool HttpHeaderProcessor::parse(const std::string& data) { - if(buf_.find("\r\n\r\n") == std::string::npos && - buf_.find("\n\n") == std::string::npos) { - return false; - } else { - return true; - } + return parse(reinterpret_cast(data.c_str()), + data.size()); } -size_t HttpHeaderProcessor::getPutBackDataLength() const +size_t HttpHeaderProcessor::getLastBytesProcessed() const { - std::string::size_type delimpos = std::string::npos; - if((delimpos = buf_.find("\r\n\r\n")) != std::string::npos) { - return buf_.size()-(delimpos+4); - } else if((delimpos = buf_.find("\n\n")) != std::string::npos) { - return buf_.size()-(delimpos+2); - } else { - return 0; - } + return lastBytesProcessed_; } void HttpHeaderProcessor::clear() { - buf_.erase(); + state_ = (mode_ == CLIENT_PARSER ? 
PREV_RES_VERSION : PREV_METHOD); + lastBytesProcessed_ = 0; + buf_.clear(); + lastFieldName_.clear(); + result_.reset(new HttpHeader()); + headers_.clear(); } -SharedHandle HttpHeaderProcessor::getHttpResponseHeader() +const SharedHandle& HttpHeaderProcessor::getResult() const { - std::string::size_type delimpos = std::string::npos; - if(((delimpos = buf_.find("\r\n")) == std::string::npos && - (delimpos = buf_.find("\n")) == std::string::npos) || - delimpos < 12) { - throw DL_RETRY_EX(EX_NO_STATUS_HEADER); - } - int32_t statusCode; - if(!util::parseIntNoThrow(statusCode, - std::string(buf_.begin()+9, buf_.begin()+12))) { - throw DL_RETRY_EX("Status code could not be parsed as integer."); - } - HttpHeaderHandle httpHeader(new HttpHeader()); - httpHeader->setVersion(buf_.begin(), buf_.begin()+8); - httpHeader->setStatusCode(statusCode); - // TODO 1st line(HTTP/1.1 200...) is also send to HttpHeader, but it should - // not. - if((delimpos = buf_.find("\r\n\r\n")) == std::string::npos && - (delimpos = buf_.find("\n\n")) == std::string::npos) { - delimpos = buf_.size(); - } - httpHeader->fill(buf_.begin(), buf_.begin()+delimpos); - return httpHeader; -} - -SharedHandle HttpHeaderProcessor::getHttpRequestHeader() -{ - // The minimum case of the first line is: - // GET / HTTP/1.x - // At least 14bytes before \r\n or \n. 
- std::string::size_type delimpos = std::string::npos; - if(((delimpos = buf_.find("\r\n")) == std::string::npos && - (delimpos = buf_.find("\n")) == std::string::npos) || - delimpos < 14) { - throw DL_RETRY_EX(EX_NO_STATUS_HEADER); - } - std::vector firstLine; - util::splitIter(buf_.begin(), buf_.begin()+delimpos, - std::back_inserter(firstLine), ' ', true); - if(firstLine.size() != 3) { - throw DL_ABORT_EX2("Malformed HTTP request header.", - error_code::HTTP_PROTOCOL_ERROR); - } - SharedHandle httpHeader(new HttpHeader()); - httpHeader->setMethod(firstLine[0].first, firstLine[0].second); - httpHeader->setRequestPath(firstLine[1].first, firstLine[1].second); - httpHeader->setVersion(firstLine[2].first, firstLine[2].second); - if((delimpos = buf_.find("\r\n\r\n")) == std::string::npos && - (delimpos = buf_.find("\n\n")) == std::string::npos) { - delimpos = buf_.size(); - } - httpHeader->fill(buf_.begin(), buf_.begin()+delimpos); - return httpHeader; + return result_; } std::string HttpHeaderProcessor::getHeaderString() const { - std::string::size_type delimpos = std::string::npos; - if((delimpos = buf_.find("\r\n\r\n")) == std::string::npos && - (delimpos = buf_.find("\n\n")) == std::string::npos) { - return buf_; - } else { - return buf_.substr(0, delimpos); - } + return headers_; } } // namespace aria2 diff --git a/src/HttpHeaderProcessor.h b/src/HttpHeaderProcessor.h index b352108f..dd8c2a51 100644 --- a/src/HttpHeaderProcessor.h +++ b/src/HttpHeaderProcessor.h @@ -2,7 +2,7 @@ /* * aria2 - The high speed download utility * - * Copyright (C) 2006 Tatsuhiro Tsujikawa + * Copyright (C) 2012 Tatsuhiro Tsujikawa * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -47,51 +47,51 @@ namespace aria2 { class HttpHeader; class HttpHeaderProcessor { -private: - std::string buf_; - size_t limit_; - - void checkHeaderLimit(size_t incomingLength); - public: - HttpHeaderProcessor(); 
+ enum ParserMode { + CLIENT_PARSER, + SERVER_PARSER + }; + + HttpHeaderProcessor(ParserMode mode); ~HttpHeaderProcessor(); - - void update(const unsigned char* data, size_t length); - - void update(const std::string& data); + /** + * Parses incoming data. Returns true if end of header is reached. + * This function stops processing data when end of header is + * reached. + */ + bool parse(const unsigned char* data, size_t length); + bool parse(const std::string& data); /** - * Returns true if end of header is reached. + * Returns the number of bytes processed in the last invocation of + * parse(). */ - bool eoh() const; - - /** - * Retruns the number of bytes beyond the end of header. - */ - size_t getPutBackDataLength() const; + size_t getLastBytesProcessed() const; /** * Processes the received header as a http response header and returns * HttpHeader object. */ - SharedHandle getHttpResponseHeader(); - - SharedHandle getHttpRequestHeader(); + const SharedHandle& getResult() const; std::string getHeaderString() const; + /** + * Resets the internal state so that the next header can be parsed. 
+ */ void clear(); - - void setHeaderLimit(size_t limit) - { - limit_ = limit; - } +private: + ParserMode mode_; + int state_; + size_t lastBytesProcessed_; + std::string buf_; + std::string lastFieldName_; + SharedHandle result_; + std::string headers_; }; -typedef SharedHandle HttpHeaderProcessorHandle; - } // namespace aria2 #endif // D_HTTP_HEADER_PROCESSOR_H diff --git a/src/HttpServer.cc b/src/HttpServer.cc index 4c41896d..fdc85014 100644 --- a/src/HttpServer.cc +++ b/src/HttpServer.cc @@ -60,7 +60,8 @@ HttpServer::HttpServer socketRecvBuffer_(new SocketRecvBuffer(socket_)), socketBuffer_(socket), e_(e), - headerProcessor_(new HttpHeaderProcessor()), + headerProcessor_(new HttpHeaderProcessor + (HttpHeaderProcessor::SERVER_PARSER)), keepAlive_(true), gzip_(false), acceptsPersistentConnection_(true), @@ -129,15 +130,13 @@ SharedHandle HttpServer::receiveRequest() throw DL_ABORT_EX(EX_EOF_FROM_PEER); } } - headerProcessor_->update(socketRecvBuffer_->getBuffer(), - socketRecvBuffer_->getBufferLength()); - if(headerProcessor_->eoh()) { - SharedHandle header = headerProcessor_->getHttpRequestHeader(); - size_t putbackDataLength = headerProcessor_->getPutBackDataLength(); + SharedHandle header; + if(headerProcessor_->parse(socketRecvBuffer_->getBuffer(), + socketRecvBuffer_->getBufferLength())) { + header = headerProcessor_->getResult(); A2_LOG_INFO(fmt("HTTP Server received request\n%s", headerProcessor_->getHeaderString().c_str())); - socketRecvBuffer_->shiftBuffer - (socketRecvBuffer_->getBufferLength()-putbackDataLength); + socketRecvBuffer_->shiftBuffer(headerProcessor_->getLastBytesProcessed()); lastRequestHeader_ = header; lastBody_.clear(); lastBody_.str(""); @@ -175,11 +174,10 @@ SharedHandle HttpServer::receiveRequest() break; } } - return header; } else { - socketRecvBuffer_->clearBuffer(); - return SharedHandle(); + socketRecvBuffer_->shiftBuffer(headerProcessor_->getLastBytesProcessed()); } + return header; } bool HttpServer::receiveBody() diff --git 
a/src/LpdMessageReceiver.cc b/src/LpdMessageReceiver.cc index ff2faab1..278b4bf6 100644 --- a/src/LpdMessageReceiver.cc +++ b/src/LpdMessageReceiver.cc @@ -87,13 +87,12 @@ SharedHandle LpdMessageReceiver::receiveMessage() if(length == 0) { return msg; } - HttpHeaderProcessor proc; - proc.update(buf, length); - if(!proc.eoh()) { + HttpHeaderProcessor proc(HttpHeaderProcessor::SERVER_PARSER); + if(!proc.parse(buf, length)) { msg.reset(new LpdMessage()); return msg; } - SharedHandle header = proc.getHttpRequestHeader(); + const SharedHandle& header = proc.getResult(); static const std::string A2_INFOHASH = "infohash"; static const std::string A2_PORT = "port"; const std::string& infoHashString = header->find(A2_INFOHASH); diff --git a/src/util.cc b/src/util.cc index be8d5a30..0cdd06a2 100644 --- a/src/util.cc +++ b/src/util.cc @@ -313,6 +313,15 @@ bool inRFC2616HttpToken(const char c) std::find(vbegin(chars), vend(chars), c) != vend(chars); } +bool isLws(const char c) +{ + return c == ' ' || c == '\t'; +} +bool isCRLF(const char c) +{ + return c == '\r' || c == '\n'; +} + namespace { bool isUtf8Tail(unsigned char ch) { diff --git a/src/util.h b/src/util.h index 24d1ecae..aa1fecf1 100644 --- a/src/util.h +++ b/src/util.h @@ -370,6 +370,10 @@ bool isHexDigit(const char c); bool isHexDigit(const std::string& s); +bool isLws(const char c); + +bool isCRLF(const char c); + template bool isLowercase(InputIterator first, InputIterator last) { diff --git a/test/HttpHeaderProcessorTest.cc b/test/HttpHeaderProcessorTest.cc index ad41b955..7dc9cd0f 100644 --- a/test/HttpHeaderProcessorTest.cc +++ b/test/HttpHeaderProcessorTest.cc @@ -13,12 +13,12 @@ namespace aria2 { class HttpHeaderProcessorTest:public CppUnit::TestFixture { CPPUNIT_TEST_SUITE(HttpHeaderProcessorTest); - CPPUNIT_TEST(testUpdate1); - CPPUNIT_TEST(testUpdate2); - CPPUNIT_TEST(testGetPutBackDataLength); - CPPUNIT_TEST(testGetPutBackDataLength_nullChar); + CPPUNIT_TEST(testParse1); + CPPUNIT_TEST(testParse2); + 
CPPUNIT_TEST(testParse3); + CPPUNIT_TEST(testGetLastBytesProcessed); + CPPUNIT_TEST(testGetLastBytesProcessed_nullChar); CPPUNIT_TEST(testGetHttpResponseHeader); - CPPUNIT_TEST(testGetHttpResponseHeader_empty); CPPUNIT_TEST(testGetHttpResponseHeader_statusOnly); CPPUNIT_TEST(testGetHttpResponseHeader_insufficientStatusLength); CPPUNIT_TEST(testBeyondLimit); @@ -27,12 +27,12 @@ class HttpHeaderProcessorTest:public CppUnit::TestFixture { CPPUNIT_TEST_SUITE_END(); public: - void testUpdate1(); - void testUpdate2(); - void testGetPutBackDataLength(); - void testGetPutBackDataLength_nullChar(); + void testParse1(); + void testParse2(); + void testParse3(); + void testGetLastBytesProcessed(); + void testGetLastBytesProcessed_nullChar(); void testGetHttpResponseHeader(); - void testGetHttpResponseHeader_empty(); void testGetHttpResponseHeader_statusOnly(); void testGetHttpResponseHeader_insufficientStatusLength(); void testBeyondLimit(); @@ -43,60 +43,84 @@ public: CPPUNIT_TEST_SUITE_REGISTRATION( HttpHeaderProcessorTest ); -void HttpHeaderProcessorTest::testUpdate1() +void HttpHeaderProcessorTest::testParse1() { - HttpHeaderProcessor proc; + HttpHeaderProcessor proc(HttpHeaderProcessor::CLIENT_PARSER); std::string hd1 = "HTTP/1.1 200 OK\r\n"; - proc.update(hd1); - CPPUNIT_ASSERT(!proc.eoh()); - proc.update("\r\n"); - CPPUNIT_ASSERT(proc.eoh()); + CPPUNIT_ASSERT(!proc.parse(hd1)); + CPPUNIT_ASSERT(proc.parse("\r\n")); } -void HttpHeaderProcessorTest::testUpdate2() +void HttpHeaderProcessorTest::testParse2() { - HttpHeaderProcessor proc; + HttpHeaderProcessor proc(HttpHeaderProcessor::CLIENT_PARSER); std::string hd1 = "HTTP/1.1 200 OK\n"; - proc.update(hd1); - CPPUNIT_ASSERT(!proc.eoh()); - proc.update("\n"); - CPPUNIT_ASSERT(proc.eoh()); + CPPUNIT_ASSERT(!proc.parse(hd1)); + CPPUNIT_ASSERT(proc.parse("\n")); } -void HttpHeaderProcessorTest::testGetPutBackDataLength() +void HttpHeaderProcessorTest::testParse3() { - HttpHeaderProcessor proc; - std::string hd1 = "HTTP/1.1 
200 OK\r\n" + HttpHeaderProcessor proc(HttpHeaderProcessor::SERVER_PARSER); + std::string s = + "GET / HTTP/1.1\r\n" + "Host: aria2.sourceforge.net\r\n" + "Connection: close \r\n" // trailing white space (BWS) + "Multi-Line: text1\r\n" // Multi-line header + " text2\r\n" + " text3\r\n" + "Duplicate: foo\r\n" + "Duplicate: bar\r\n" + "\r\n"; + CPPUNIT_ASSERT(proc.parse(s)); + SharedHandle h = proc.getResult(); + CPPUNIT_ASSERT_EQUAL(std::string("aria2.sourceforge.net"), + h->find("host")); + CPPUNIT_ASSERT_EQUAL(std::string("close"), + h->find("connection")); + CPPUNIT_ASSERT_EQUAL(std::string("text1 text2 text3"), + h->find("multi-line")); + CPPUNIT_ASSERT_EQUAL(std::string("foo"), + h->findAll("duplicate")[0]); + CPPUNIT_ASSERT_EQUAL(std::string("bar"), + h->findAll("duplicate")[1]); +} + +void HttpHeaderProcessorTest::testGetLastBytesProcessed() +{ + HttpHeaderProcessor proc(HttpHeaderProcessor::CLIENT_PARSER); + std::string hd1 = + "HTTP/1.1 200 OK\r\n" "\r\nputbackme"; - proc.update(hd1); - CPPUNIT_ASSERT(proc.eoh()); - CPPUNIT_ASSERT_EQUAL((size_t)9, proc.getPutBackDataLength()); + CPPUNIT_ASSERT(proc.parse(hd1)); + CPPUNIT_ASSERT_EQUAL((size_t)19, proc.getLastBytesProcessed()); proc.clear(); - std::string hd2 = "HTTP/1.1 200 OK\n" + std::string hd2 = + "HTTP/1.1 200 OK\n" "\nputbackme"; - proc.update(hd2); - CPPUNIT_ASSERT(proc.eoh()); - CPPUNIT_ASSERT_EQUAL((size_t)9, proc.getPutBackDataLength()); + CPPUNIT_ASSERT(proc.parse(hd2)); + CPPUNIT_ASSERT_EQUAL((size_t)17, proc.getLastBytesProcessed()); } -void HttpHeaderProcessorTest::testGetPutBackDataLength_nullChar() +void HttpHeaderProcessorTest::testGetLastBytesProcessed_nullChar() { - HttpHeaderProcessor proc; - const char* x = "HTTP/1.1 200 OK\r\n" + HttpHeaderProcessor proc(HttpHeaderProcessor::CLIENT_PARSER); + const char x[] = + "HTTP/1.1 200 OK\r\n" "foo: foo\0bar\r\n" "\r\nputbackme"; - std::string hd1(&x[0], &x[42]); - proc.update(hd1); - CPPUNIT_ASSERT(proc.eoh()); - CPPUNIT_ASSERT_EQUAL((size_t)9, 
proc.getPutBackDataLength()); + std::string hd1(&x[0], &x[sizeof(x)-1]); + CPPUNIT_ASSERT(proc.parse(hd1)); + CPPUNIT_ASSERT_EQUAL((size_t)33, proc.getLastBytesProcessed()); } void HttpHeaderProcessorTest::testGetHttpResponseHeader() { - HttpHeaderProcessor proc; - std::string hd = "HTTP/1.1 200 OK\r\n" + HttpHeaderProcessor proc(HttpHeaderProcessor::CLIENT_PARSER); + std::string hd = + "HTTP/1.1 404 Not Found\r\n" "Date: Mon, 25 Jun 2007 16:04:59 GMT\r\n" "Server: Apache/2.2.3 (Debian)\r\n" "Last-Modified: Tue, 12 Jun 2007 14:28:43 GMT\r\n" @@ -108,10 +132,11 @@ void HttpHeaderProcessorTest::testGetHttpResponseHeader() "\r\n" "Entity: body"; - proc.update(hd); + CPPUNIT_ASSERT(proc.parse(hd)); - SharedHandle header = proc.getHttpResponseHeader(); - CPPUNIT_ASSERT_EQUAL(200, header->getStatusCode()); + SharedHandle header = proc.getResult(); + CPPUNIT_ASSERT_EQUAL(404, header->getStatusCode()); + CPPUNIT_ASSERT_EQUAL(std::string("Not Found"), header->getReasonPhrase()); CPPUNIT_ASSERT_EQUAL(std::string("HTTP/1.1"), header->getVersion()); CPPUNIT_ASSERT_EQUAL(std::string("Mon, 25 Jun 2007 16:04:59 GMT"), header->find("date")); @@ -124,66 +149,50 @@ void HttpHeaderProcessorTest::testGetHttpResponseHeader() CPPUNIT_ASSERT(!header->defined("entity")); } -void HttpHeaderProcessorTest::testGetHttpResponseHeader_empty() -{ - HttpHeaderProcessor proc; - - try { - proc.getHttpResponseHeader(); - CPPUNIT_FAIL("Exception must be thrown."); - } catch(DlRetryEx& ex) { - std::cout << ex.stackTrace() << std::endl; - } - -} - void HttpHeaderProcessorTest::testGetHttpResponseHeader_statusOnly() { - HttpHeaderProcessor proc; + HttpHeaderProcessor proc(HttpHeaderProcessor::CLIENT_PARSER); std::string hd = "HTTP/1.1 200\r\n\r\n"; - proc.update(hd); - SharedHandle header = proc.getHttpResponseHeader(); + CPPUNIT_ASSERT(proc.parse(hd)); + SharedHandle header = proc.getResult(); CPPUNIT_ASSERT_EQUAL(200, header->getStatusCode()); } void 
HttpHeaderProcessorTest::testGetHttpResponseHeader_insufficientStatusLength() { - HttpHeaderProcessor proc; + HttpHeaderProcessor proc(HttpHeaderProcessor::CLIENT_PARSER); std::string hd = "HTTP/1.1 20\r\n\r\n"; - proc.update(hd); try { - proc.getHttpResponseHeader(); + proc.parse(hd); CPPUNIT_FAIL("Exception must be thrown."); - } catch(DlRetryEx& ex) { - std::cout << ex.stackTrace() << std::endl; + } catch(DlAbortEx& ex) { + // Success } - } void HttpHeaderProcessorTest::testBeyondLimit() { - HttpHeaderProcessor proc; - proc.setHeaderLimit(20); + HttpHeaderProcessor proc(HttpHeaderProcessor::CLIENT_PARSER); std::string hd1 = "HTTP/1.1 200 OK\r\n"; - std::string hd2 = "Date: Mon, 25 Jun 2007 16:04:59 GMT\r\n"; + std::string hd2 = std::string(1025, 'A'); - proc.update(hd1); - + proc.parse(hd1); try { - proc.update(hd2); + proc.parse(hd2); CPPUNIT_FAIL("Exception must be thrown."); } catch(DlAbortEx& ex) { - std::cout << ex.stackTrace() << std::endl; + // Success } } void HttpHeaderProcessorTest::testGetHeaderString() { - HttpHeaderProcessor proc; - std::string hd = "HTTP/1.1 200 OK\r\n" + HttpHeaderProcessor proc(HttpHeaderProcessor::CLIENT_PARSER); + std::string hd = + "HTTP/1.1 200 OK\r\n" "Date: Mon, 25 Jun 2007 16:04:59 GMT\r\n" "Server: Apache/2.2.3 (Debian)\r\n" "Last-Modified: Tue, 12 Jun 2007 14:28:43 GMT\r\n" @@ -194,33 +203,35 @@ void HttpHeaderProcessorTest::testGetHeaderString() "Content-Type: text/html; charset=UTF-8\r\n" "\r\nputbackme"; - proc.update(hd); + CPPUNIT_ASSERT(proc.parse(hd)); - CPPUNIT_ASSERT_EQUAL(std::string("HTTP/1.1 200 OK\r\n" - "Date: Mon, 25 Jun 2007 16:04:59 GMT\r\n" - "Server: Apache/2.2.3 (Debian)\r\n" - "Last-Modified: Tue, 12 Jun 2007 14:28:43 GMT\r\n" - "ETag: \"594065-23e3-50825cc0\"\r\n" - "Accept-Ranges: bytes\r\n" - "Content-Length: 9187\r\n" - "Connection: close\r\n" - "Content-Type: text/html; charset=UTF-8"), - proc.getHeaderString()); + CPPUNIT_ASSERT_EQUAL + (std::string("HTTP/1.1 200 OK\r\n" + "Date: Mon, 25 Jun 
2007 16:04:59 GMT\r\n" + "Server: Apache/2.2.3 (Debian)\r\n" + "Last-Modified: Tue, 12 Jun 2007 14:28:43 GMT\r\n" + "ETag: \"594065-23e3-50825cc0\"\r\n" + "Accept-Ranges: bytes\r\n" + "Content-Length: 9187\r\n" + "Connection: close\r\n" + "Content-Type: text/html; charset=UTF-8\r\n" + "\r\n"), + proc.getHeaderString()); } void HttpHeaderProcessorTest::testGetHttpRequestHeader() { - HttpHeaderProcessor proc; - std::string request = "GET /index.html HTTP/1.1\r\n" + HttpHeaderProcessor proc(HttpHeaderProcessor::SERVER_PARSER); + std::string request = + "GET /index.html HTTP/1.1\r\n" "Host: host\r\n" "Connection: close\r\n" "\r\n" "Entity: body"; - proc.update(request); + CPPUNIT_ASSERT(proc.parse(request)); - SharedHandle httpHeader = proc.getHttpRequestHeader(); - CPPUNIT_ASSERT(httpHeader); + SharedHandle httpHeader = proc.getResult(); CPPUNIT_ASSERT_EQUAL(std::string("GET"), httpHeader->getMethod()); CPPUNIT_ASSERT_EQUAL(std::string("/index.html"),httpHeader->getRequestPath()); CPPUNIT_ASSERT_EQUAL(std::string("HTTP/1.1"), httpHeader->getVersion()); diff --git a/test/HttpHeaderTest.cc b/test/HttpHeaderTest.cc index f1add642..77874f0f 100644 --- a/test/HttpHeaderTest.cc +++ b/test/HttpHeaderTest.cc @@ -13,7 +13,6 @@ class HttpHeaderTest:public CppUnit::TestFixture { CPPUNIT_TEST(testGetRange); CPPUNIT_TEST(testFindAll); CPPUNIT_TEST(testClearField); - CPPUNIT_TEST(testFill); CPPUNIT_TEST(testFieldContains); CPPUNIT_TEST_SUITE_END(); @@ -21,11 +20,9 @@ public: void testGetRange(); void testFindAll(); void testClearField(); - void testFill(); void testFieldContains(); }; - CPPUNIT_TEST_SUITE_REGISTRATION( HttpHeaderTest ); void HttpHeaderTest::testGetRange() @@ -153,30 +150,6 @@ void HttpHeaderTest::testClearField() CPPUNIT_ASSERT_EQUAL(std::string(HttpHeader::HTTP_1_1), h.getVersion()); } -void HttpHeaderTest::testFill() -{ - std::string s = - "Host: aria2.sourceforge.net\r\n" - "Connection: close \r\n" // trailing white space - "Multi-Line: text1\r\n" - " text2\r\n" 
- " text3\r\n" - "Duplicate: foo\r\n" - "Duplicate: bar\r\n"; - HttpHeader h; - h.fill(s.begin(), s.end()); - CPPUNIT_ASSERT_EQUAL(std::string("aria2.sourceforge.net"), - h.find("host")); - CPPUNIT_ASSERT_EQUAL(std::string("close"), - h.find("connection")); - CPPUNIT_ASSERT_EQUAL(std::string("text1 text2 text3"), - h.find("multi-line")); - CPPUNIT_ASSERT_EQUAL(std::string("foo"), - h.findAll("duplicate")[0]); - CPPUNIT_ASSERT_EQUAL(std::string("bar"), - h.findAll("duplicate")[1]); -} - void HttpHeaderTest::testFieldContains() { HttpHeader h;