From dd7014a6121c0985e94451e2492a112402501335 Mon Sep 17 00:00:00 2001 From: Tatsuhiro Tsujikawa Date: Sun, 23 Sep 2012 20:59:34 +0900 Subject: [PATCH] Store only interesting HTTP headers in HttpHeader In this change, we defined HTTP header fields we are interested in. We only store those headers in HttpHeader object. Accessing HTTP headers in HttpHeader object is now done through enum values. --- src/HttpHeader.cc | 109 ++++++++++++++---------- src/HttpHeader.h | 74 ++++++++++------- src/HttpHeaderProcessor.cc | 32 +++++++- src/HttpHeaderProcessor.h | 1 + src/HttpResponse.cc | 13 ++- src/HttpServerBodyCommand.cc | 8 +- src/HttpServerCommand.cc | 11 +-- src/LpdMessageReceiver.cc | 6 +- src/util.cc | 5 ++ src/util.h | 3 + test/HttpHeaderProcessorTest.cc | 44 +++++----- test/HttpHeaderTest.cc | 66 +++++++-------- test/HttpResponseTest.cc | 141 +++++++++++++++++--------------- 13 files changed, 294 insertions(+), 219 deletions(-) diff --git a/src/HttpHeader.cc b/src/HttpHeader.cc index 6197fd12..165ce546 100644 --- a/src/HttpHeader.cc +++ b/src/HttpHeader.cc @@ -37,26 +37,10 @@ #include "util.h" #include "A2STR.h" #include "DownloadFailureException.h" +#include "array_fun.h" namespace aria2 { -const std::string HttpHeader::LOCATION("location"); -const std::string HttpHeader::TRANSFER_ENCODING("transfer-encoding"); -const std::string HttpHeader::CONTENT_ENCODING("content-encoding"); -const std::string HttpHeader::CONTENT_DISPOSITION("content-disposition"); -const std::string HttpHeader::SET_COOKIE("set-cookie"); -const std::string HttpHeader::CONTENT_TYPE("content-type"); -const std::string HttpHeader::RETRY_AFTER("retry-after"); -const std::string HttpHeader::CONNECTION("connection"); -const std::string HttpHeader::CONTENT_LENGTH("content-length"); -const std::string HttpHeader::CONTENT_RANGE("content-range"); -const std::string HttpHeader::LAST_MODIFIED("last-modified"); -const std::string HttpHeader::ACCEPT_ENCODING("accept-encoding"); -const std::string HttpHeader::LINK("link"); -const std::string HttpHeader::DIGEST("digest"); -const std::string HttpHeader::PROXY_CONNECTION("proxy-connection"); -const std::string HttpHeader::AUTHORIZATION("authorization"); - const std::string HttpHeader::HTTP_1_1 = "HTTP/1.1"; const std::string HttpHeader::CLOSE = "close"; const std::string HttpHeader::KEEP_ALIVE = "keep-alive"; @@ -67,21 +51,20 @@ const std::string HttpHeader::DEFLATE = "deflate"; HttpHeader::HttpHeader() {} HttpHeader::~HttpHeader() {} -void HttpHeader::put(const std::string& name, const std::string& value) +void HttpHeader::put(int hdKey, const std::string& value) { - std::multimap::value_type vt(name, value); + std::multimap::value_type vt(hdKey, value); table_.insert(vt); } -bool HttpHeader::defined(const std::string& name) const +bool HttpHeader::defined(int hdKey) const { - return table_.count(name); + return table_.count(hdKey); } -const std::string& HttpHeader::find(const std::string& name) const +const std::string& HttpHeader::find(int hdKey) const { - std::multimap::const_iterator itr = - table_.find(name); + std::multimap::const_iterator itr = table_.find(hdKey); if(itr == table_.end()) { return A2STR::NIL; } else { @@ -89,12 +72,12 @@ const std::string& HttpHeader::find(const std::string& name) const } } -std::vector HttpHeader::findAll(const std::string& name) const +std::vector HttpHeader::findAll(int hdKey) const { std::vector v; - std::pair::const_iterator, - std::multimap::const_iterator> itrpair = - table_.equal_range(name); + std::pair::const_iterator, + std::multimap::const_iterator> itrpair = + table_.equal_range(hdKey); while(itrpair.first != itrpair.second) { v.push_back((*itrpair.first).second); ++itrpair.first; @@ -102,16 +85,16 @@ std::vector HttpHeader::findAll(const std::string& name) const return v; } -std::pair::const_iterator, - std::multimap::const_iterator> -HttpHeader::equalRange(const std::string& name) const +std::pair::const_iterator, + std::multimap::const_iterator> +HttpHeader::equalRange(int hdKey) const { - return table_.equal_range(name); + return table_.equal_range(hdKey); } -int32_t HttpHeader::findAsInt(const std::string& name) const +int32_t HttpHeader::findAsInt(int hdKey) const { - const std::string& value = find(name); + const std::string& value = find(hdKey); if(value.empty()) { return 0; } else { @@ -119,9 +102,9 @@ int32_t HttpHeader::findAsInt(const std::string& name) const } } -int64_t HttpHeader::findAsLLInt(const std::string& name) const +int64_t HttpHeader::findAsLLInt(int hdKey) const { - const std::string& value = find(name); + const std::string& value = find(hdKey); if(value.empty()) { return 0; } else { @@ -253,13 +236,12 @@ void HttpHeader::setReasonPhrase(const std::string& reasonPhrase) reasonPhrase_ = reasonPhrase; } -bool HttpHeader::fieldContains(const std::string& name, - const std::string& value) +bool HttpHeader::fieldContains(int hdKey, const char* value) { - std::pair::const_iterator, - std::multimap::const_iterator> range = - equalRange(name); - for(std::multimap::const_iterator i = range.first; + std::pair::const_iterator, + std::multimap::const_iterator> range = + equalRange(hdKey); + for(std::multimap::const_iterator i = range.first; i != range.second; ++i) { std::vector values; util::splitIter((*i).second.begin(), (*i).second.end(), @@ -269,7 +251,7 @@ bool HttpHeader::fieldContains(const std::string& name, ); for(std::vector::const_iterator j = values.begin(), eoj = values.end(); j != eoj; ++j) { - if(util::strieq((*j).first, (*j).second, value.begin(), value.end())) { + if(util::strieq((*j).first, (*j).second, value)) { return true; } } @@ -277,4 +259,45 @@ bool HttpHeader::fieldContains(const std::string& name, return false; } +namespace { +const char* INTERESTING_HEADER_NAMES[] = { + "accept-encoding", + "access-control-request-headers", + "access-control-request-method", + "authorization", + "connection", + "content-disposition", + "content-encoding", + "content-length", + "content-range", + "content-type", + "digest", + "infohash", + "last-modified", + "link", + "location", + "origin", + "port", + "proxy-connection", + "retry-after", + "sec-websocket-key", + "sec-websocket-version", + "set-cookie", + "transfer-encoding", + "upgrade", +}; +} // namespace + +int idInterestingHeader(const char* hdName) +{ + const char** i = std::lower_bound(vbegin(INTERESTING_HEADER_NAMES), + vend(INTERESTING_HEADER_NAMES), + hdName, util::strless); + if(i != vend(INTERESTING_HEADER_NAMES) && strcmp(*i, hdName) == 0 ) { + return i - vbegin(INTERESTING_HEADER_NAMES); + } else { + return HttpHeader::MAX_INTERESTING_HEADER; + } +} + } // namespace aria2 diff --git a/src/HttpHeader.h b/src/HttpHeader.h index ec04aa32..0d4eb7ba 100644 --- a/src/HttpHeader.h +++ b/src/HttpHeader.h @@ -49,7 +49,7 @@ class Range; class HttpHeader { private: - std::multimap table_; + std::multimap table_; // HTTP status code, e.g. 200 int statusCode_; @@ -69,16 +69,47 @@ public: HttpHeader(); ~HttpHeader(); + // The list of headers we are interested in. Only those header + // values are stored in table_. When updating this list, also update + // INTERESTING_HEADER_NAMES in HttpHeader.cc + enum InterestingHeader { + ACCEPT_ENCODING, + ACCESS_CONTROL_REQUEST_HEADERS, + ACCESS_CONTROL_REQUEST_METHOD, + AUTHORIZATION, + CONNECTION, + CONTENT_DISPOSITION, + CONTENT_ENCODING, + CONTENT_LENGTH, + CONTENT_RANGE, + CONTENT_TYPE, + DIGEST, + INFOHASH, // Used for BitTorrent LPD + LAST_MODIFIED, + LINK, + LOCATION, + ORIGIN, + PORT, // Used for BitTorrent LPD + PROXY_CONNECTION, + RETRY_AFTER, + SEC_WEBSOCKET_KEY, + SEC_WEBSOCKET_VERSION, + SET_COOKIE, + TRANSFER_ENCODING, + UPGRADE, + MAX_INTERESTING_HEADER + }; + // For all methods, use lowercased header field name. - void put(const std::string& name, const std::string& value); - bool defined(const std::string& name) const; - const std::string& find(const std::string& name) const; - std::vector findAll(const std::string& name) const; - std::pair::const_iterator, - std::multimap::const_iterator> - equalRange(const std::string& name) const; - int32_t findAsInt(const std::string& name) const; - int64_t findAsLLInt(const std::string& name) const; + void put(int hdKey, const std::string& value); + bool defined(int hdKey) const; + const std::string& find(int hdKey) const; + std::vector findAll(int hdKey) const; + std::pair::const_iterator, + std::multimap::const_iterator> + equalRange(int hdKey) const; + int32_t findAsInt(int hdKey) const; + int64_t findAsLLInt(int hdKey) const; SharedHandle getRange() const; @@ -125,24 +156,7 @@ public: // Returns true if heder field |name| contains |value|. This method // assumes the values of the header field is delimited by ','. - bool fieldContains(const std::string& name, const std::string& value); - - static const std::string LOCATION; - static const std::string TRANSFER_ENCODING; - static const std::string CONTENT_ENCODING; - static const std::string CONTENT_DISPOSITION; - static const std::string SET_COOKIE; - static const std::string CONTENT_TYPE; - static const std::string RETRY_AFTER; - static const std::string CONNECTION; - static const std::string CONTENT_LENGTH; - static const std::string CONTENT_RANGE; - static const std::string LAST_MODIFIED; - static const std::string ACCEPT_ENCODING; - static const std::string LINK; - static const std::string DIGEST; - static const std::string AUTHORIZATION; - static const std::string PROXY_CONNECTION; + bool fieldContains(int hdKey, const char* value); static const std::string HTTP_1_1; static const std::string CLOSE; @@ -152,8 +166,10 @@ public: static const std::string DEFLATE; }; +int idInterestingHeader(const char* hdName); + typedef SharedHandle HttpHeaderHandle; -} // namespace std; +} // namespace #endif // D_HTTP_HEADER_H diff --git a/src/HttpHeaderProcessor.cc b/src/HttpHeaderProcessor.cc index 6baf7595..6b85524a 100644 --- a/src/HttpHeaderProcessor.cc +++ b/src/HttpHeaderProcessor.cc @@ -78,6 +78,7 @@ HttpHeaderProcessor::HttpHeaderProcessor(ParserMode mode) : mode_(mode), state_(mode == CLIENT_PARSER ? PREV_RES_VERSION : PREV_METHOD), lastBytesProcessed_(0), + lastFieldHdKey_(HttpHeader::MAX_INTERESTING_HEADER), result_(new HttpHeader()) {} @@ -118,6 +119,16 @@ size_t getText(std::string& buf, } } // namespace +namespace { +size_t ignoreText(std::string& buf, + const unsigned char* data, size_t length, size_t off) +{ + size_t j; + for(j = off; j < length && !util::isCRLF(data[j]); ++j); + return j-1; +} +} // namespace + bool HttpHeaderProcessor::parse(const unsigned char* data, size_t length) { size_t i; @@ -276,9 +287,11 @@ bool HttpHeaderProcessor::parse(const unsigned char* data, size_t length) state_ = FIELD_VALUE; } else { if(!lastFieldName_.empty()) { - util::lowercase(lastFieldName_); - result_->put(lastFieldName_, util::strip(buf_)); + if(lastFieldHdKey_ != HttpHeader::MAX_INTERESTING_HEADER) { + result_->put(lastFieldHdKey_, util::strip(buf_)); + } lastFieldName_.clear(); + lastFieldHdKey_ = HttpHeader::MAX_INTERESTING_HEADER; buf_.clear(); } if(c == '\n') { @@ -297,6 +310,8 @@ bool HttpHeaderProcessor::parse(const unsigned char* data, size_t length) if(util::isLws(c) || util::isCRLF(c)) { throw DL_ABORT_EX("Bad HTTP header: missing ':'"); } else if(c == ':') { + util::lowercase(lastFieldName_); + lastFieldHdKey_ = idInterestingHeader(lastFieldName_.c_str()); state_ = PREV_FIELD_VALUE; } else { i = getFieldNameToken(lastFieldName_, data, length, i); @@ -309,7 +324,11 @@ bool HttpHeaderProcessor::parse(const unsigned char* data, size_t length) state_ = PREV_FIELD_NAME; } else if(!util::isLws(c)) { state_ = FIELD_VALUE; - i = getText(buf_, data, length, i); + if(lastFieldHdKey_ == HttpHeader::MAX_INTERESTING_HEADER) { + i = ignoreText(buf_, data, length, i); + } else { + i = getText(buf_, data, length, i); + } } break; case FIELD_VALUE: @@ -318,7 +337,11 @@ bool HttpHeaderProcessor::parse(const unsigned char* data, size_t length) } else if(c == '\n') { state_ = PREV_FIELD_NAME; } else { - i = getText(buf_, data, length, i); + if(lastFieldHdKey_ == HttpHeader::MAX_INTERESTING_HEADER) { + i = ignoreText(buf_, data, length, i); + } else { + i = getText(buf_, data, length, i); + } } break; case PREV_EOH: @@ -363,6 +386,7 @@ void HttpHeaderProcessor::clear() lastBytesProcessed_ = 0; buf_.clear(); lastFieldName_.clear(); + lastFieldHdKey_ = HttpHeader::MAX_INTERESTING_HEADER; result_.reset(new HttpHeader()); headers_.clear(); } diff --git a/src/HttpHeaderProcessor.h b/src/HttpHeaderProcessor.h index dd8c2a51..d8912f72 100644 --- a/src/HttpHeaderProcessor.h +++ b/src/HttpHeaderProcessor.h @@ -88,6 +88,7 @@ private: size_t lastBytesProcessed_; std::string buf_; std::string lastFieldName_; + int lastFieldHdKey_; SharedHandle result_; std::string headers_; }; diff --git a/src/HttpResponse.cc b/src/HttpResponse.cc index 61dd5264..cb1b61fd 100644 --- a/src/HttpResponse.cc +++ b/src/HttpResponse.cc @@ -140,8 +140,8 @@ std::string HttpResponse::determinFilename() const void HttpResponse::retrieveCookie() { Time now; - std::pair::const_iterator, - std::multimap::const_iterator> r = + std::pair::const_iterator, + std::multimap::const_iterator> r = httpHeader_->equalRange(HttpHeader::SET_COOKIE); for(; r.first != r.second; ++r.first) { httpRequest_->getCookieStorage()->parseAndStore @@ -162,7 +162,6 @@ bool HttpResponse::isRedirect() const void HttpResponse::processRedirect() { - if(httpRequest_->getRequest()->redirectUri (util::percentEncodeMini(getRedirectURI()))) { A2_LOG_INFO(fmt(MSG_REDIRECT, @@ -375,8 +374,8 @@ void HttpResponse::getMetalinKHttpEntries (std::vector& result, const SharedHandle