diff --git a/src/HttpHeader.cc b/src/HttpHeader.cc index 6197fd12..165ce546 100644 --- a/src/HttpHeader.cc +++ b/src/HttpHeader.cc @@ -37,26 +37,10 @@ #include "util.h" #include "A2STR.h" #include "DownloadFailureException.h" +#include "array_fun.h" namespace aria2 { -const std::string HttpHeader::LOCATION("location"); -const std::string HttpHeader::TRANSFER_ENCODING("transfer-encoding"); -const std::string HttpHeader::CONTENT_ENCODING("content-encoding"); -const std::string HttpHeader::CONTENT_DISPOSITION("content-disposition"); -const std::string HttpHeader::SET_COOKIE("set-cookie"); -const std::string HttpHeader::CONTENT_TYPE("content-type"); -const std::string HttpHeader::RETRY_AFTER("retry-after"); -const std::string HttpHeader::CONNECTION("connection"); -const std::string HttpHeader::CONTENT_LENGTH("content-length"); -const std::string HttpHeader::CONTENT_RANGE("content-range"); -const std::string HttpHeader::LAST_MODIFIED("last-modified"); -const std::string HttpHeader::ACCEPT_ENCODING("accept-encoding"); -const std::string HttpHeader::LINK("link"); -const std::string HttpHeader::DIGEST("digest"); -const std::string HttpHeader::PROXY_CONNECTION("proxy-connection"); -const std::string HttpHeader::AUTHORIZATION("authorization"); - const std::string HttpHeader::HTTP_1_1 = "HTTP/1.1"; const std::string HttpHeader::CLOSE = "close"; const std::string HttpHeader::KEEP_ALIVE = "keep-alive"; @@ -67,21 +51,20 @@ const std::string HttpHeader::DEFLATE = "deflate"; HttpHeader::HttpHeader() {} HttpHeader::~HttpHeader() {} -void HttpHeader::put(const std::string& name, const std::string& value) +void HttpHeader::put(int hdKey, const std::string& value) { - std::multimap::value_type vt(name, value); + std::multimap::value_type vt(hdKey, value); table_.insert(vt); } -bool HttpHeader::defined(const std::string& name) const +bool HttpHeader::defined(int hdKey) const { - return table_.count(name); + return table_.count(hdKey); } -const std::string& HttpHeader::find(const std::string& name) const +const std::string& HttpHeader::find(int hdKey) const { - std::multimap::const_iterator itr = - table_.find(name); + std::multimap::const_iterator itr = table_.find(hdKey); if(itr == table_.end()) { return A2STR::NIL; } else { @@ -89,12 +72,12 @@ const std::string& HttpHeader::find(const std::string& name) const } } -std::vector HttpHeader::findAll(const std::string& name) const +std::vector HttpHeader::findAll(int hdKey) const { std::vector v; - std::pair::const_iterator, - std::multimap::const_iterator> itrpair = - table_.equal_range(name); + std::pair::const_iterator, + std::multimap::const_iterator> itrpair = + table_.equal_range(hdKey); while(itrpair.first != itrpair.second) { v.push_back((*itrpair.first).second); ++itrpair.first; @@ -102,16 +85,16 @@ std::vector HttpHeader::findAll(const std::string& name) const return v; } -std::pair::const_iterator, - std::multimap::const_iterator> -HttpHeader::equalRange(const std::string& name) const +std::pair::const_iterator, + std::multimap::const_iterator> +HttpHeader::equalRange(int hdKey) const { - return table_.equal_range(name); + return table_.equal_range(hdKey); } -int32_t HttpHeader::findAsInt(const std::string& name) const +int32_t HttpHeader::findAsInt(int hdKey) const { - const std::string& value = find(name); + const std::string& value = find(hdKey); if(value.empty()) { return 0; } else { @@ -119,9 +102,9 @@ int32_t HttpHeader::findAsInt(const std::string& name) const } } -int64_t HttpHeader::findAsLLInt(const std::string& name) const +int64_t HttpHeader::findAsLLInt(int hdKey) const { - const std::string& value = find(name); + const std::string& value = find(hdKey); if(value.empty()) { return 0; } else { @@ -253,13 +236,12 @@ void HttpHeader::setReasonPhrase(const std::string& reasonPhrase) reasonPhrase_ = reasonPhrase; } -bool HttpHeader::fieldContains(const std::string& name, - const std::string& value) +bool HttpHeader::fieldContains(int hdKey, const char* value) { - std::pair::const_iterator, - std::multimap::const_iterator> range = - equalRange(name); - for(std::multimap::const_iterator i = range.first; + std::pair::const_iterator, + std::multimap::const_iterator> range = + equalRange(hdKey); + for(std::multimap::const_iterator i = range.first; i != range.second; ++i) { std::vector values; util::splitIter((*i).second.begin(), (*i).second.end(), @@ -269,7 +251,7 @@ bool HttpHeader::fieldContains(const std::string& name, ); for(std::vector::const_iterator j = values.begin(), eoj = values.end(); j != eoj; ++j) { - if(util::strieq((*j).first, (*j).second, value.begin(), value.end())) { + if(util::strieq((*j).first, (*j).second, value)) { return true; } } @@ -277,4 +259,45 @@ bool HttpHeader::fieldContains(const std::string& name, return false; } +namespace { +const char* INTERESTING_HEADER_NAMES[] = { + "accept-encoding", + "access-control-request-headers", + "access-control-request-method", + "authorization", + "connection", + "content-disposition", + "content-encoding", + "content-length", + "content-range", + "content-type", + "digest", + "infohash", + "last-modified", + "link", + "location", + "origin", + "port", + "proxy-connection", + "retry-after", + "sec-websocket-key", + "sec-websocket-version", + "set-cookie", + "transfer-encoding", + "upgrade", +}; +} // namespace + +int idInterestingHeader(const char* hdName) +{ + const char** i = std::lower_bound(vbegin(INTERESTING_HEADER_NAMES), + vend(INTERESTING_HEADER_NAMES), + hdName, util::strless); + if(i != vend(INTERESTING_HEADER_NAMES) && strcmp(*i, hdName) == 0 ) { + return i - vbegin(INTERESTING_HEADER_NAMES); + } else { + return HttpHeader::MAX_INTERESTING_HEADER; + } +} + } // namespace aria2 diff --git a/src/HttpHeader.h b/src/HttpHeader.h index ec04aa32..0d4eb7ba 100644 --- a/src/HttpHeader.h +++ b/src/HttpHeader.h @@ -49,7 +49,7 @@ class Range; class HttpHeader { private: - std::multimap table_; + std::multimap table_; // HTTP status code, e.g. 200 int statusCode_; @@ -69,16 +69,47 @@ public: HttpHeader(); ~HttpHeader(); + // The list of headers we are interested in. Only those header + // values are stored in table_. When updating this list, also update + // INTERESTING_HEADER_NAMES in HttpHeader.cc + enum InterestingHeader { + ACCEPT_ENCODING, + ACCESS_CONTROL_REQUEST_HEADERS, + ACCESS_CONTROL_REQUEST_METHOD, + AUTHORIZATION, + CONNECTION, + CONTENT_DISPOSITION, + CONTENT_ENCODING, + CONTENT_LENGTH, + CONTENT_RANGE, + CONTENT_TYPE, + DIGEST, + INFOHASH, // Used for BitTorrent LPD + LAST_MODIFIED, + LINK, + LOCATION, + ORIGIN, + PORT, // Used for BitTorrent LPD + PROXY_CONNECTION, + RETRY_AFTER, + SEC_WEBSOCKET_KEY, + SEC_WEBSOCKET_VERSION, + SET_COOKIE, + TRANSFER_ENCODING, + UPGRADE, + MAX_INTERESTING_HEADER + }; + // For all methods, use lowercased header field name. - void put(const std::string& name, const std::string& value); - bool defined(const std::string& name) const; - const std::string& find(const std::string& name) const; - std::vector findAll(const std::string& name) const; - std::pair::const_iterator, - std::multimap::const_iterator> - equalRange(const std::string& name) const; - int32_t findAsInt(const std::string& name) const; - int64_t findAsLLInt(const std::string& name) const; + void put(int hdKey, const std::string& value); + bool defined(int hdKey) const; + const std::string& find(int hdKey) const; + std::vector findAll(int hdKey) const; + std::pair::const_iterator, + std::multimap::const_iterator> + equalRange(int hdKey) const; + int32_t findAsInt(int hdKey) const; + int64_t findAsLLInt(int hdKey) const; SharedHandle getRange() const; @@ -125,24 +156,7 @@ public: // Returns true if heder field |name| contains |value|. This method // assumes the values of the header field is delimited by ','. - bool fieldContains(const std::string& name, const std::string& value); - - static const std::string LOCATION; - static const std::string TRANSFER_ENCODING; - static const std::string CONTENT_ENCODING; - static const std::string CONTENT_DISPOSITION; - static const std::string SET_COOKIE; - static const std::string CONTENT_TYPE; - static const std::string RETRY_AFTER; - static const std::string CONNECTION; - static const std::string CONTENT_LENGTH; - static const std::string CONTENT_RANGE; - static const std::string LAST_MODIFIED; - static const std::string ACCEPT_ENCODING; - static const std::string LINK; - static const std::string DIGEST; - static const std::string AUTHORIZATION; - static const std::string PROXY_CONNECTION; + bool fieldContains(int hdKey, const char* value); static const std::string HTTP_1_1; static const std::string CLOSE; @@ -152,8 +166,10 @@ public: static const std::string DEFLATE; }; +int idInterestingHeader(const char* hdName); + typedef SharedHandle HttpHeaderHandle; -} // namespace std; +} // namespace #endif // D_HTTP_HEADER_H diff --git a/src/HttpHeaderProcessor.cc b/src/HttpHeaderProcessor.cc index 6baf7595..6b85524a 100644 --- a/src/HttpHeaderProcessor.cc +++ b/src/HttpHeaderProcessor.cc @@ -78,6 +78,7 @@ HttpHeaderProcessor::HttpHeaderProcessor(ParserMode mode) : mode_(mode), state_(mode == CLIENT_PARSER ? PREV_RES_VERSION : PREV_METHOD), lastBytesProcessed_(0), + lastFieldHdKey_(HttpHeader::MAX_INTERESTING_HEADER), result_(new HttpHeader()) {} @@ -118,6 +119,16 @@ size_t getText(std::string& buf, } } // namespace +namespace { +size_t ignoreText(std::string& buf, + const unsigned char* data, size_t length, size_t off) +{ + size_t j; + for(j = off; j < length && !util::isCRLF(data[j]); ++j); + return j-1; +} +} // namespace + bool HttpHeaderProcessor::parse(const unsigned char* data, size_t length) { size_t i; @@ -276,9 +287,11 @@ bool HttpHeaderProcessor::parse(const unsigned char* data, size_t length) state_ = FIELD_VALUE; } else { if(!lastFieldName_.empty()) { - util::lowercase(lastFieldName_); - result_->put(lastFieldName_, util::strip(buf_)); + if(lastFieldHdKey_ != HttpHeader::MAX_INTERESTING_HEADER) { + result_->put(lastFieldHdKey_, util::strip(buf_)); + } lastFieldName_.clear(); + lastFieldHdKey_ = HttpHeader::MAX_INTERESTING_HEADER; buf_.clear(); } if(c == '\n') { @@ -297,6 +310,8 @@ bool HttpHeaderProcessor::parse(const unsigned char* data, size_t length) if(util::isLws(c) || util::isCRLF(c)) { throw DL_ABORT_EX("Bad HTTP header: missing ':'"); } else if(c == ':') { + util::lowercase(lastFieldName_); + lastFieldHdKey_ = idInterestingHeader(lastFieldName_.c_str()); state_ = PREV_FIELD_VALUE; } else { i = getFieldNameToken(lastFieldName_, data, length, i); @@ -309,7 +324,11 @@ bool HttpHeaderProcessor::parse(const unsigned char* data, size_t length) state_ = PREV_FIELD_NAME; } else if(!util::isLws(c)) { state_ = FIELD_VALUE; - i = getText(buf_, data, length, i); + if(lastFieldHdKey_ == HttpHeader::MAX_INTERESTING_HEADER) { + i = ignoreText(buf_, data, length, i); + } else { + i = getText(buf_, data, length, i); + } } break; case FIELD_VALUE: @@ -318,7 +337,11 @@ bool HttpHeaderProcessor::parse(const unsigned char* data, size_t length) } else if(c == '\n') { state_ = PREV_FIELD_NAME; } else { - i = getText(buf_, data, length, i); + if(lastFieldHdKey_ == HttpHeader::MAX_INTERESTING_HEADER) { + i = ignoreText(buf_, data, length, i); + } else { + i = getText(buf_, data, length, i); + } } break; case PREV_EOH: @@ -363,6 +386,7 @@ void HttpHeaderProcessor::clear() lastBytesProcessed_ = 0; buf_.clear(); lastFieldName_.clear(); + lastFieldHdKey_ = HttpHeader::MAX_INTERESTING_HEADER; result_.reset(new HttpHeader()); headers_.clear(); } diff --git a/src/HttpHeaderProcessor.h b/src/HttpHeaderProcessor.h index dd8c2a51..d8912f72 100644 --- a/src/HttpHeaderProcessor.h +++ b/src/HttpHeaderProcessor.h @@ -88,6 +88,7 @@ private: size_t lastBytesProcessed_; std::string buf_; std::string lastFieldName_; + int lastFieldHdKey_; SharedHandle result_; std::string headers_; }; diff --git a/src/HttpResponse.cc b/src/HttpResponse.cc index 61dd5264..cb1b61fd 100644 --- a/src/HttpResponse.cc +++ b/src/HttpResponse.cc @@ -140,8 +140,8 @@ std::string HttpResponse::determinFilename() const void HttpResponse::retrieveCookie() { Time now; - std::pair::const_iterator, - std::multimap::const_iterator> r = + std::pair::const_iterator, + std::multimap::const_iterator> r = httpHeader_->equalRange(HttpHeader::SET_COOKIE); for(; r.first != r.second; ++r.first) { httpRequest_->getCookieStorage()->parseAndStore @@ -162,7 +162,6 @@ bool HttpResponse::isRedirect() const void HttpResponse::processRedirect() { - if(httpRequest_->getRequest()->redirectUri (util::percentEncodeMini(getRedirectURI()))) { A2_LOG_INFO(fmt(MSG_REDIRECT, @@ -375,8 +374,8 @@ void HttpResponse::getMetalinKHttpEntries (std::vector& result, const SharedHandle