Clean up HttpHeaderProcessor, take 2

Decided to keep the 'if(' -> 'if (' whitespace changes: the whole file was
reformatted, so it is internally consistent. Also, reverting them would be
too much hassle.
pull/235/head
Nils Maier 2014-05-29 18:24:31 +02:00
parent 48ec56a64d
commit e57d330111
2 changed files with 229 additions and 147 deletions

View File

@ -85,47 +85,58 @@ HttpHeaderProcessor::HttpHeaderProcessor(ParserMode mode)
HttpHeaderProcessor::~HttpHeaderProcessor() {} HttpHeaderProcessor::~HttpHeaderProcessor() {}
namespace { namespace {
size_t getToken(std::string& buf, size_t
const unsigned char* data, size_t length, size_t off) getToken(std::string& buf, const unsigned char* data, size_t length, size_t off)
{ {
size_t j; size_t j = off;
for(j = off; j < length && !util::isLws(data[j]) && !util::isCRLF(data[j]); while (j < length && !util::isLws(data[j]) && !util::isCRLF(data[j])) {
++j); ++j;
}
buf.append(&data[off], &data[j]); buf.append(&data[off], &data[j]);
return j-1; return j - 1;
} }
} // namespace } // namespace
namespace { namespace {
size_t getFieldNameToken(std::string& buf, size_t getFieldNameToken(std::string& buf,
const unsigned char* data, size_t length, size_t off) const unsigned char* data,
size_t length,
size_t off)
{ {
size_t j; size_t j = off;
for(j = off; j < length && data[j] != ':' && while (j < length && data[j] != ':' && !util::isLws(data[j]) &&
!util::isLws(data[j]) && !util::isCRLF(data[j]); ++j); !util::isCRLF(data[j])) {
++j;
}
buf.append(&data[off], &data[j]); buf.append(&data[off], &data[j]);
return j-1; return j - 1;
} }
} // namespace } // namespace
namespace { namespace {
size_t getText(std::string& buf, size_t
const unsigned char* data, size_t length, size_t off) getText(std::string& buf, const unsigned char* data, size_t length, size_t off)
{ {
size_t j; size_t j = off;
for(j = off; j < length && !util::isCRLF(data[j]); ++j); while (j < length && !util::isCRLF(data[j])) {
++j;
}
buf.append(&data[off], &data[j]); buf.append(&data[off], &data[j]);
return j-1; return j - 1;
} }
} // namespace } // namespace
namespace { namespace {
size_t ignoreText(std::string& buf, size_t ignoreText(std::string& buf,
const unsigned char* data, size_t length, size_t off) const unsigned char* data,
size_t length,
size_t off)
{ {
size_t j; size_t j = off;
for(j = off; j < length && !util::isCRLF(data[j]); ++j); while (j < length && !util::isCRLF(data[j])) {
return j-1; ++j;
}
return j - 1;
} }
} // namespace } // namespace
@ -133,237 +144,306 @@ bool HttpHeaderProcessor::parse(const unsigned char* data, size_t length)
{ {
size_t i; size_t i;
lastBytesProcessed_ = 0; lastBytesProcessed_ = 0;
for(i = 0; i < length; ++i) { for (i = 0; i < length; ++i) {
unsigned char c = data[i]; unsigned char c = data[i];
switch(state_) { switch (state_) {
case PREV_METHOD: case PREV_METHOD:
if(util::isLws(c) || util::isCRLF(c)) { if (util::isLws(c) || util::isCRLF(c)) {
throw DL_ABORT_EX("Bad Request-Line: missing method"); throw DL_ABORT_EX("Bad Request-Line: missing method");
} else {
i = getToken(buf_, data, length, i);
state_ = METHOD;
} }
i = getToken(buf_, data, length, i);
state_ = METHOD;
break; break;
case METHOD: case METHOD:
if(util::isLws(c)) { if (util::isLws(c)) {
result_->setMethod(buf_); result_->setMethod(buf_);
buf_.clear(); buf_.clear();
state_ = PREV_PATH; state_ = PREV_PATH;
} else if(util::isCRLF(c)) { break;
throw DL_ABORT_EX("Bad Request-Line: missing request-target");
} else {
i = getToken(buf_, data, length, i);
} }
if (util::isCRLF(c)) {
throw DL_ABORT_EX("Bad Request-Line: missing request-target");
}
i = getToken(buf_, data, length, i);
break; break;
case PREV_PATH: case PREV_PATH:
if(util::isCRLF(c)) { if (util::isCRLF(c)) {
throw DL_ABORT_EX("Bad Request-Line: missing request-target"); throw DL_ABORT_EX("Bad Request-Line: missing request-target");
} else if(!util::isLws(c)) {
i = getToken(buf_, data, length, i);
state_ = PATH;
} }
if (util::isLws(c)) {
break;
}
i = getToken(buf_, data, length, i);
state_ = PATH;
break; break;
case PATH: case PATH:
if(util::isLws(c)) { if (util::isLws(c)) {
result_->setRequestPath(buf_); result_->setRequestPath(buf_);
buf_.clear(); buf_.clear();
state_ = PREV_REQ_VERSION; state_ = PREV_REQ_VERSION;
} else if(util::isCRLF(c)) { break;
throw DL_ABORT_EX("Bad Request-Line: missing HTTP-version");
} else {
i = getToken(buf_, data, length, i);
} }
if (util::isCRLF(c)) {
throw DL_ABORT_EX("Bad Request-Line: missing HTTP-version");
}
i = getToken(buf_, data, length, i);
break; break;
case PREV_REQ_VERSION: case PREV_REQ_VERSION:
if(util::isCRLF(c)) { if (util::isCRLF(c)) {
throw DL_ABORT_EX("Bad Request-Line: missing HTTP-version"); throw DL_ABORT_EX("Bad Request-Line: missing HTTP-version");
} else if(!util::isLws(c)) {
i = getToken(buf_, data, length, i);
state_ = REQ_VERSION;
} }
if (util::isLws(c)) {
break;
}
i = getToken(buf_, data, length, i);
state_ = REQ_VERSION;
break; break;
case REQ_VERSION: case REQ_VERSION:
if(util::isCRLF(c)) { if (util::isCRLF(c)) {
result_->setVersion(buf_); result_->setVersion(buf_);
buf_.clear(); buf_.clear();
if(c == '\n') { state_ = c == '\n' ? PREV_FIELD_NAME : PREV_EOL;
state_ = PREV_FIELD_NAME; break;
} else { }
state_ = PREV_EOL;
} if (util::isLws(c)) {
} else if(util::isLws(c)) {
throw DL_ABORT_EX("Bad Request-Line: LWS after HTTP-version"); throw DL_ABORT_EX("Bad Request-Line: LWS after HTTP-version");
} else {
i = getToken(buf_, data, length, i);
} }
i = getToken(buf_, data, length, i);
break; break;
case PREV_RES_VERSION: case PREV_RES_VERSION:
if(util::isLws(c) || util::isCRLF(c)) { if (util::isLws(c) || util::isCRLF(c)) {
throw DL_ABORT_EX("Bad Status-Line: missing HTTP-version"); throw DL_ABORT_EX("Bad Status-Line: missing HTTP-version");
} else {
i = getToken(buf_, data, length, i);
state_ = RES_VERSION;
} }
i = getToken(buf_, data, length, i);
state_ = RES_VERSION;
break; break;
case RES_VERSION: case RES_VERSION:
if(util::isLws(c)) { if (util::isLws(c)) {
result_->setVersion(buf_); result_->setVersion(buf_);
buf_.clear(); buf_.clear();
state_ = PREV_STATUS_CODE; state_ = PREV_STATUS_CODE;
} else if(util::isCRLF(c)) { break;
}
if (util::isCRLF(c)) {
throw DL_ABORT_EX("Bad Status-Line: missing status-code"); throw DL_ABORT_EX("Bad Status-Line: missing status-code");
} }
break; break;
case PREV_STATUS_CODE: case PREV_STATUS_CODE:
if(util::isCRLF(c)) { if (util::isCRLF(c)) {
throw DL_ABORT_EX("Bad Status-Line: missing status-code"); throw DL_ABORT_EX("Bad Status-Line: missing status-code");
} else if(!util::isLws(c)) { }
if (!util::isLws(c)) {
state_ = STATUS_CODE; state_ = STATUS_CODE;
i = getToken(buf_, data, length, i); i = getToken(buf_, data, length, i);
} }
break; break;
case STATUS_CODE: case STATUS_CODE:
if(util::isLws(c) || util::isCRLF(c)) { if (!util::isLws(c) && !util::isCRLF(c)) {
i = getToken(buf_, data, length, i);
break;
}
{
int statusCode = -1; int statusCode = -1;
if(buf_.size() == 3 && util::isNumber(buf_.begin(), buf_.end())) { if (buf_.size() == 3 && util::isNumber(buf_.begin(), buf_.end())) {
statusCode = (buf_[0]-'0')*100 + (buf_[1]-'0')*10 + (buf_[2]-'0'); statusCode =
(buf_[0] - '0') * 100 + (buf_[1] - '0') * 10 + (buf_[2] - '0');
} }
if(statusCode >= 100) { if (statusCode < 100) {
result_->setStatusCode(statusCode);
buf_.clear();
} else {
throw DL_ABORT_EX("Bad status code: bad status-code"); throw DL_ABORT_EX("Bad status code: bad status-code");
} }
if(c == '\r') { result_->setStatusCode(statusCode);
state_ = PREV_EOL; buf_.clear();
} else if(c == '\n') {
state_ = PREV_FIELD_NAME;
} else {
state_ = PREV_REASON_PHRASE;
}
} else {
i = getToken(buf_, data, length, i);
} }
if (c == '\r') {
state_ = PREV_EOL;
break;
}
if (c == '\n') {
state_ = PREV_FIELD_NAME;
break;
}
state_ = PREV_REASON_PHRASE;
break; break;
case PREV_REASON_PHRASE: case PREV_REASON_PHRASE:
if(util::isCRLF(c)) { if (util::isCRLF(c)) {
// The reason-phrase is completely optional. // The reason-phrase is completely optional.
if(c == '\n') { state_ = c == '\n' ? PREV_FIELD_NAME : PREV_EOL;
state_ = PREV_FIELD_NAME; break;
} else {
state_ = PREV_EOL;
}
} else if(!util::isLws(c)) {
state_ = REASON_PHRASE;
i = getText(buf_, data, length, i);
} }
if (util::isLws(c)) {
break;
}
state_ = REASON_PHRASE;
i = getText(buf_, data, length, i);
break; break;
case REASON_PHRASE: case REASON_PHRASE:
if(util::isCRLF(c)) { if (util::isCRLF(c)) {
result_->setReasonPhrase(buf_); result_->setReasonPhrase(buf_);
buf_.clear(); buf_.clear();
if(c == '\n') { state_ = c == '\n' ? PREV_FIELD_NAME : PREV_EOL;
state_ = PREV_FIELD_NAME; break;
} else {
state_ = PREV_EOL;
}
} else {
i = getText(buf_, data, length, i);
} }
i = getText(buf_, data, length, i);
break; break;
case PREV_EOL: case PREV_EOL:
if(c == '\n') { if (c != '\n') {
state_ = PREV_FIELD_NAME;
} else {
throw DL_ABORT_EX("Bad HTTP header: missing LF"); throw DL_ABORT_EX("Bad HTTP header: missing LF");
} }
state_ = PREV_FIELD_NAME;
break; break;
case PREV_FIELD_NAME: case PREV_FIELD_NAME:
if(util::isLws(c)) { if (util::isLws(c)) {
if(lastFieldName_.empty()) { if (lastFieldName_.empty()) {
throw DL_ABORT_EX("Bad HTTP header: field name starts with LWS"); throw DL_ABORT_EX("Bad HTTP header: field name starts with LWS");
} }
// Evil Multi-line header field // Evil Multi-line header field
state_ = FIELD_VALUE; state_ = FIELD_VALUE;
} else { break;
if(!lastFieldName_.empty()) {
if(lastFieldHdKey_ != HttpHeader::MAX_INTERESTING_HEADER) {
result_->put(lastFieldHdKey_, util::strip(buf_));
}
lastFieldName_.clear();
lastFieldHdKey_ = HttpHeader::MAX_INTERESTING_HEADER;
buf_.clear();
}
if(c == '\n') {
state_ = HEADERS_COMPLETE;
} else if(c == '\r') {
state_ = PREV_EOH;
} else if(c == ':') {
throw DL_ABORT_EX("Bad HTTP header: field name starts with ':'");
} else {
state_ = FIELD_NAME;
i = getFieldNameToken(lastFieldName_, data, length, i);
}
} }
if (!lastFieldName_.empty()) {
if (lastFieldHdKey_ != HttpHeader::MAX_INTERESTING_HEADER) {
result_->put(lastFieldHdKey_, util::strip(buf_));
}
lastFieldName_.clear();
lastFieldHdKey_ = HttpHeader::MAX_INTERESTING_HEADER;
buf_.clear();
}
if (c == '\n') {
state_ = HEADERS_COMPLETE;
break;
}
if (c == '\r') {
state_ = PREV_EOH;
break;
}
if (c == ':') {
throw DL_ABORT_EX("Bad HTTP header: field name starts with ':'");
}
state_ = FIELD_NAME;
i = getFieldNameToken(lastFieldName_, data, length, i);
break; break;
case FIELD_NAME: case FIELD_NAME:
if(util::isLws(c) || util::isCRLF(c)) { if (util::isLws(c) || util::isCRLF(c)) {
throw DL_ABORT_EX("Bad HTTP header: missing ':'"); throw DL_ABORT_EX("Bad HTTP header: missing ':'");
} else if(c == ':') { }
if (c == ':') {
util::lowercase(lastFieldName_); util::lowercase(lastFieldName_);
lastFieldHdKey_ = idInterestingHeader(lastFieldName_.c_str()); lastFieldHdKey_ = idInterestingHeader(lastFieldName_.c_str());
state_ = PREV_FIELD_VALUE; state_ = PREV_FIELD_VALUE;
} else { break;
i = getFieldNameToken(lastFieldName_, data, length, i);
} }
i = getFieldNameToken(lastFieldName_, data, length, i);
break; break;
case PREV_FIELD_VALUE: case PREV_FIELD_VALUE:
if(c == '\r') { if (c == '\r') {
state_ = PREV_EOL; state_ = PREV_EOL;
} else if(c == '\n') { break;
state_ = PREV_FIELD_NAME;
} else if(!util::isLws(c)) {
state_ = FIELD_VALUE;
if(lastFieldHdKey_ == HttpHeader::MAX_INTERESTING_HEADER) {
i = ignoreText(buf_, data, length, i);
} else {
i = getText(buf_, data, length, i);
}
} }
if (c == '\n') {
state_ = PREV_FIELD_NAME;
break;
}
if (util::isLws(c)) {
break;
}
state_ = FIELD_VALUE;
if (lastFieldHdKey_ == HttpHeader::MAX_INTERESTING_HEADER) {
i = ignoreText(buf_, data, length, i);
break;
}
i = getText(buf_, data, length, i);
break; break;
case FIELD_VALUE: case FIELD_VALUE:
if(c == '\r') { if (c == '\r') {
state_ = PREV_EOL; state_ = PREV_EOL;
} else if(c == '\n') { break;
state_ = PREV_FIELD_NAME;
} else {
if(lastFieldHdKey_ == HttpHeader::MAX_INTERESTING_HEADER) {
i = ignoreText(buf_, data, length, i);
} else {
i = getText(buf_, data, length, i);
}
} }
if (c == '\n') {
state_ = PREV_FIELD_NAME;
break;
}
if (lastFieldHdKey_ == HttpHeader::MAX_INTERESTING_HEADER) {
i = ignoreText(buf_, data, length, i);
break;
}
i = getText(buf_, data, length, i);
break; break;
case PREV_EOH: case PREV_EOH:
if(c == '\n') { if (c != '\n') {
state_ = HEADERS_COMPLETE;
} else {
throw DL_ABORT_EX("Bad HTTP header: " throw DL_ABORT_EX("Bad HTTP header: "
"missing LF at the end of the header"); "missing LF at the end of the header");
} }
state_ = HEADERS_COMPLETE;
break; break;
case HEADERS_COMPLETE: case HEADERS_COMPLETE:
goto fin; goto fin;
} }
} }
fin:
fin:
// See Apache's documentation // See Apache's documentation
// http://httpd.apache.org/docs/2.2/en/mod/core.html about size // http://httpd.apache.org/docs/2.2/en/mod/core.html about size
// limit of HTTP headers. The page states that the number of request // limit of HTTP headers. The page states that the number of request
// fields rarely exceeds 20. // fields rarely exceeds 20.
if(lastFieldName_.size() > 1024 || buf_.size() > 8192) { if (lastFieldName_.size() > 1024 || buf_.size() > 8192) {
throw DL_ABORT_EX("Too large HTTP header"); throw DL_ABORT_EX("Too large HTTP header");
} }
lastBytesProcessed_ = i; lastBytesProcessed_ = i;
headers_.append(&data[0], &data[i]); headers_.append(&data[0], &data[i]);
return state_ == HEADERS_COMPLETE; return state_ == HEADERS_COMPLETE;

View File

@ -45,7 +45,8 @@ namespace aria2 {
class HttpHeader; class HttpHeader;
class HttpHeaderProcessor { class HttpHeaderProcessor
{
public: public:
enum ParserMode { enum ParserMode {
CLIENT_PARSER, CLIENT_PARSER,
@ -82,6 +83,7 @@ public:
* Resets internal status and ready for next header processing. * Resets internal status and ready for next header processing.
*/ */
void clear(); void clear();
private: private:
ParserMode mode_; ParserMode mode_;
int state_; int state_;