aria2/src/HttpHeaderProcessor.cc

405 lines
11 KiB
C++
Raw Normal View History

2007-06-30 09:54:03 +00:00
/* <!-- copyright */
/*
* aria2 - The high speed download utility
*
2012-06-23 08:34:20 +00:00
* Copyright (C) 2012 Tatsuhiro Tsujikawa
2007-06-30 09:54:03 +00:00
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2007-06-30 09:54:03 +00:00
*
* In addition, as a special exception, the copyright holders give
* permission to link the code of portions of this program with the
* OpenSSL library under certain conditions as described in each
* individual source file, and distribute linked combinations
* including the two.
* You must obey the GNU General Public License in all respects
* for all of the code used other than OpenSSL. If you modify
* file(s) with this exception, you may extend this exception to your
* version of the file(s), but you are not obligated to do so. If you
* do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source
* files in the program, then also delete it here.
*/
/* copyright --> */
#include "HttpHeaderProcessor.h"
#include <vector>
#include "HttpHeader.h"
2007-06-30 09:54:03 +00:00
#include "message.h"
#include "util.h"
2007-11-10 Tatsuhiro Tsujikawa <tujikawa at rednoah dot com> Don't connect server before checking file integrity at startup, if filesize and output file path are known. * src/AbstractCommand.cc * src/StreamFileAllocationEntry.cc * src/Metalink2RequestGroup.cc * src/RequestGroup.{h, cc} * src/HttpResponseCommand.cc * src/FtpNegotiationCommand.cc Added DownloadFailureException. If it is thrown, RequestGroup should halt. * src/AbstractCommand.cc * src/DownloadFailureException.h * src/RequestGroup.cc Catch RecoverableException, instead of DlAbortEx. * src/RequestGroupMan.cc * src/FillRequestGroupCommand.cc * src/MetaFileUtil.cc * src/IteratableChunkChecksumValidator.cc Now first parameter of MSG_DOWNLOAD_ABORTED is gid(RequestGroup:: getGID()) * src/CheckIntegrityCommand.cc * src/message.h Print gid instead of idx. * src/RequestGroupMan.cc Removed exception throwers declaration. * src/DirectDiskAdaptor.{h, cc} * src/SocketCore.{h, cc} * src/MultiDiskAdaptor.{h, cc} * src/HttpConnection.{h, cc} * src/HttpResponse.{h, cc} * src/DiskAdaptor.{h, cc} * src/CopyDiskAdaptor.{h, cc} * src/MultiDiskAdaptor.{h, cc} * src/HttpHeaderProcessor.{h, cc} * src/AbstractSingleDiskAdaptor.{h, cc} * src/Util.{h, cc} * test/UtilTest.cc * src/DefaultDiskWriter.{h, cc} * src/FtpConnection.{h, cc} * src/AbstractDiskWriter.{h, cc} Removed duplicate code. * src/StreamCheckIntegrityEntry.cc Removed unnecessary include. * src/DiskWriter.h Included Exception.h * src/option_processing.cc Included 2 files and added doc * src/TrackerWatcherCommand.cc * src/SocketCore.cc (writeData): Fixed send error with GnuTLS.
2007-11-09 18:01:12 +00:00
#include "DlRetryEx.h"
#include "DlAbortEx.h"
#include "A2STR.h"
#include "error_code.h"
2007-06-30 09:54:03 +00:00
namespace aria2 {
2012-06-23 08:34:20 +00:00
namespace {
// States of the header parsing state machine driven by
// HttpHeaderProcessor::parse(). A PREV_xxx state means the parser is
// waiting for the first byte of xxx; the matching xxx state means the
// parser is in the middle of (or has just buffered) that element.
enum {
// Server mode: parsing a Request-Line (method, path, HTTP-version).
// PREV_METHOD is the initial state when the mode is not CLIENT_PARSER.
PREV_METHOD,
METHOD,
PREV_PATH,
PATH,
PREV_REQ_VERSION,
REQ_VERSION,
// Client mode: parsing a Status-Line (HTTP-version, status-code,
// reason-phrase). PREV_RES_VERSION is the initial state for
// CLIENT_PARSER.
PREV_RES_VERSION,
RES_VERSION,
PREV_STATUS_CODE,
STATUS_CODE,
PREV_REASON_PHRASE,
REASON_PHRASE,
// name/value header fields
// PREV_EOL: a CR was seen and the LF terminating the line is expected.
PREV_EOL,
PREV_FIELD_NAME,
FIELD_NAME,
PREV_FIELD_VALUE,
FIELD_VALUE,
// End of header
// PREV_EOH: a CR was seen at the start of a line; the LF that ends the
// whole header block is expected next.
PREV_EOH,
HEADERS_COMPLETE
};
} // namespace
// Creates a parser for the given mode. A CLIENT_PARSER starts out
// expecting a Status-Line (PREV_RES_VERSION); any other mode starts out
// expecting a Request-Line (PREV_METHOD). No header field is pending
// yet, so lastFieldHdKey_ holds the MAX_INTERESTING_HEADER sentinel, and
// result_ is a fresh, empty HttpHeader.
HttpHeaderProcessor::HttpHeaderProcessor(ParserMode mode)
  : mode_(mode),
    state_(mode != CLIENT_PARSER ? PREV_METHOD : PREV_RES_VERSION),
    lastBytesProcessed_(0),
    lastFieldHdKey_(HttpHeader::MAX_INTERESTING_HEADER),
    result_(new HttpHeader())
{
}
// Empty destructor body: no explicit cleanup is performed here.
HttpHeaderProcessor::~HttpHeaderProcessor() {}
2012-06-23 08:34:20 +00:00
namespace {
size_t getToken(std::string& buf,
const unsigned char* data, size_t length, size_t off)
2007-06-30 09:54:03 +00:00
{
2012-06-23 08:34:20 +00:00
size_t j;
for(j = off; j < length && !util::isLws(data[j]) && !util::isCRLF(data[j]);
++j);
buf.append(&data[off], &data[j]);
return j-1;
2007-06-30 09:54:03 +00:00
}
2012-06-23 08:34:20 +00:00
} // namespace
2007-06-30 09:54:03 +00:00
2012-06-23 08:34:20 +00:00
namespace {
size_t getFieldNameToken(std::string& buf,
const unsigned char* data, size_t length, size_t off)
2007-06-30 09:54:03 +00:00
{
2012-06-23 08:34:20 +00:00
size_t j;
for(j = off; j < length && data[j] != ':' &&
!util::isLws(data[j]) && !util::isCRLF(data[j]); ++j);
buf.append(&data[off], &data[j]);
return j-1;
2007-06-30 09:54:03 +00:00
}
2012-06-23 08:34:20 +00:00
} // namespace
2007-06-30 09:54:03 +00:00
2012-06-23 08:34:20 +00:00
namespace {
size_t getText(std::string& buf,
const unsigned char* data, size_t length, size_t off)
2007-06-30 09:54:03 +00:00
{
2012-06-23 08:34:20 +00:00
size_t j;
for(j = off; j < length && !util::isCRLF(data[j]); ++j);
buf.append(&data[off], &data[j]);
return j-1;
2007-06-30 09:54:03 +00:00
}
2012-06-23 08:34:20 +00:00
} // namespace
2007-06-30 09:54:03 +00:00
namespace {
// Skips over text without storing it: scans |data| from |off| to the
// first byte for which util::isCRLF() is true (or to |length|).
// |buf| is accepted only so the signature mirrors getText(); it is
// never modified.
//
// Returns the index of the last byte skipped, so the caller's ++i lands
// on the line terminator.
size_t ignoreText(std::string& buf,
                  const unsigned char* data, size_t length, size_t off)
{
  size_t end = off;
  while(end < length && !util::isCRLF(data[end])) {
    ++end;
  }
  return end - 1;
}
} // namespace
2012-06-23 08:34:20 +00:00
bool HttpHeaderProcessor::parse(const unsigned char* data, size_t length)
2007-06-30 09:54:03 +00:00
{
2012-06-23 08:34:20 +00:00
size_t i;
lastBytesProcessed_ = 0;
for(i = 0; i < length; ++i) {
unsigned char c = data[i];
switch(state_) {
case PREV_METHOD:
if(util::isLws(c) || util::isCRLF(c)) {
throw DL_ABORT_EX("Bad Request-Line: missing method");
} else {
i = getToken(buf_, data, length, i);
state_ = METHOD;
}
break;
case METHOD:
if(util::isLws(c)) {
result_->setMethod(buf_);
buf_.clear();
state_ = PREV_PATH;
} else if(util::isCRLF(c)) {
throw DL_ABORT_EX("Bad Request-Line: missing request-target");
} else {
i = getToken(buf_, data, length, i);
}
break;
case PREV_PATH:
if(util::isCRLF(c)) {
throw DL_ABORT_EX("Bad Request-Line: missing request-target");
} else if(!util::isLws(c)) {
i = getToken(buf_, data, length, i);
state_ = PATH;
}
break;
case PATH:
if(util::isLws(c)) {
result_->setRequestPath(buf_);
buf_.clear();
state_ = PREV_REQ_VERSION;
} else if(util::isCRLF(c)) {
throw DL_ABORT_EX("Bad Request-Line: missing HTTP-version");
} else {
i = getToken(buf_, data, length, i);
}
break;
case PREV_REQ_VERSION:
if(util::isCRLF(c)) {
throw DL_ABORT_EX("Bad Request-Line: missing HTTP-version");
} else if(!util::isLws(c)) {
i = getToken(buf_, data, length, i);
state_ = REQ_VERSION;
}
break;
case REQ_VERSION:
if(util::isCRLF(c)) {
result_->setVersion(buf_);
buf_.clear();
if(c == '\n') {
state_ = PREV_FIELD_NAME;
} else {
state_ = PREV_EOL;
}
} else if(util::isLws(c)) {
throw DL_ABORT_EX("Bad Request-Line: LWS after HTTP-version");
} else {
i = getToken(buf_, data, length, i);
}
break;
case PREV_RES_VERSION:
if(util::isLws(c) || util::isCRLF(c)) {
throw DL_ABORT_EX("Bad Status-Line: missing HTTP-version");
} else {
i = getToken(buf_, data, length, i);
state_ = RES_VERSION;
}
break;
case RES_VERSION:
if(util::isLws(c)) {
result_->setVersion(buf_);
buf_.clear();
state_ = PREV_STATUS_CODE;
} else if(util::isCRLF(c)) {
throw DL_ABORT_EX("Bad Status-Line: missing status-code");
}
break;
case PREV_STATUS_CODE:
if(util::isCRLF(c)) {
throw DL_ABORT_EX("Bad Status-Line: missing status-code");
} else if(!util::isLws(c)) {
state_ = STATUS_CODE;
i = getToken(buf_, data, length, i);
}
break;
case STATUS_CODE:
if(util::isLws(c) || util::isCRLF(c)) {
int statusCode = -1;
if(buf_.size() == 3 && util::isNumber(buf_.begin(), buf_.end())) {
statusCode = (buf_[0]-'0')*100 + (buf_[1]-'0')*10 + (buf_[2]-'0');
}
if(statusCode >= 100) {
result_->setStatusCode(statusCode);
buf_.clear();
} else {
throw DL_ABORT_EX("Bad status code: bad status-code");
}
if(c == '\r') {
state_ = PREV_EOL;
} else if(c == '\n') {
state_ = PREV_FIELD_NAME;
} else {
state_ = PREV_REASON_PHRASE;
}
} else {
i = getToken(buf_, data, length, i);
}
break;
case PREV_REASON_PHRASE:
if(util::isCRLF(c)) {
// The reason-phrase is completely optional.
if(c == '\n') {
state_ = PREV_FIELD_NAME;
} else {
state_ = PREV_EOL;
}
} else if(!util::isLws(c)) {
state_ = REASON_PHRASE;
i = getText(buf_, data, length, i);
}
break;
case REASON_PHRASE:
if(util::isCRLF(c)) {
result_->setReasonPhrase(buf_);
buf_.clear();
if(c == '\n') {
state_ = PREV_FIELD_NAME;
} else {
state_ = PREV_EOL;
}
} else {
i = getText(buf_, data, length, i);
}
break;
case PREV_EOL:
if(c == '\n') {
state_ = PREV_FIELD_NAME;
} else {
throw DL_ABORT_EX("Bad HTTP header: missing LF");
}
break;
case PREV_FIELD_NAME:
if(util::isLws(c)) {
2012-07-05 15:57:18 +00:00
if(lastFieldName_.empty()) {
throw DL_ABORT_EX("Bad HTTP header: field name starts with LWS");
}
2012-06-23 08:34:20 +00:00
// Evil Multi-line header field
state_ = FIELD_VALUE;
} else {
if(!lastFieldName_.empty()) {
if(lastFieldHdKey_ != HttpHeader::MAX_INTERESTING_HEADER) {
result_->put(lastFieldHdKey_, util::strip(buf_));
}
2012-06-23 08:34:20 +00:00
lastFieldName_.clear();
lastFieldHdKey_ = HttpHeader::MAX_INTERESTING_HEADER;
2012-06-23 08:34:20 +00:00
buf_.clear();
}
if(c == '\n') {
state_ = HEADERS_COMPLETE;
} else if(c == '\r') {
state_ = PREV_EOH;
2012-07-05 15:57:18 +00:00
} else if(c == ':') {
throw DL_ABORT_EX("Bad HTTP header: field name starts with ':'");
2012-06-23 08:34:20 +00:00
} else {
state_ = FIELD_NAME;
i = getFieldNameToken(lastFieldName_, data, length, i);
}
}
break;
case FIELD_NAME:
if(util::isLws(c) || util::isCRLF(c)) {
throw DL_ABORT_EX("Bad HTTP header: missing ':'");
} else if(c == ':') {
util::lowercase(lastFieldName_);
lastFieldHdKey_ = idInterestingHeader(lastFieldName_.c_str());
2012-06-23 08:34:20 +00:00
state_ = PREV_FIELD_VALUE;
} else {
i = getFieldNameToken(lastFieldName_, data, length, i);
}
break;
case PREV_FIELD_VALUE:
if(c == '\r') {
state_ = PREV_EOL;
} else if(c == '\n') {
state_ = PREV_FIELD_NAME;
} else if(!util::isLws(c)) {
state_ = FIELD_VALUE;
if(lastFieldHdKey_ == HttpHeader::MAX_INTERESTING_HEADER) {
i = ignoreText(buf_, data, length, i);
} else {
i = getText(buf_, data, length, i);
}
2012-06-23 08:34:20 +00:00
}
break;
case FIELD_VALUE:
if(c == '\r') {
state_ = PREV_EOL;
} else if(c == '\n') {
state_ = PREV_FIELD_NAME;
} else {
if(lastFieldHdKey_ == HttpHeader::MAX_INTERESTING_HEADER) {
i = ignoreText(buf_, data, length, i);
} else {
i = getText(buf_, data, length, i);
}
2012-06-23 08:34:20 +00:00
}
break;
case PREV_EOH:
if(c == '\n') {
state_ = HEADERS_COMPLETE;
} else {
throw DL_ABORT_EX("Bad HTTP header: "
"missing LF at the end of the header");
}
break;
case HEADERS_COMPLETE:
goto fin;
}
2007-06-30 09:54:03 +00:00
}
2012-06-23 08:34:20 +00:00
fin:
// See Apache's documentation
// http://httpd.apache.org/docs/2.2/en/mod/core.html about size
// limit of HTTP headers. The page states that the number of request
// fields rarely exceeds 20.
if(lastFieldName_.size() > 1024 || buf_.size() > 8192) {
throw DL_ABORT_EX("Too large HTTP header");
}
lastBytesProcessed_ = i;
headers_.append(&data[0], &data[i]);
return state_ == HEADERS_COMPLETE;
2007-06-30 09:54:03 +00:00
}
2012-06-23 08:34:20 +00:00
bool HttpHeaderProcessor::parse(const std::string& data)
2007-06-30 09:54:03 +00:00
{
2012-06-23 08:34:20 +00:00
return parse(reinterpret_cast<const unsigned char*>(data.c_str()),
data.size());
2007-06-30 09:54:03 +00:00
}
2012-06-23 08:34:20 +00:00
size_t HttpHeaderProcessor::getLastBytesProcessed() const
2007-06-30 09:54:03 +00:00
{
2012-06-23 08:34:20 +00:00
return lastBytesProcessed_;
2007-06-30 09:54:03 +00:00
}
2012-06-23 08:34:20 +00:00
void HttpHeaderProcessor::clear()
2007-06-30 09:54:03 +00:00
{
2012-06-23 08:34:20 +00:00
state_ = (mode_ == CLIENT_PARSER ? PREV_RES_VERSION : PREV_METHOD);
lastBytesProcessed_ = 0;
buf_.clear();
lastFieldName_.clear();
lastFieldHdKey_ = HttpHeader::MAX_INTERESTING_HEADER;
2012-06-23 08:34:20 +00:00
result_.reset(new HttpHeader());
headers_.clear();
2007-06-30 09:54:03 +00:00
}
2012-06-23 08:34:20 +00:00
// Returns a reference to the handle of the HttpHeader object that
// parse() fills in. clear() replaces the object held by this handle.
const SharedHandle<HttpHeader>& HttpHeaderProcessor::getResult() const
{
  return this->result_;
}
std::string HttpHeaderProcessor::getHeaderString() const
2007-06-30 09:54:03 +00:00
{
2012-06-23 08:34:20 +00:00
return headers_;
2007-06-30 09:54:03 +00:00
}
} // namespace aria2