From d3f05f44b73978f63279e8d1303885335312a741 Mon Sep 17 00:00:00 2001 From: Tatsuhiro Tsujikawa Date: Thu, 31 Jul 2008 12:42:28 +0000 Subject: [PATCH] 2008-07-31 Tatsuhiro Tsujikawa Fixed broken gzip inflation. Turn off segmented downloading if gzip content is smaller than or equal to 1MiB and inflate the data on the fly, because HTTP response header doesn't contain the length of inflated file we can't determine where the chunk of data should be written. On the other hand, if gzip content is larger than 1MiB, then turn off on the fly inflation, because some servers return "content-encoding: gzip" for *.tgz, *.gz files. * src/DownloadCommand.cc * src/HttpResponseCommand.cc * src/HttpResponseCommand.h --- ChangeLog | 14 ++++++++++++++ src/DownloadCommand.cc | 2 ++ src/HttpResponseCommand.cc | 22 ++++++++++++++++++++-- src/HttpResponseCommand.h | 4 ++++ 4 files changed, 40 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index d4b36c31..826a4cfd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +2008-07-31 Tatsuhiro Tsujikawa + + Fixed broken gzip inflation. + Turn off segmented downloading if gzip content is smaller than or equal + to 1MiB and inflate the data on the fly, because HTTP response header + doesn't contain the length of inflated file we can't determine where + the chunk of data should be written. + On the other hand, if gzip content is larger than 1MiB, then turn off + on the fly inflation, because some servers return "content-encoding: gzip" + for *.tgz, *.gz files. + * src/DownloadCommand.cc + * src/HttpResponseCommand.cc + * src/HttpResponseCommand.h + 2008-07-31 Tatsuhiro Tsujikawa Applied tizianomueller's patch to fix compile error. 
diff --git a/src/DownloadCommand.cc b/src/DownloadCommand.cc index bdd7e3b7..e6585e72 100644 --- a/src/DownloadCommand.cc +++ b/src/DownloadCommand.cc @@ -165,6 +165,8 @@ bool DownloadCommand::executeInternal() { if((!_transferEncodingDecoder.isNull() && _transferEncodingDecoder->finished()) || (_transferEncodingDecoder.isNull() && segment->complete()) + || (!_contentEncodingDecoder.isNull() && + _contentEncodingDecoder->finished()) || bufSize == 0) { logger->info(MSG_SEGMENT_DOWNLOAD_COMPLETED, cuid); diff --git a/src/HttpResponseCommand.cc b/src/HttpResponseCommand.cc index 0f4c3401..ddf3acdb 100644 --- a/src/HttpResponseCommand.cc +++ b/src/HttpResponseCommand.cc @@ -110,7 +110,8 @@ bool HttpResponseCommand::executeInternal() (StringFormat(EX_DUPLICATE_FILE_DOWNLOAD, _requestGroup->getFilePath().c_str()).str()); } - if(totalLength == 0 || httpResponse->isTransferEncodingSpecified()) { + if(totalLength == 0 || httpResponse->isTransferEncodingSpecified() || + shouldInflateContentEncoding(httpResponse)) { // we ignore content-length when transfer-encoding is set dctx->setTotalLength(0); return handleOtherEncoding(httpResponse); @@ -125,6 +126,23 @@ bool HttpResponseCommand::executeInternal() } } +bool HttpResponseCommand::shouldInflateContentEncoding +(const SharedHandle& httpResponse) +{ + // Basically, on the fly inflation cannot be made with segment download, + // because in each segment we don't know where the data should be written. + // So turn off segmented downloading. + // Meanwhile, some servers return content-encoding: gzip for .tgz files. + // Those files tend to be large enough to speed up using segmented + // downloading. Therefore, I choose threshold size to determine on the fly + // inflation should be done. I expect gzipped content such as metalink xml + // files tend to be smaller than the threshold size, those contents are + // inflated on the fly properly. 
+ return httpResponse->isContentEncodingSpecified() && + httpResponse->getEntityLength() <= + static_cast(e->option->getAsInt(PREF_SEGMENT_SIZE)); +} + bool HttpResponseCommand::handleDefaultEncoding(const HttpResponseHandle& httpResponse) { HttpRequestHandle httpRequest = httpResponse->getHttpRequest(); @@ -255,7 +273,7 @@ HttpDownloadCommand* HttpResponseCommand::createHttpDownloadCommand (e->option->getAsInt(PREF_LOWEST_SPEED_LIMIT)); command->setTransferEncodingDecoder(transferEncodingDecoder); - if(!contentEncodingDecoder.isNull()) { + if(shouldInflateContentEncoding(httpResponse)) { command->setContentEncodingDecoder(contentEncodingDecoder); // Since the compressed file's length are returned in the response header // and the decompressed file size is unknown at this point, disable file diff --git a/src/HttpResponseCommand.h b/src/HttpResponseCommand.h index 0a30d08f..6cb410be 100644 --- a/src/HttpResponseCommand.h +++ b/src/HttpResponseCommand.h @@ -55,6 +55,10 @@ private: HttpDownloadCommand* createHttpDownloadCommand(const SharedHandle& httpResponse); protected: bool executeInternal(); + + bool shouldInflateContentEncoding + (const SharedHandle& httpResponse); + public: HttpResponseCommand(int32_t cuid, const SharedHandle& req,