From 2da32876a29626cf943eb0c217676eecfe0ff57b Mon Sep 17 00:00:00 2001 From: Tatsuhiro Tsujikawa Date: Wed, 17 Feb 2010 16:09:19 +0000 Subject: [PATCH] 2010-02-18 Tatsuhiro Tsujikawa Support RFC2231 "Parameter Value Character Set and Language Information" in Content-Disposition header. * src/HttpResponse.cc * src/util.cc * src/util.h * test/UtilTest.cc --- ChangeLog | 9 ++++ src/HttpResponse.cc | 2 +- src/util.cc | 124 +++++++++++++++++++++++++++++++++----------- src/util.h | 11 ++-- test/UtilTest.cc | 59 +++++++++++++++++++-- 5 files changed, 165 insertions(+), 40 deletions(-) diff --git a/ChangeLog b/ChangeLog index 00e8ed82..cfafda5f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2010-02-18 Tatsuhiro Tsujikawa + + Support RFC2231 "Parameter Value Character Set and Language + Information" in Content-Disposition header. + * src/HttpResponse.cc + * src/util.cc + * src/util.h + * test/UtilTest.cc + 2010-02-16 Tatsuhiro Tsujikawa Print CXXFLAGS diff --git a/src/HttpResponse.cc b/src/HttpResponse.cc index e377adeb..e59b853c 100644 --- a/src/HttpResponse.cc +++ b/src/HttpResponse.cc @@ -110,7 +110,7 @@ std::string HttpResponse::determinFilename() const } else { logger->info(MSG_CONTENT_DISPOSITION_DETECTED, cuid, contentDisposition.c_str()); - return util::urldecode(contentDisposition); + return contentDisposition; } } diff --git a/src/util.cc b/src/util.cc index ab9ee5ee..1136c423 100644 --- a/src/util.cc +++ b/src/util.cc @@ -115,7 +115,7 @@ void split(std::pair& hp, const std::string& src, char hp.second = A2STR::NIL; std::string::size_type p = src.find(delim); if(p == std::string::npos) { - hp.first = src; + hp.first = trim(src); hp.second = A2STR::NIL; } else { hp.first = trim(src.substr(0, p)); @@ -130,7 +130,7 @@ std::pair split(const std::string& src, const std::str hp.second = A2STR::NIL; std::string::size_type p = src.find_first_of(delims); if(p == std::string::npos) { - hp.first = src; + hp.first = trim(src); hp.second = A2STR::NIL; } else { hp.first = trim(src.substr(0, p)); @@ -610,36 +610,100 @@ void parsePrioritizePieceRange result.insert(result.end(), indexes.begin(), indexes.end()); } -std::string getContentDispositionFilename(const std::string& header) { - static const std::string keyName = "filename="; - std::string::size_type attributesp = header.find(keyName); - if(attributesp == std::string::npos) { - return A2STR::NIL; - } - std::string::size_type filenamesp = attributesp+keyName.size(); - std::string::size_type filenameep; - if(filenamesp == header.size()) { - return A2STR::NIL; - } - - if(header[filenamesp] == '\'' || header[filenamesp] == '"') { - char quoteChar = header[filenamesp]; - filenameep = header.find(quoteChar, filenamesp+1); - } else { - filenameep = header.find(';', filenamesp); - } - if(filenameep == std::string::npos) { - filenameep = header.size(); - } - static const std::string TRIMMED("\r\n '\""); - std::string fn = - File(trim(header.substr - (filenamesp, filenameep-filenamesp), TRIMMED)).getBasename(); +static std::string trimBasename(const std::string& src) +{ + static const std::string TRIMMED("\r\n\t '\""); + std::string fn = File(trim(src, TRIMMED)).getBasename(); if(fn == ".." || fn == A2STR::DOT_C) { - return A2STR::NIL; - } else { - return fn; + fn = A2STR::NIL; } + return fn; +} + +std::string iso8859ToUtf8(const std::string& src) +{ + std::string dest; + for(std::string::const_iterator itr = src.begin(); itr != src.end(); ++itr) { + unsigned char c = *itr; + if(0xa0 <= c && c <= 0xff) { + if(c <= 0xbf) { + dest += 0xc2; + } else { + dest += 0xc3; + } + dest += c&(~0x40); + } else { + dest += c; + } + } + return dest; +} + +std::string getContentDispositionFilename(const std::string& header) +{ + std::string filename; + std::vector params; + split(header, std::back_inserter(params), A2STR::SEMICOLON_C, true); + for(std::vector::iterator i = params.begin(); + i != params.end(); ++i) { + std::string& param = *i; + static const std::string keyName = "filename"; + if(!startsWith(param, keyName)) { + continue; + } + std::string::iterator markeritr = param.begin()+keyName.size(); + for(; markeritr != param.end() && *markeritr == ' '; ++markeritr); + if(markeritr == param.end()) { + continue; + } + if(*markeritr == '=') { + std::pair paramPair; + split(paramPair, param, '='); + std::string value = paramPair.second; + if(value.empty()) { + continue; + } + std::string::iterator filenameLast; + if(*value.begin() == '\'' || *value.begin() == '"') { + char qc = *value.begin(); + for(filenameLast = value.begin()+1; + filenameLast != value.end() && *filenameLast != qc; + ++filenameLast); + } else { + filenameLast = value.end(); + } + value = trimBasename(std::string(value.begin(), filenameLast)); + if(value.empty()) { + continue; + } + filename = urldecode(value); + // continue because there is a chance we can find filename*=... + } else if(*markeritr == '*') { + // See RFC2231 Section4 and draft-reschke-rfc2231-in-http. + // Please note that this function doesn't do charset conversion + // except that if iso-8859-1 is specified, it is converted to + // utf-8. + std::pair paramPair; + split(paramPair, param, '='); + std::string value = paramPair.second; + std::vector extValues; + split(value, std::back_inserter(extValues), "'", false, true); + if(extValues.size() != 3) { + continue; + } + value = trimBasename(extValues[2]); + if(value.empty()) { + continue; + } + value = urldecode(value); + if(extValues[0] == "iso-8859-1") { + value = iso8859ToUtf8(value); + } + filename = value; + break; + } + } + return filename; } std::string randomAlpha(size_t length, const RandomizerHandle& randomizer) { diff --git a/src/util.h b/src/util.h index 8a979074..7e16a77b 100644 --- a/src/util.h +++ b/src/util.h @@ -212,7 +212,9 @@ void parsePrioritizePieceRange size_t pieceLength, uint64_t defaultSize = 1048576 /* 1MiB */); -// this function temporarily put here +// Converts ISO/IEC 8859-1 string src to utf-8. +std::string iso8859ToUtf8(const std::string& src); + std::string getContentDispositionFilename(const std::string& header); std::string randomAlpha(size_t length, @@ -317,7 +319,8 @@ std::map createIndexPathMap(std::istream& i); */ template OutputIterator split(const std::string& src, OutputIterator out, - const std::string& delims, bool doTrim = false) + const std::string& delims, bool doTrim = false, + bool allowEmpty = false) { std::string::size_type p = 0; while(1) { @@ -327,7 +330,7 @@ OutputIterator split(const std::string& src, OutputIterator out, if(doTrim) { term = util::trim(term); } - if(!term.empty()) { + if(allowEmpty || !term.empty()) { *out = term; ++out; } @@ -338,7 +341,7 @@ OutputIterator split(const std::string& src, OutputIterator out, term = util::trim(term); } p = np+1; - if(!term.empty()) { + if(allowEmpty || !term.empty()) { *out = term; ++out; } diff --git a/test/UtilTest.cc b/test/UtilTest.cc index 13bd212e..d8379874 100644 --- a/test/UtilTest.cc +++ b/test/UtilTest.cc @@ -154,11 +154,11 @@ void UtilTest::testSplit() { } void UtilTest::testSplit_many() { - std::deque v1; + std::vector v1; util::split("name1=value1; name2=value2; name3=value3",std::back_inserter(v1), ";", true); - CPPUNIT_ASSERT_EQUAL(3, (int)v1.size()); - std::deque::iterator itr = v1.begin(); + CPPUNIT_ASSERT_EQUAL((size_t)3, v1.size()); + std::vector::iterator itr = v1.begin(); CPPUNIT_ASSERT_EQUAL(std::string("name1=value1"), *itr++); CPPUNIT_ASSERT_EQUAL(std::string("name2=value2"), *itr++); CPPUNIT_ASSERT_EQUAL(std::string("name3=value3"), *itr++); @@ -167,11 +167,28 @@ void UtilTest::testSplit_many() { util::split("name1=value1; name2=value2; name3=value3",std::back_inserter(v1), ";", false); - CPPUNIT_ASSERT_EQUAL(3, (int)v1.size()); + CPPUNIT_ASSERT_EQUAL((size_t)3, v1.size()); itr = v1.begin(); CPPUNIT_ASSERT_EQUAL(std::string("name1=value1"), *itr++); CPPUNIT_ASSERT_EQUAL(std::string(" name2=value2"), *itr++); CPPUNIT_ASSERT_EQUAL(std::string(" name3=value3"), *itr++); + + v1.clear(); + + util::split("k=v", std::back_inserter(v1), ";", false, true); + CPPUNIT_ASSERT_EQUAL((size_t)1, v1.size()); + CPPUNIT_ASSERT_EQUAL(std::string("k=v"), v1[0]); + + v1.clear(); + + util::split(" ", std::back_inserter(v1), ";", true, true); + CPPUNIT_ASSERT_EQUAL((size_t)1, v1.size()); + CPPUNIT_ASSERT_EQUAL(std::string(""), v1[0]); + + v1.clear(); + + util::split(" ", std::back_inserter(v1), ";", true); + CPPUNIT_ASSERT_EQUAL((size_t)0, v1.size()); } void UtilTest::testEndsWith() { @@ -276,7 +293,8 @@ void UtilTest::testGetContentDispositionFilename() { CPPUNIT_ASSERT_EQUAL(std::string("aria2.tar.bz2"), util::getContentDispositionFilename(h8)); std::string h9 = "attachment; filename=\"aria2.tar.bz2; creation-date=20 Jun 2007 00:00:00 GMT\""; - CPPUNIT_ASSERT_EQUAL(std::string("aria2.tar.bz2; creation-date=20 Jun 2007 00:00:00 GMT"), util::getContentDispositionFilename(h9)); + CPPUNIT_ASSERT_EQUAL(std::string("aria2.tar.bz2"), + util::getContentDispositionFilename(h9)); std::string h10 = "attachment; filename="; CPPUNIT_ASSERT_EQUAL(std::string(""), util::getContentDispositionFilename(h10)); @@ -295,6 +313,37 @@ void UtilTest::testGetContentDispositionFilename() { std::string currentDir = "attachment; filename=."; CPPUNIT_ASSERT_EQUAL(std::string(), util::getContentDispositionFilename(currentDir)); + // RFC2231 Section4 + std::string extparam1 = "attachment; filename * = UTF-8'ja'filename"; + CPPUNIT_ASSERT_EQUAL(std::string("filename"), + util::getContentDispositionFilename(extparam1)); + std::string extparam2 = "filename*=''aria2"; + CPPUNIT_ASSERT_EQUAL(std::string("aria2"), + util::getContentDispositionFilename(extparam2)); + std::string extparam3 = "filename*='''"; + CPPUNIT_ASSERT_EQUAL(std::string(""), + util::getContentDispositionFilename(extparam3)); + std::string extparam4 = "filename*='aria2"; + CPPUNIT_ASSERT_EQUAL(std::string(""), + util::getContentDispositionFilename(extparam4)); + std::string extparam5 = "filename*='''aria2"; + CPPUNIT_ASSERT_EQUAL(std::string(""), + util::getContentDispositionFilename(extparam5)); + std::string extparam6 = "filename*"; + CPPUNIT_ASSERT_EQUAL(std::string(""), + util::getContentDispositionFilename(extparam6)); + std::string extparam7 = "filename*=UTF-8''aria2;filename=hello%20world"; + CPPUNIT_ASSERT_EQUAL(std::string("aria2"), + util::getContentDispositionFilename(extparam7)); + std::string extparam8 = "filename=aria2;filename*=UTF-8''hello%20world"; + CPPUNIT_ASSERT_EQUAL(std::string("hello world"), + util::getContentDispositionFilename(extparam8)); + std::string extparam9 = "filename*=iso-8859-1''%A3"; + std::string extparam9ans; + extparam9ans += 0xc2; + extparam9ans += 0xa3; + CPPUNIT_ASSERT_EQUAL(extparam9ans, + util::getContentDispositionFilename(extparam9)); } class Printer {