diff --git a/src/util.cc b/src/util.cc index 07981b74..e1eb3d37 100644 --- a/src/util.cc +++ b/src/util.cc @@ -313,6 +313,17 @@ bool inRFC2616HttpToken(const char c) std::find(vbegin(chars), vend(chars), c) != vend(chars); } +bool inRFC5987AttrChar(const char c) +{ + return inRFC2616HttpToken(c) && c != '*' && c != '\'' && c != '%'; +} + +// Returns true if |c| is in ISO/IEC 8859-1 character set. +bool isIso8859p1(unsigned char c) +{ + return (0x20u <= c && c <= 0x7eu) || (0xa0u <= c && c <= 0xffu); +} + bool isLws(const char c) { return c == ' ' || c == '\t'; @@ -715,12 +726,11 @@ void parsePrioritizePieceRange // Converts ISO/IEC 8859-1 string to UTF-8 string. If there is a // character not in ISO/IEC 8859-1, returns empty string. -std::string iso8859ToUtf8(const std::string& src) +std::string iso8859p1ToUtf8(const char* src, size_t len) { std::string dest; - for(std::string::const_iterator itr = src.begin(), eoi = src.end(); - itr != eoi; ++itr) { - unsigned char c = *itr; + for(const char* p = src, *last = src+len; p != last; ++p) { + unsigned char c = *p; if(0xa0u <= c) { if(c <= 0xbfu) { dest += 0xc2u; @@ -729,7 +739,7 @@ std::string iso8859ToUtf8(const std::string& src) } dest += c&(~0x40u); } else if(0x80u <= c && c <= 0x9fu) { - return A2STR::NIL; + return ""; } else { dest += c; } @@ -737,6 +747,11 @@ std::string iso8859ToUtf8(const std::string& src) return dest; } +std::string iso8859p1ToUtf8(const std::string& src) +{ + return iso8859p1ToUtf8(src.c_str(), src.size()); +} + namespace { template void parseParam(OutputIterator out, const std::string& header) @@ -778,112 +793,420 @@ void parseParam(OutputIterator out, const std::string& header) } } // namespace -std::string getContentDispositionFilename(const std::string& header) +/* Start of utf8 dfa */ +/* Copyright (c) 2008-2010 Bjoern Hoehrmann + * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. 
+ * + * Copyright (c) 2008-2009 Bjoern Hoehrmann + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#define UTF8_ACCEPT 0 +#define UTF8_REJECT 12 + +static const uint8_t utf8d[] = { + /* + * The first part of the table maps bytes to character classes that + * to reduce the size of the transition table and create bitmasks. 
+ */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, + + /* + * The second part is a transition table that maps a combination + * of a state of the automaton and a character class to a state. + */ + 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, + 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, + 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, + 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, + 12,36,12,12,12,12,12,12,12,12,12,12, +}; + +static uint32_t +utf8dfa(uint32_t* state, uint32_t* codep, uint32_t byte) { + uint32_t type = utf8d[byte]; + + *codep = (*state != UTF8_ACCEPT) ? 
+ (byte & 0x3fu) | (*codep << 6) : + (0xff >> type) & (byte); + + *state = utf8d[256 + *state + type]; + return *state; +} + +/* End of utf8 dfa */ + +typedef enum { + CD_BEFORE_DISPOSITION_TYPE, + CD_AFTER_DISPOSITION_TYPE, + CD_DISPOSITION_TYPE, + CD_BEFORE_DISPOSITION_PARM_NAME, + CD_AFTER_DISPOSITION_PARM_NAME, + CD_DISPOSITION_PARM_NAME, + CD_BEFORE_VALUE, + CD_AFTER_VALUE, + CD_QUOTED_STRING, + CD_TOKEN, + CD_BEFORE_EXT_VALUE, + CD_CHARSET, + CD_LANGUAGE, + CD_VALUE_CHARS, + CD_VALUE_CHARS_PCT_ENCODED1, + CD_VALUE_CHARS_PCT_ENCODED2 +} content_disposition_parse_state; + +typedef enum { + CD_FILENAME_FOUND = 1, + CD_EXT_FILENAME_FOUND = 1 << 1 +} content_disposition_parse_flag; + +typedef enum { + CD_ENC_UNKNOWN, + CD_ENC_UTF8, + CD_ENC_ISO_8859_1 +} content_disposition_charset; + +int parse_content_disposition(char *dest, size_t destlen, + const char **charsetp, size_t *charsetlenp, + const char *in, size_t len) { - static const char A2_KEYNAME[] = "filename"; - std::string filename; - std::vector params; - parseParam(std::back_inserter(params), header); - for(std::vector::const_iterator i = params.begin(), - eoi = params.end(); i != eoi; ++i) { - const std::string& param = *i; - if(!istartsWith(param, A2_KEYNAME) || - param.size() == sizeof(A2_KEYNAME)-1) { - continue; - } - std::string::const_iterator markeritr = param.begin()+sizeof(A2_KEYNAME)-1; - if(*markeritr == '*') { - // See RFC2231 Section4 and draft-reschke-rfc2231-in-http. - // Please note that this function doesn't do charset conversion - // except that if iso-8859-1 is specified, it is converted to - // utf-8. 
- ++markeritr; - for(; markeritr != param.end() && *markeritr == ' '; ++markeritr); - if(markeritr == param.end() || *markeritr != '=') { - continue; + const char *p = in, *mark_first = NULL, *mark_last = NULL; + int state = CD_BEFORE_DISPOSITION_TYPE; + int in_file_parm = 0; + int flags = 0; + int quoted_seen = 0; + int charset = 0; + /* To suppress warnings */ + char *dp = dest; + size_t dlen = destlen; + uint32_t dfa_state = 0; + uint32_t dfa_code = 0; + uint8_t pctval = 0; + + *charsetp = NULL; + *charsetlenp = 0; + + for(const char *eop = in + len; p != eop; ++p) { /* bounded by |len|: |in| need not be NUL-terminated, and an embedded NUL is rejected by the states below instead of silently ending the parse */ + switch(state) { + case CD_BEFORE_DISPOSITION_TYPE: + if(inRFC2616HttpToken(*p)) { + state = CD_DISPOSITION_TYPE; + } else if(!isLws(*p)) { + return -1; } - std::vector extValues; - splitIter(markeritr+1, param.end(), std::back_inserter(extValues), - '\'', true, true); - if(extValues.size() != 3) { - continue; + break; + case CD_AFTER_DISPOSITION_TYPE: + case CD_DISPOSITION_TYPE: + if(*p == ';') { + state = CD_BEFORE_DISPOSITION_PARM_NAME; + } else if(isLws(*p)) { + state = CD_AFTER_DISPOSITION_TYPE; + } else if(state == CD_AFTER_DISPOSITION_TYPE || + !inRFC2616HttpToken(*p)) { + return -1; } - bool bad = false; - for(std::string::const_iterator j = extValues[0].first, - eoj = extValues[0].second; j != eoj; ++j) { - // Since we first split parameter by ', we can safely assume - // that ' is not included in charset. 
- if(!inRFC2978MIMECharset(*j)) { - bad = true; - break; + break; + case CD_BEFORE_DISPOSITION_PARM_NAME: + if(inRFC2616HttpToken(*p)) { + mark_first = p; + state = CD_DISPOSITION_PARM_NAME; + } else if(!isLws(*p)) { + return -1; + } + break; + case CD_AFTER_DISPOSITION_PARM_NAME: + case CD_DISPOSITION_PARM_NAME: + if(*p == '=') { + if(state == CD_DISPOSITION_PARM_NAME) { + mark_last = p; } - } - if(bad) { - continue; - } - bad = false; - for(std::string::const_iterator j = extValues[2].first, - eoj = extValues[2].second; j != eoj; ++j){ - if(*j == '%') { - if(j+1 != eoj && isHexDigit(*(j+1)) && - j+2 != eoj && isHexDigit(*(j+2))) { - j += 2; + in_file_parm = 0; + if(strieq(mark_first, mark_last, "filename*")) { + if((flags & CD_EXT_FILENAME_FOUND) == 0) { + in_file_parm = 1; } else { - bad = true; - break; + return -1; } + state = CD_BEFORE_EXT_VALUE; + } else if(strieq(mark_first, mark_last, "filename")) { + if(flags & CD_FILENAME_FOUND) { + return -1; + } + if((flags & CD_EXT_FILENAME_FOUND) == 0) { + in_file_parm = 1; + } + state = CD_BEFORE_VALUE; } else { - if(*j == '*' || *j == '\'' || !inRFC2616HttpToken(*j)) { - bad = true; - break; + /* ext-token must be characters in token, followed by "*" */ + if(mark_first != mark_last-1 && *(mark_last-1) == '*') { + state = CD_BEFORE_EXT_VALUE; + } else { + state = CD_BEFORE_VALUE; + } + } + if(in_file_parm) { + dp = dest; + dlen = destlen; + } + } else if(isLws(*p)) { + if(state == CD_DISPOSITION_PARM_NAME) { mark_last = p; } /* only the first LWS ends the name; later LWS must not extend [mark_first, mark_last) */ + state = CD_AFTER_DISPOSITION_PARM_NAME; + } else if(state == CD_AFTER_DISPOSITION_PARM_NAME || + !inRFC2616HttpToken(*p)) { + return -1; + } + break; + case CD_BEFORE_VALUE: + if(*p == '"') { + quoted_seen = 0; + state = CD_QUOTED_STRING; + } else if(inRFC2616HttpToken(*p)) { + if(in_file_parm) { + if(dlen == 0) { + return -1; + } else { + *dp++ = *p; + --dlen; + } + } + state = CD_TOKEN; + } else if(!isLws(*p)) { + return -1; + } + break; + case CD_AFTER_VALUE: + if(*p == ';') { + state = CD_BEFORE_DISPOSITION_PARM_NAME; + } else 
if(!isLws(*p)) { + return -1; + } + break; + case CD_QUOTED_STRING: + if(*p == '\\' && quoted_seen == 0) { + quoted_seen = 1; + } else if(*p == '"' && quoted_seen == 0) { + if(in_file_parm) { + flags |= CD_FILENAME_FOUND; + } + state = CD_AFTER_VALUE; + } else { + /* TEXT which is OCTET except CTLs, but including LWS. We only + accept ISO-8859-1 chars. */ + quoted_seen = 0; + if(!isIso8859p1(*p)) { + return -1; + } + if(in_file_parm) { + if(dlen == 0) { + return -1; + } else { + *dp++ = *p; + --dlen; } } } - if(bad) { - continue; - } - std::string value = - percentDecode(extValues[2].first, extValues[2].second); - if(util::strieq(extValues[0].first, extValues[0].second, "iso-8859-1")) { - value = iso8859ToUtf8(value); - } - if(!detectDirTraversal(value) && value.find("/") == std::string::npos) { - filename = value; - } - if(!filename.empty()) { - break; - } - } else { - for(; markeritr != param.end() && *markeritr == ' '; ++markeritr); - if(markeritr == param.end() || markeritr+1 == param.end() || - *markeritr != '=') { - continue; - } - Scip p = stripIter(markeritr+1, param.end()); - if(p.first == p.second) { - continue; - } - std::string value(p.first, p.second); - std::string::iterator filenameLast; - if(value[0] == '\'' || value[0] == '"') { - char qc = *value.begin(); - for(filenameLast = value.begin()+1; - filenameLast != value.end() && *filenameLast != qc; - ++filenameLast); + break; + case CD_TOKEN: + if(inRFC2616HttpToken(*p)) { + if(in_file_parm) { + if(dlen == 0) { + return -1; + } else { + *dp++ = *p; + --dlen; + } + } + } else if(*p == ';') { + if(in_file_parm) { + flags |= CD_FILENAME_FOUND; + } + state = CD_BEFORE_DISPOSITION_PARM_NAME; + } else if(isLws(*p)) { + if(in_file_parm) { + flags |= CD_FILENAME_FOUND; + } + state = CD_AFTER_VALUE; } else { - filenameLast = value.end(); + return -1; } - std::pair vi = - util::stripIter(value.begin(), filenameLast, "\r\n\t '\""); - value.assign(vi.first, vi.second); - value.erase(std::remove(value.begin(), 
value.end(), '\\'), value.end()); - if(!detectDirTraversal(value) && value.find("/") == std::string::npos) { - filename = value; + break; + case CD_BEFORE_EXT_VALUE: + if(*p == '\'') { + /* Empty charset is not allowed */ + return -1; + } else if(inRFC2978MIMECharset(*p)) { + mark_first = p; + state = CD_CHARSET; + } else if(!isLws(*p)) { + return -1; } - // continue because there is a chance we can find filename*=... + break; + case CD_CHARSET: + if(*p == '\'') { + mark_last = p; + *charsetp = mark_first; + *charsetlenp = mark_last - mark_first; + if(strieq(mark_first, mark_last, "utf-8")) { + charset = CD_ENC_UTF8; + dfa_state = UTF8_ACCEPT; + dfa_code = 0; + } else if(strieq(mark_first, mark_last, "iso-8859-1")) { + charset = CD_ENC_ISO_8859_1; + } else { + charset = CD_ENC_UNKNOWN; + } + state = CD_LANGUAGE; + } else if(!inRFC2978MIMECharset(*p)) { + return -1; + } + break; + case CD_LANGUAGE: + if(*p == '\'') { + if(in_file_parm) { + dp = dest; + dlen = destlen; + } + state = CD_VALUE_CHARS; + } else if(*p != '-' && !isAlpha(*p) && !isDigit(*p)) { + return -1; + } + break; + case CD_VALUE_CHARS: + if(inRFC5987AttrChar(*p)) { + if(charset == CD_ENC_UTF8) { + if(utf8dfa(&dfa_state, &dfa_code, *p) == UTF8_REJECT) { + return -1; + } + } + if(in_file_parm) { + if(dlen == 0) { + return -1; + } else { + *dp++ = *p; + --dlen; + } + } + } else if(*p == '%') { + if(in_file_parm) { + if(dlen == 0) { + return -1; + } + } + pctval = 0; + state = CD_VALUE_CHARS_PCT_ENCODED1; + } else if(*p == ';' || isLws(*p)) { + if(charset == CD_ENC_UTF8 && dfa_state != UTF8_ACCEPT) { + return -1; + } + if(in_file_parm) { + flags |= CD_EXT_FILENAME_FOUND; + } + if(*p == ';') { + state = CD_BEFORE_DISPOSITION_PARM_NAME; + } else { + state = CD_AFTER_VALUE; + } + } else if(!inRFC5987AttrChar(*p)) { + return -1; + } + break; + case CD_VALUE_CHARS_PCT_ENCODED1: + if(isHexDigit(*p)) { + pctval |= hexCharToUInt(*p) << 4; + state = CD_VALUE_CHARS_PCT_ENCODED2; + } else { + return -1; + } + 
break; + case CD_VALUE_CHARS_PCT_ENCODED2: + if(isHexDigit(*p)) { + pctval |= hexCharToUInt(*p); + if(charset == CD_ENC_UTF8) { + if(utf8dfa(&dfa_state, &dfa_code, pctval) == UTF8_REJECT) { + return -1; + } + } else if(charset == CD_ENC_ISO_8859_1) { + if(!isIso8859p1(pctval)) { + return -1; + } + } + if(in_file_parm) { + *dp++ = pctval; + --dlen; + } + state = CD_VALUE_CHARS; + } else { + return -1; + } + break; + } + } + switch(state) { + case CD_BEFORE_DISPOSITION_TYPE: + case CD_AFTER_DISPOSITION_TYPE: + case CD_DISPOSITION_TYPE: + case CD_AFTER_VALUE: + case CD_TOKEN: + return destlen-dlen; + case CD_VALUE_CHARS: + if(charset == CD_ENC_UTF8 && dfa_state != UTF8_ACCEPT) { + return -1; + } + return destlen - dlen; + default: + return -1; + } +} + +std::string getContentDispositionFilename(const std::string& header) +{ + char cdval[1024]; + size_t cdvallen = sizeof(cdval); + const char* charset; + size_t charsetlen; + int rv = parse_content_disposition(cdval, cdvallen, &charset, &charsetlen, + header.c_str(), header.size()); + if(rv == -1) { + return ""; + } else { + std::string res; + if(!charset || strieq(charset, charset+charsetlen, "iso-8859-1")) { + res = iso8859p1ToUtf8(cdval, rv); + } else { + res.assign(cdval, rv); + } + if(!detectDirTraversal(res) && + res.find_first_of("/\\") == std::string::npos) { + return res; + } else { + return ""; } } - return filename; } std::string toUpper(const std::string& src) { diff --git a/src/util.h b/src/util.h index fecbc490..778bfdaf 100644 --- a/src/util.h +++ b/src/util.h @@ -215,6 +215,15 @@ bool inRFC3986ReservedChars(const char c); bool inRFC3986UnreservedChars(const char c); +bool inRFC2978MIMECharset(const char c); + +bool inRFC2616HttpToken(const char c); + +bool inRFC5987AttrChar(const char c); + +// Returns true if |c| is in ISO/IEC 8859-1 character set. 
+bool isIso8859p1(unsigned char c); + bool isUtf8(const std::string& str); std::string percentDecode @@ -285,7 +294,27 @@ void parsePrioritizePieceRange int64_t defaultSize = 1048576 /* 1MiB */); // Converts ISO/IEC 8859-1 string src to utf-8. -std::string iso8859ToUtf8(const std::string& src); +std::string iso8859p1ToUtf8(const char* src, size_t len); +std::string iso8859p1ToUtf8(const std::string& src); + +// Parses Content-Disposition header field value |in| with its length +// |len| in a manner conforming to RFC 6266 and extracts filename +// value and copies it to the region pointed by |dest|. The |destlen| +// specifies the capacity of the |dest|. This function does not store +// NUL character after filename in |dest|. This function does not +// support RFC 2231 Continuation. If the function sees RFC 2231/5987 +// encoding and charset, it stores its first pointer to |*charsetp| +// and its length in |*charsetlenp|. Otherwise, they are NULL and 0 +// respectively. In RFC 2231/5987 encoding, percent-encoded string +// will be decoded to original form and stored in |dest|. +// +// This function returns the number of written bytes in |dest| if it +// succeeds, or -1. If there is not enough room to store filename in +// |dest|, this function returns -1. If this function returns -1, the +// |dest|, |*charsetp| and |*charsetlenp| are undefined. +int parse_content_disposition(char *dest, size_t destlen, + const char **charsetp, size_t *charsetlenp, + const char *in, size_t len); std::string getContentDispositionFilename(const std::string& header); diff --git a/test/UtilTest.cc b/test/UtilTest.cc index ba00d799..6847482f 100644 --- a/test/UtilTest.cc +++ b/test/UtilTest.cc @@ -40,6 +40,7 @@ class UtilTest:public CppUnit::TestFixture { CPPUNIT_TEST(testIstartsWith); // may be moved to other helper class in the future. 
CPPUNIT_TEST(testGetContentDispositionFilename); + CPPUNIT_TEST(testParseContentDisposition); CPPUNIT_TEST(testToUpper); CPPUNIT_TEST(testToLower); CPPUNIT_TEST(testUppercase); @@ -107,6 +108,7 @@ public: void testIstartsWith(); // may be moved to other helper class in the future. void testGetContentDispositionFilename(); + void testParseContentDisposition(); void testToUpper(); void testToLower(); void testUppercase(); @@ -816,136 +818,636 @@ void UtilTest::testIstartsWith() { } void UtilTest::testGetContentDispositionFilename() { - std::string h1 = "attachment; filename=\"aria2.tar.bz2\""; - CPPUNIT_ASSERT_EQUAL(std::string("aria2.tar.bz2"), util::getContentDispositionFilename(h1)); + std::string val; - std::string h2 = "attachment; filename=\"\""; - CPPUNIT_ASSERT_EQUAL(std::string(""), util::getContentDispositionFilename(h2)); - - std::string h3 = "attachment; filename=\""; - CPPUNIT_ASSERT_EQUAL(std::string(""), util::getContentDispositionFilename(h3)); - - std::string h3_2 = "attachment; filename= \" aria2.tar.bz2 \""; + val = "attachment; filename=\"aria2.tar.bz2\""; CPPUNIT_ASSERT_EQUAL(std::string("aria2.tar.bz2"), - util::getContentDispositionFilename(h3_2)); + util::getContentDispositionFilename(val)); - std::string h4 = "attachment;"; - CPPUNIT_ASSERT_EQUAL(std::string(""), util::getContentDispositionFilename(h4)); - - std::string h5 = "attachment; filename=aria2.tar.bz2"; - CPPUNIT_ASSERT_EQUAL(std::string("aria2.tar.bz2"), util::getContentDispositionFilename(h5)); - - std::string h6 = "attachment; filename='aria2.tar.bz2'"; - CPPUNIT_ASSERT_EQUAL(std::string("aria2.tar.bz2"), util::getContentDispositionFilename(h6)); - - std::string h7 = "attachment; filename='aria2.tar.bz2"; - CPPUNIT_ASSERT_EQUAL(std::string("aria2.tar.bz2"), util::getContentDispositionFilename(h7)); - - std::string h8 = "attachment; filename=aria2.tar.bz2; creation-date=20 Jun 2007 00:00:00 GMT"; - CPPUNIT_ASSERT_EQUAL(std::string("aria2.tar.bz2"), 
util::getContentDispositionFilename(h8)); - - std::string h9 = "attachment; filename=\"aria2.tar.bz2; creation-date=20 Jun 2007 00:00:00 GMT\""; - CPPUNIT_ASSERT_EQUAL(std::string("aria2.tar.bz2; creation-date=20 Jun 2007 00:00:00 GMT"), - util::getContentDispositionFilename(h9)); - - std::string h10 = "attachment; filename="; - CPPUNIT_ASSERT_EQUAL(std::string(""), util::getContentDispositionFilename(h10)); - - std::string h11 = "attachment; filename=;"; - CPPUNIT_ASSERT_EQUAL(std::string(""), util::getContentDispositionFilename(h11)); - - std::string filenameWithDir = "attachment; filename=dir/file"; + val = "attachment; filename=\"\""; CPPUNIT_ASSERT_EQUAL(std::string(""), - util::getContentDispositionFilename(filenameWithDir)); + util::getContentDispositionFilename(val)); - std::string semicolonInside = "attachment; filename=\"foo;bar\""; - CPPUNIT_ASSERT_EQUAL(std::string("foo;bar"), - util::getContentDispositionFilename(semicolonInside)); + val = "attachment; filename=\""; + CPPUNIT_ASSERT_EQUAL(std::string(""), + util::getContentDispositionFilename(val)); + + val = "attachment; filename= \" aria2.tar.bz2 \""; + CPPUNIT_ASSERT_EQUAL(std::string(" aria2.tar.bz2 "), + util::getContentDispositionFilename(val)); + + val = "attachment; filename=dir/file"; + CPPUNIT_ASSERT_EQUAL(std::string(""), + util::getContentDispositionFilename(val)); + + val = "attachment; filename=dir\\file"; + CPPUNIT_ASSERT_EQUAL(std::string(""), + util::getContentDispositionFilename(val)); + + val = "attachment; filename=\"dir/file\""; + CPPUNIT_ASSERT_EQUAL(std::string(""), + util::getContentDispositionFilename(val)); + + val = "attachment; filename=\"dir\\\\file\""; + CPPUNIT_ASSERT_EQUAL(std::string(""), + util::getContentDispositionFilename(val)); + + val = "attachment; filename=\"/etc/passwd\""; + CPPUNIT_ASSERT_EQUAL(std::string(""), + util::getContentDispositionFilename(val)); + + val = "attachment; filename=\"..\""; + CPPUNIT_ASSERT_EQUAL(std::string(""), + 
util::getContentDispositionFilename(val)); + + val = "attachment; filename=.."; + CPPUNIT_ASSERT_EQUAL(std::string(""), + util::getContentDispositionFilename(val)); // Unescaping %2E%2E%2F produces "../". But since we won't unescape, // we just accept it as is. - CPPUNIT_ASSERT_EQUAL - (std::string("%2E%2E%2Ffoo.html"), - util::getContentDispositionFilename("filename=\"%2E%2E%2Ffoo.html\"")); + val = "attachment; filename=\"%2E%2E%2Ffoo.html\""; + CPPUNIT_ASSERT_EQUAL(std::string("%2E%2E%2Ffoo.html"), + util::getContentDispositionFilename(val)); - // RFC2231 Section4 - std::string extparam2 = "filename*=''aria2"; - CPPUNIT_ASSERT_EQUAL(std::string("aria2"), - util::getContentDispositionFilename(extparam2)); - std::string extparam3 = "filename*='''"; - CPPUNIT_ASSERT_EQUAL(std::string(""), - util::getContentDispositionFilename(extparam3)); - std::string extparam4 = "filename*='aria2"; - CPPUNIT_ASSERT_EQUAL(std::string(""), - util::getContentDispositionFilename(extparam4)); - std::string extparam5 = "filename*='''aria2"; - CPPUNIT_ASSERT_EQUAL(std::string(""), - util::getContentDispositionFilename(extparam5)); - std::string extparam6 = "filename*"; - CPPUNIT_ASSERT_EQUAL(std::string(""), - util::getContentDispositionFilename(extparam6)); - std::string extparam7 = "filename*=UTF-8''aria2;filename=hello%20world"; - CPPUNIT_ASSERT_EQUAL(std::string("aria2"), - util::getContentDispositionFilename(extparam7)); - std::string extparam8 = "filename=aria2;filename*=UTF-8''hello%20world"; - CPPUNIT_ASSERT_EQUAL(std::string("hello world"), - util::getContentDispositionFilename(extparam8)); - std::string extparam9 = "filename*=ISO-8859-1''%A3"; - std::string extparam9ans; - extparam9ans += 0xc2; - extparam9ans += 0xa3; - CPPUNIT_ASSERT_EQUAL(extparam9ans, - util::getContentDispositionFilename(extparam9)); - CPPUNIT_ASSERT_EQUAL - (std::string(""), - util::getContentDispositionFilename("filename*=UTF-8''foo%2F.html")); - CPPUNIT_ASSERT_EQUAL - (std::string("foo.html"), - 
util::getContentDispositionFilename("filename*=UTF-8'';filename=\"foo.html\"")); - CPPUNIT_ASSERT_EQUAL - (std::string(""), - util::getContentDispositionFilename("filename*=UTF-8''%2E%2E%2Ffoo.html")); + // iso-8859-1 string will be converted to utf-8. + val = "attachment; filename*=iso-8859-1''foo-%E4.html"; + CPPUNIT_ASSERT_EQUAL(std::string("foo-ä.html"), + util::getContentDispositionFilename(val)); + + val = "attachment; filename*= UTF-8''foo-%c3%a4.html"; + CPPUNIT_ASSERT_EQUAL(std::string("foo-ä.html"), + util::getContentDispositionFilename(val)); + + // iso-8859-1 string will be converted to utf-8. + val = "attachment; filename=\"foo-%E4.html\""; + val = util::percentDecode(val.begin(), val.end()); + CPPUNIT_ASSERT_EQUAL(std::string("foo-ä.html"), + util::getContentDispositionFilename(val)); +} + +void UtilTest::testParseContentDisposition() { + char dest[1024]; + size_t destlen = sizeof(dest); + const char *cs; + size_t cslen; + std::string val; + + // test cases from http://greenbytes.de/tech/tc2231/ + // inlonly + val = "inline"; + CPPUNIT_ASSERT_EQUAL(0, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // inlonlyquoted + val = "\"inline\""; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // inlwithasciifilename + val = "inline; filename=\"foo.html\""; + CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo.html"), + std::string(&dest[0], &dest[8])); + + // inlwithfnattach + val = "inline; filename=\"Not an attachment!\""; + CPPUNIT_ASSERT_EQUAL(18, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("Not an attachment!"), + std::string(&dest[0], &dest[18])); + + // inlwithasciifilenamepdf + val = "inline; filename=\"foo.pdf\""; + CPPUNIT_ASSERT_EQUAL(7, 
util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo.pdf"), + std::string(&dest[0], &dest[7])); + + // attwithasciifilename25 + val = "attachment; filename=\"0000000000111111111122222\""; + CPPUNIT_ASSERT_EQUAL(25, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("0000000000111111111122222"), + std::string(&dest[0], &dest[25])); + + // attwithasciifilename35 + val = "attachment; filename=\"00000000001111111111222222222233333\""; + CPPUNIT_ASSERT_EQUAL(35, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("00000000001111111111222222222233333"), + std::string(&dest[0], &dest[35])); - // Tests from http://greenbytes.de/tech/tc2231/ // attwithasciifnescapedchar - CPPUNIT_ASSERT_EQUAL - (std::string("foo.html"), - util::getContentDispositionFilename("filename=\"f\\oo.html\"")); + val = "attachment; filename=\"f\\oo.html\""; + CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo.html"), + std::string(&dest[0], &dest[8])); + + // attwithasciifnescapedquote + val = "attachment; filename=\"\\\"quoting\\\" tested.html\""; + CPPUNIT_ASSERT_EQUAL(21, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("\"quoting\" tested.html"), + std::string(&dest[0], &dest[21])); + + // attwithquotedsemicolon + val = "attachment; filename=\"Here's a semicolon;.html\""; + CPPUNIT_ASSERT_EQUAL(24, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("Here's a semicolon;.html"), + std::string(&dest[0], &dest[24])); + + // attwithfilenameandextparam + val = "attachment; foo=\"bar\"; filename=\"foo.html\""; + 
CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo.html"), + std::string(&dest[0], &dest[8])); + + // attwithfilenameandextparamescaped + val = "attachment; foo=\"\\\"\\\\\";filename=\"foo.html\""; + CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo.html"), + std::string(&dest[0], &dest[8])); + // attwithasciifilenameucase - CPPUNIT_ASSERT_EQUAL - (std::string("foo.html"), - util::getContentDispositionFilename("FILENAME=\"foo.html\"")); + val = "attachment; FILENAME=\"foo.html\""; + CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo.html"), + std::string(&dest[0], &dest[8])); + + // attwithasciifilenamenq + val = "attachment; filename=foo.html"; + CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo.html"), + std::string(&dest[0], &dest[8])); + + // attwithtokfncommanq + val = "attachment; filename=foo,bar.html"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attwithasciifilenamenqs + val = "attachment; filename=foo.html ;"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attemptyparam + val = "attachment; ;filename=foo"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attwithasciifilenamenqws + val = "attachment; filename=foo bar.html"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attwithfntokensq + val = "attachment; filename='foo.bar'"; + 
CPPUNIT_ASSERT_EQUAL(9, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("'foo.bar'"), + std::string(&dest[0], &dest[9])); + + // attwithisofnplain + // attachment; filename="foo-ä.html" + val = "attachment; filename=\"foo-%E4.html\""; + val = util::percentDecode(val.begin(), val.end()); + CPPUNIT_ASSERT_EQUAL(10, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo-ä.html"), + util::iso8859p1ToUtf8(std::string(&dest[0], &dest[10]))); + + // attwithutf8fnplain + // attachment; filename="foo-ä.html" + val = "attachment; filename=\"foo-%C3%A4.html\""; + val = util::percentDecode(val.begin(), val.end()); + CPPUNIT_ASSERT_EQUAL(11, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo-ä.html"), + util::iso8859p1ToUtf8(std::string(&dest[0], &dest[11]))); + + // attwithfnrawpctenca + val = "attachment; filename=\"foo-%41.html\""; + CPPUNIT_ASSERT_EQUAL(12, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo-%41.html"), + std::string(&dest[0], &dest[12])); + + // attwithfnusingpct + val = "attachment; filename=\"50%.html\""; + CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("50%.html"), + std::string(&dest[0], &dest[8])); + + // attwithfnrawpctencaq + val = "attachment; filename=\"foo-%\\41.html\""; + CPPUNIT_ASSERT_EQUAL(12, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo-%41.html"), + std::string(&dest[0], &dest[12])); + + // attwithnamepct + val = "attachment; name=\"foo-%41.html\""; + CPPUNIT_ASSERT_EQUAL(0, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), 
val.size())); + + // attwithfilenamepctandiso + // attachment; filename="ä-%41.html" + val = "attachment; filename=\"%E4-%2541.html\""; + val = util::percentDecode(val.begin(), val.end()); + CPPUNIT_ASSERT_EQUAL(10, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("ä-%41.html"), + util::iso8859p1ToUtf8(std::string(&dest[0], &dest[10]))); + + // attwithfnrawpctenclong + val = "attachment; filename=\"foo-%c3%a4-%e2%82%ac.html\""; + CPPUNIT_ASSERT_EQUAL(25, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo-%c3%a4-%e2%82%ac.html"), + std::string(&dest[0], &dest[25])); + + // attwithasciifilenamews1 + val = "attachment; filename =\"foo.html\""; + CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo.html"), + std::string(&dest[0], &dest[8])); + + // attwith2filenames + val = "attachment; filename=\"foo.html\"; filename=\"bar.html\""; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attfnbrokentoken + val = "attachment; filename=foo[1](2).html"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attfnbrokentokeniso + val = "attachment; filename=foo-%E4.html"; + val = util::percentDecode(val.begin(), val.end()); + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attfnbrokentokenutf + // attachment; filename=foo-ä.html + val = "attachment; filename=foo-ä.html"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attmissingdisposition + val = "filename=foo.html"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, 
destlen, &cs, &cslen, val.c_str(), val.size())); + + // attmissingdisposition2 + val = "x=y; filename=foo.html"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attmissingdisposition3 + val = "\"foo; filename=bar;baz\"; filename=qux"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attmissingdisposition4 + val = "filename=foo.html, filename=bar.html"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // emptydisposition + val = "; filename=foo.html"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // doublecolon + val = ": inline; attachment; filename=foo.html"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attandinline + val = "inline; attachment; filename=foo.html"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attandinline2 + val = "attachment; inline; filename=foo.html"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attbrokenquotedfn + val = "attachment; filename=\"foo.html\".txt"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attbrokenquotedfn2 + val = "attachment; filename=\"bar"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attbrokenquotedfn3 + val = "attachment; filename=foo\"bar;baz\"qux"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attmultinstances + val = "attachment; filename=foo.html, attachment; 
filename=bar.html"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attmissingdelim + val = "attachment; foo=foo filename=bar"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attmissingdelim2 + val = "attachment; filename=bar foo=foo "; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attmissingdelim3 + val = "attachment filename=bar"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attreversed + val = "filename=foo.html; attachment"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attconfusedparam + val = "attachment; xfilename=foo.html"; + CPPUNIT_ASSERT_EQUAL(0, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attabspath + val = "attachment; filename=\"/foo.html\""; + CPPUNIT_ASSERT_EQUAL(9, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("/foo.html"), + std::string(&dest[0], &dest[9])); + + // attabspathwin + val = "attachment; filename=\"\\\\foo.html\""; + CPPUNIT_ASSERT_EQUAL(9, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("\\foo.html"), + std::string(&dest[0], &dest[9])); + + // attcdate + val = "attachment; creation-date=\"Wed, 12 Feb 1997 16:29:51 -0500\""; + CPPUNIT_ASSERT_EQUAL(0, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // dispext + val = "foobar"; + CPPUNIT_ASSERT_EQUAL(0, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // dispextbadfn + val = "attachment; 
example=\"filename=example.txt\""; + CPPUNIT_ASSERT_EQUAL(0, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + // attwithisofn2231iso - CPPUNIT_ASSERT_EQUAL - (std::string("foo-ä.html"), - util::getContentDispositionFilename("filename*=iso-8859-1''foo-%E4.html")); + val = "attachment; filename*=iso-8859-1''foo-%E4.html"; + CPPUNIT_ASSERT_EQUAL(10, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("iso-8859-1"), std::string(cs, cslen)); + CPPUNIT_ASSERT_EQUAL(std::string("foo-ä.html"), + util::iso8859p1ToUtf8(std::string(&dest[0], &dest[10]))); + // attwithfn2231utf8 - CPPUNIT_ASSERT_EQUAL - (std::string("foo-ä-€.html"), - util::getContentDispositionFilename - ("filename*=UTF-8''foo-%c3%a4-%e2%82%ac.html")); + val = "attachment; filename*=UTF-8''foo-%c3%a4-%e2%82%ac.html"; + CPPUNIT_ASSERT_EQUAL(15, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("UTF-8"), std::string(cs, cslen)); + CPPUNIT_ASSERT_EQUAL(std::string("foo-ä-€.html"), + std::string(&dest[0], &dest[15])); + + // attwithfn2231noc + val = "attachment; filename*=''foo-%c3%a4-%e2%82%ac.html"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attwithfn2231utf8comp + val = "attachment; filename*=UTF-8''foo-a%cc%88.html"; + CPPUNIT_ASSERT_EQUAL(12, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + val = "foo-a%cc%88.html"; + CPPUNIT_ASSERT_EQUAL(std::string(util::percentDecode(val.begin(), + val.end())), + std::string(&dest[0], &dest[12])); + // attwithfn2231utf8-bad - CPPUNIT_ASSERT_EQUAL - (std::string(""), - util::getContentDispositionFilename - ("filename*=iso-8859-1''foo-%c3%a4-%e2%82%ac.html")); + val = "attachment; filename*=iso-8859-1''foo-%c3%a4-%e2%82%ac.html"; + CPPUNIT_ASSERT_EQUAL(-1, 
util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attwithfn2231iso-bad + val = "attachment; filename*=utf-8''foo-%E4.html"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + // attwithfn2231ws1 - CPPUNIT_ASSERT_EQUAL - (std::string(""), - util::getContentDispositionFilename("filename *=UTF-8''foo-%c3%a4.html")); + val = "attachment; filename *=UTF-8''foo-%c3%a4.html"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + // attwithfn2231ws2 - CPPUNIT_ASSERT_EQUAL - (std::string("foo-ä.html"), - util::getContentDispositionFilename("filename*= UTF-8''foo-%c3%a4.html")); + val = "attachment; filename*= UTF-8''foo-%c3%a4.html"; + CPPUNIT_ASSERT_EQUAL(11, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo-ä.html"), + std::string(&dest[0], &dest[11])); + // attwithfn2231ws3 - CPPUNIT_ASSERT_EQUAL - (std::string("foo-ä.html"), - util::getContentDispositionFilename("filename* =UTF-8''foo-%c3%a4.html")); + val = "attachment; filename* =UTF-8''foo-%c3%a4.html"; + CPPUNIT_ASSERT_EQUAL(11, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo-ä.html"), + std::string(&dest[0], &dest[11])); + // attwithfn2231quot - CPPUNIT_ASSERT_EQUAL - (std::string(""), - util::getContentDispositionFilename - ("filename*=\"UTF-8''foo-%c3%a4.html\"")); + val = "attachment; filename*=\"UTF-8''foo-%c3%a4.html\""; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attwithfn2231quot2 + val = "attachment; filename*=\"foo%20bar.html\""; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attwithfn2231singleqmissing + val = 
"attachment; filename*=UTF-8'foo-%c3%a4.html"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attwithfn2231nbadpct1 + val = "attachment; filename*=UTF-8''foo%"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attwithfn2231nbadpct2 + val = "attachment; filename*=UTF-8''f%oo.html"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attwithfn2231dpct + val = "attachment; filename*=UTF-8''A-%2541.html"; + CPPUNIT_ASSERT_EQUAL(10, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("A-%41.html"), + std::string(&dest[0], &dest[10])); + + // attwithfn2231abspathdisguised + val = "attachment; filename*=UTF-8''%5cfoo.html"; + CPPUNIT_ASSERT_EQUAL(9, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("\\foo.html"), + std::string(&dest[0], &dest[9])); + + // attfnboth + val = "attachment; filename=\"foo-ae.html\"; filename*=UTF-8''foo-%c3%a4.html"; + CPPUNIT_ASSERT_EQUAL(11, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo-ä.html"), + std::string(&dest[0], &dest[11])); + + // attfnboth2 + val = "attachment; filename*=UTF-8''foo-%c3%a4.html; filename=\"foo-ae.html\""; + CPPUNIT_ASSERT_EQUAL(11, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo-ä.html"), + std::string(&dest[0], &dest[11])); + + // attfnboth3 + val = "attachment; filename*0*=ISO-8859-15''euro-sign%3d%a4; filename*=ISO-8859-1''currency-sign%3d%a4"; + CPPUNIT_ASSERT_EQUAL(15, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + 
CPPUNIT_ASSERT_EQUAL(std::string("ISO-8859-1"), std::string(cs, cslen)); + CPPUNIT_ASSERT_EQUAL(std::string("currency-sign=¤"), + util::iso8859p1ToUtf8(std::string(&dest[0], &dest[15]))); + + // attnewandfn + val = "attachment; foobar=x; filename=\"foo.html\""; + CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo.html"), + std::string(&dest[0], &dest[8])); + + // attrfc2047token + val = "attachment; filename==?ISO-8859-1?Q?foo-=E4.html?="; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // attrfc2047quoted + val = "attachment; filename=\"=?ISO-8859-1?Q?foo-=E4.html?=\""; + CPPUNIT_ASSERT_EQUAL(29, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("=?ISO-8859-1?Q?foo-=E4.html?="), + std::string(&dest[0], &dest[29])); + + // aria2 original testcases + + // zero-length filename. token cannot be empty, so this is invalid. + val = "attachment; filename="; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // zero-length filename. quoted-string can be empty string, so this + // is ok. + val = "attachment; filename=\"\""; + CPPUNIT_ASSERT_EQUAL(0, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // empty value is not allowed + val = "attachment; filename=;"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // / is not valid char in token. + val = "attachment; filename=dir/file"; + CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + + // value-chars is *(pct-encoded / attr-char), so empty string is + // allowed. 
+ val = "attachment; filename*=UTF-8''"; + CPPUNIT_ASSERT_EQUAL(0, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("UTF-8"), std::string(cs, cslen)); + + val = "attachment; filename*=UTF-8''; filename=foo"; + CPPUNIT_ASSERT_EQUAL(0, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("UTF-8"), std::string(cs, cslen)); + + val = "attachment; filename*=UTF-8'' ; filename=foo"; + CPPUNIT_ASSERT_EQUAL(0, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("UTF-8"), std::string(cs, cslen)); + + // with language + val = "attachment; filename*=UTF-8'japanese'konnichiwa"; + CPPUNIT_ASSERT_EQUAL(10, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("konnichiwa"), + std::string(&dest[0], &dest[10])); + + // lws before and after "=" + val = "attachment; filename = foo.html"; + CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo.html"), + std::string(&dest[0], &dest[8])); + + // lws before and after "=" with quoted-string + val = "attachment; filename = \"foo.html\""; + CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo.html"), + std::string(&dest[0], &dest[8])); + + // lws after parm + val = "attachment; filename=foo.html "; + CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo.html"), + std::string(&dest[0], &dest[8])); + + val = "attachment; filename=foo.html ; hello=world"; + CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), 
val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo.html"), + std::string(&dest[0], &dest[8])); + + val = "attachment; filename=\"foo.html\" "; + CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo.html"), + std::string(&dest[0], &dest[8])); + + val = "attachment; filename=\"foo.html\" ; hello=world"; + CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo.html"), + std::string(&dest[0], &dest[8])); + + val = "attachment; filename*=UTF-8''foo.html ; hello=world"; + CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition + (dest, destlen, &cs, &cslen, val.c_str(), val.size())); + CPPUNIT_ASSERT_EQUAL(std::string("foo.html"), + std::string(&dest[0], &dest[8])); } class Printer {