Content-Disposition parser conforming to RFC 6266.

RFC 2231 Continuation is not supported.
pull/31/head
Tatsuhiro Tsujikawa 2012-10-27 11:41:56 +09:00
parent 21c3903af0
commit 118aed9c24
3 changed files with 1062 additions and 208 deletions

View File

@ -313,6 +313,17 @@ bool inRFC2616HttpToken(const char c)
std::find(vbegin(chars), vend(chars), c) != vend(chars);
}
// Returns true if |c| is an attr-char as used by RFC 5987 extended
// parameter values: an RFC 2616 token character other than '*', '\''
// and '%'.
bool inRFC5987AttrChar(const char c)
{
  if(c == '*' || c == '\'' || c == '%') {
    return false;
  }
  return inRFC2616HttpToken(c);
}
// Returns true if |c| is a printable character of the ISO/IEC 8859-1
// character set, that is, |c| lies in [0x20, 0x7e] or [0xa0, 0xff].
bool isIso8859p1(unsigned char c)
{
  if(c < 0x20u) {
    return false;
  }
  if(c <= 0x7eu) {
    return true;
  }
  // Everything from 0xa0 up to the largest unsigned char value is
  // accepted; 0x7f-0x9f are rejected.
  return 0xa0u <= c;
}
bool isLws(const char c)
{
return c == ' ' || c == '\t';
@ -715,12 +726,11 @@ void parsePrioritizePieceRange
// Converts ISO/IEC 8859-1 string to UTF-8 string. If there is a
// character not in ISO/IEC 8859-1, returns empty string.
std::string iso8859ToUtf8(const std::string& src)
std::string iso8859p1ToUtf8(const char* src, size_t len)
{
std::string dest;
for(std::string::const_iterator itr = src.begin(), eoi = src.end();
itr != eoi; ++itr) {
unsigned char c = *itr;
for(const char* p = src, *last = src+len; p != last; ++p) {
unsigned char c = *p;
if(0xa0u <= c) {
if(c <= 0xbfu) {
dest += 0xc2u;
@ -729,7 +739,7 @@ std::string iso8859ToUtf8(const std::string& src)
}
dest += c&(~0x40u);
} else if(0x80u <= c && c <= 0x9fu) {
return A2STR::NIL;
return "";
} else {
dest += c;
}
@ -737,6 +747,11 @@ std::string iso8859ToUtf8(const std::string& src)
return dest;
}
// Convenience overload: converts the ISO/IEC 8859-1 string |src| by
// forwarding its buffer and size to the pointer/length overload.
std::string iso8859p1ToUtf8(const std::string& src)
{
  const char* first = src.c_str();
  const size_t length = src.size();
  return iso8859p1ToUtf8(first, length);
}
namespace {
template<typename OutputIterator>
void parseParam(OutputIterator out, const std::string& header)
@ -778,112 +793,420 @@ void parseParam(OutputIterator out, const std::string& header)
}
} // namespace
/* Start of utf8 dfa */
/* Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
* See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
*
* Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/* DFA state meaning "a complete, valid code point has been decoded". */
#define UTF8_ACCEPT 0
/* DFA state meaning "the byte sequence is not valid UTF-8". */
#define UTF8_REJECT 12

static const uint8_t utf8d[] = {
  /*
   * Entries 0-255: map each byte value to a character class. Using
   * classes keeps the transition table small and doubles as the
   * shift count for masking the payload bits of a leading byte.
   */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,

  /*
   * Entries 256 and up: transition table indexed by
   * (current state + character class), yielding the next state.
   */
  0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
  12,36,12,12,12,12,12,12,12,12,12,12,
};

/*
 * Feeds one byte to the UTF-8 decoder.
 *
 * |state| holds the current DFA state and is updated in place; start
 * with UTF8_ACCEPT. |codep| accumulates the code point being decoded.
 * |byte| is used directly as an index into the class table, so it
 * must be in the range 0-255.
 *
 * Returns the new state: UTF8_ACCEPT when |*codep| holds a complete
 * code point, UTF8_REJECT when the input is invalid, and any other
 * value when more bytes are needed.
 */
static uint32_t
utf8dfa(uint32_t* state, uint32_t* codep, uint32_t byte) {
  const uint32_t cls = utf8d[byte];

  if(*state == UTF8_ACCEPT) {
    /* Leading byte: strip the length marker bits. */
    *codep = (0xff >> cls) & (byte);
  } else {
    /* Continuation byte: append its low six bits. */
    *codep = (byte & 0x3fu) | (*codep << 6);
  }

  *state = utf8d[256 + *state + cls];
  return *state;
}
/* End of utf8 dfa */
/* States of the parser implemented by parse_content_disposition(). */
typedef enum {
/* Initial state: skipping LWS before the disposition type token. */
CD_BEFORE_DISPOSITION_TYPE,
/* LWS seen after the disposition type; only ';' or more LWS may follow. */
CD_AFTER_DISPOSITION_TYPE,
/* Inside the disposition type token. */
CD_DISPOSITION_TYPE,
/* After ';': skipping LWS before a parameter name. */
CD_BEFORE_DISPOSITION_PARM_NAME,
/* LWS seen after a parameter name; only '=' or more LWS may follow. */
CD_AFTER_DISPOSITION_PARM_NAME,
/* Inside a parameter name. */
CD_DISPOSITION_PARM_NAME,
/* After '=' of a regular parameter: expecting '"' or a token. */
CD_BEFORE_VALUE,
/* A value has ended: expecting ';' or LWS. */
CD_AFTER_VALUE,
/* Inside a quoted-string value. */
CD_QUOTED_STRING,
/* Inside an unquoted token value. */
CD_TOKEN,
/* After '=' of a parameter whose name ends with '*': expecting charset. */
CD_BEFORE_EXT_VALUE,
/* Inside the charset part, terminated by the first '\''. */
CD_CHARSET,
/* Inside the language part, terminated by the second '\''. */
CD_LANGUAGE,
/* Inside the value characters of an extended value. */
CD_VALUE_CHARS,
/* After '%': expecting the first hex digit. */
CD_VALUE_CHARS_PCT_ENCODED1,
/* Expecting the second hex digit of a percent-encoded octet. */
CD_VALUE_CHARS_PCT_ENCODED2
} content_disposition_parse_state;
/* Bit flags recording which filename parameters
   parse_content_disposition() has already completed. */
typedef enum {
/* A plain "filename" value was captured; a second "filename"
   parameter makes the parser fail. */
CD_FILENAME_FOUND = 1,
/* A "filename*" value was captured; a second "filename*" makes the
   parser fail and later plain "filename" values are not copied. */
CD_EXT_FILENAME_FOUND = 1 << 1
} content_disposition_parse_flag;
typedef enum {
CD_ENC_UNKNOWN,
CD_ENC_UTF8,
CD_ENC_ISO_8859_1
} content_disposition_charset;
// Parses the Content-Disposition header field value |in| of length
// |len| and copies the filename it carries into |dest|.
//
// |dest|/|destlen| describe the output buffer; no terminating NUL is
// written. If the filename was taken from a "filename*" extended
// parameter, |*charsetp| and |*charsetlenp| receive the position
// (inside |in|) and length of its charset; otherwise they are NULL
// and 0. Percent-encoded octets of an extended value are stored
// decoded. A "filename*" value takes precedence over a plain
// "filename" value regardless of their order.
//
// Exactly |len| bytes are examined; |in| need not be NUL-terminated
// and an embedded NUL is treated as an ordinary (invalid) character.
//
// Returns the number of bytes written to |dest|, or -1 if the value
// is malformed or |dest| is too small. On -1 the contents of |dest|,
// |*charsetp| and |*charsetlenp| are unspecified.
int parse_content_disposition(char *dest, size_t destlen,
                              const char **charsetp, size_t *charsetlenp,
                              const char *in, size_t len)
{
  const char *p = in, *eop = in + len;
  const char *mark_first = NULL, *mark_last = NULL;
  int state = CD_BEFORE_DISPOSITION_TYPE;
  // Nonzero while the value being parsed must be copied to |dest|.
  int in_file_parm = 0;
  int flags = 0;
  // Nonzero if the previous character in a quoted-string was an
  // unescaped backslash.
  int quoted_seen = 0;
  int charset = CD_ENC_UNKNOWN;
  char *dp = dest;
  size_t dlen = destlen;
  uint32_t dfa_state = UTF8_ACCEPT;
  uint32_t dfa_code = 0;
  uint8_t pctval = 0;

  *charsetp = NULL;
  *charsetlenp = 0;

  // Honour |len| instead of relying on a terminating NUL.
  for(; p != eop; ++p) {
    switch(state) {
    case CD_BEFORE_DISPOSITION_TYPE:
      if(inRFC2616HttpToken(*p)) {
        state = CD_DISPOSITION_TYPE;
      } else if(!isLws(*p)) {
        return -1;
      }
      break;
    case CD_AFTER_DISPOSITION_TYPE:
    case CD_DISPOSITION_TYPE:
      if(*p == ';') {
        state = CD_BEFORE_DISPOSITION_PARM_NAME;
      } else if(isLws(*p)) {
        state = CD_AFTER_DISPOSITION_TYPE;
      } else if(state == CD_AFTER_DISPOSITION_TYPE ||
                !inRFC2616HttpToken(*p)) {
        return -1;
      }
      break;
    case CD_BEFORE_DISPOSITION_PARM_NAME:
      if(inRFC2616HttpToken(*p)) {
        mark_first = p;
        state = CD_DISPOSITION_PARM_NAME;
      } else if(!isLws(*p)) {
        return -1;
      }
      break;
    case CD_AFTER_DISPOSITION_PARM_NAME:
    case CD_DISPOSITION_PARM_NAME:
      if(*p == '=') {
        if(state == CD_DISPOSITION_PARM_NAME) {
          mark_last = p;
        }
        in_file_parm = 0;
        if(strieq(mark_first, mark_last, "filename*")) {
          if((flags & CD_EXT_FILENAME_FOUND) == 0) {
            in_file_parm = 1;
          } else {
            return -1;
          }
          state = CD_BEFORE_EXT_VALUE;
        } else if(strieq(mark_first, mark_last, "filename")) {
          if(flags & CD_FILENAME_FOUND) {
            return -1;
          }
          // A previously seen filename* wins over plain filename.
          if((flags & CD_EXT_FILENAME_FOUND) == 0) {
            in_file_parm = 1;
          }
          state = CD_BEFORE_VALUE;
        } else {
          /* ext-token must be characters in token, followed by "*" */
          if(mark_first != mark_last-1 && *(mark_last-1) == '*') {
            state = CD_BEFORE_EXT_VALUE;
          } else {
            state = CD_BEFORE_VALUE;
          }
        }
        if(in_file_parm) {
          dp = dest;
          dlen = destlen;
        }
      } else if(isLws(*p)) {
        // Record the end of the name only at the first LWS; further
        // LWS must not extend the name with trailing blanks.
        if(state == CD_DISPOSITION_PARM_NAME) {
          mark_last = p;
        }
        state = CD_AFTER_DISPOSITION_PARM_NAME;
      } else if(state == CD_AFTER_DISPOSITION_PARM_NAME ||
                !inRFC2616HttpToken(*p)) {
        return -1;
      }
      break;
    case CD_BEFORE_VALUE:
      if(*p == '"') {
        quoted_seen = 0;
        state = CD_QUOTED_STRING;
      } else if(inRFC2616HttpToken(*p)) {
        if(in_file_parm) {
          if(dlen == 0) {
            return -1;
          } else {
            *dp++ = *p;
            --dlen;
          }
        }
        state = CD_TOKEN;
      } else if(!isLws(*p)) {
        return -1;
      }
      break;
    case CD_AFTER_VALUE:
      if(*p == ';') {
        state = CD_BEFORE_DISPOSITION_PARM_NAME;
      } else if(!isLws(*p)) {
        return -1;
      }
      break;
    case CD_QUOTED_STRING:
      if(*p == '\\' && quoted_seen == 0) {
        quoted_seen = 1;
      } else if(*p == '"' && quoted_seen == 0) {
        if(in_file_parm) {
          flags |= CD_FILENAME_FOUND;
        }
        state = CD_AFTER_VALUE;
      } else {
        /* TEXT which is OCTET except CTLs, but including LWS. We only
           accept ISO-8859-1 chars. */
        quoted_seen = 0;
        if(!isIso8859p1(static_cast<unsigned char>(*p))) {
          return -1;
        }
        if(in_file_parm) {
          if(dlen == 0) {
            return -1;
          } else {
            *dp++ = *p;
            --dlen;
          }
        }
      }
      break;
    case CD_TOKEN:
      if(inRFC2616HttpToken(*p)) {
        if(in_file_parm) {
          if(dlen == 0) {
            return -1;
          } else {
            *dp++ = *p;
            --dlen;
          }
        }
      } else if(*p == ';') {
        if(in_file_parm) {
          flags |= CD_FILENAME_FOUND;
        }
        state = CD_BEFORE_DISPOSITION_PARM_NAME;
      } else if(isLws(*p)) {
        if(in_file_parm) {
          flags |= CD_FILENAME_FOUND;
        }
        state = CD_AFTER_VALUE;
      } else {
        return -1;
      }
      break;
    case CD_BEFORE_EXT_VALUE:
      if(*p == '\'') {
        /* Empty charset is not allowed */
        return -1;
      } else if(inRFC2978MIMECharset(*p)) {
        mark_first = p;
        state = CD_CHARSET;
      } else if(!isLws(*p)) {
        return -1;
      }
      break;
    case CD_CHARSET:
      if(*p == '\'') {
        mark_last = p;
        // Report the charset only for filename*; the charset of an
        // unrelated extended parameter must not be attributed to the
        // filename stored in |dest|.
        if(in_file_parm) {
          *charsetp = mark_first;
          *charsetlenp = mark_last - mark_first;
        }
        if(strieq(mark_first, mark_last, "utf-8")) {
          charset = CD_ENC_UTF8;
          dfa_state = UTF8_ACCEPT;
          dfa_code = 0;
        } else if(strieq(mark_first, mark_last, "iso-8859-1")) {
          charset = CD_ENC_ISO_8859_1;
        } else {
          charset = CD_ENC_UNKNOWN;
        }
        state = CD_LANGUAGE;
      } else if(!inRFC2978MIMECharset(*p)) {
        return -1;
      }
      break;
    case CD_LANGUAGE:
      if(*p == '\'') {
        if(in_file_parm) {
          dp = dest;
          dlen = destlen;
        }
        state = CD_VALUE_CHARS;
      } else if(*p != '-' && !isAlpha(*p) && !isDigit(*p)) {
        return -1;
      }
      break;
    case CD_VALUE_CHARS:
      if(inRFC5987AttrChar(*p)) {
        if(charset == CD_ENC_UTF8) {
          if(utf8dfa(&dfa_state, &dfa_code,
                     static_cast<unsigned char>(*p)) == UTF8_REJECT) {
            return -1;
          }
        }
        if(in_file_parm) {
          if(dlen == 0) {
            return -1;
          } else {
            *dp++ = *p;
            --dlen;
          }
        }
      } else if(*p == '%') {
        // Reserve room now for the octet that is stored once both
        // hex digits have been read.
        if(in_file_parm) {
          if(dlen == 0) {
            return -1;
          }
        }
        pctval = 0;
        state = CD_VALUE_CHARS_PCT_ENCODED1;
      } else if(*p == ';' || isLws(*p)) {
        // The value must not end in the middle of a UTF-8 sequence.
        if(charset == CD_ENC_UTF8 && dfa_state != UTF8_ACCEPT) {
          return -1;
        }
        if(in_file_parm) {
          flags |= CD_EXT_FILENAME_FOUND;
        }
        if(*p == ';') {
          state = CD_BEFORE_DISPOSITION_PARM_NAME;
        } else {
          state = CD_AFTER_VALUE;
        }
      } else {
        return -1;
      }
      break;
    case CD_VALUE_CHARS_PCT_ENCODED1:
      if(isHexDigit(*p)) {
        pctval |= hexCharToUInt(*p) << 4;
        state = CD_VALUE_CHARS_PCT_ENCODED2;
      } else {
        return -1;
      }
      break;
    case CD_VALUE_CHARS_PCT_ENCODED2:
      if(isHexDigit(*p)) {
        pctval |= hexCharToUInt(*p);
        if(charset == CD_ENC_UTF8) {
          if(utf8dfa(&dfa_state, &dfa_code, pctval) == UTF8_REJECT) {
            return -1;
          }
        } else if(charset == CD_ENC_ISO_8859_1) {
          if(!isIso8859p1(pctval)) {
            return -1;
          }
        }
        if(in_file_parm) {
          // Room was verified when '%' was seen.
          *dp++ = pctval;
          --dlen;
        }
        state = CD_VALUE_CHARS;
      } else {
        return -1;
      }
      break;
    }
  }
  // Only some states are valid places for the input to end.
  switch(state) {
  case CD_BEFORE_DISPOSITION_TYPE:
  case CD_AFTER_DISPOSITION_TYPE:
  case CD_DISPOSITION_TYPE:
  case CD_AFTER_VALUE:
  case CD_TOKEN:
    return static_cast<int>(destlen - dlen);
  case CD_VALUE_CHARS:
    if(charset == CD_ENC_UTF8 && dfa_state != UTF8_ACCEPT) {
      return -1;
    }
    return static_cast<int>(destlen - dlen);
  default:
    return -1;
  }
}
// Extracts the filename from the Content-Disposition header field
// value |header| using parse_content_disposition().
//
// If no charset is reported, or the reported charset is
// "iso-8859-1" (case-insensitive), the extracted bytes are converted
// from ISO-8859-1 to UTF-8; otherwise they are returned unchanged.
//
// Returns an empty string if the header cannot be parsed, or if the
// resulting name is flagged by detectDirTraversal() or contains '/'
// or '\\'.
std::string getContentDispositionFilename(const std::string& header)
{
  char cdval[1024];
  const char* charset;
  size_t charsetlen;
  int rv = parse_content_disposition(cdval, sizeof(cdval),
                                     &charset, &charsetlen,
                                     header.c_str(), header.size());
  if(rv == -1) {
    return "";
  }
  std::string res;
  if(!charset || strieq(charset, charset+charsetlen, "iso-8859-1")) {
    res = iso8859p1ToUtf8(cdval, rv);
  } else {
    res.assign(cdval, rv);
  }
  // Never hand back a name that could escape the download directory.
  if(!detectDirTraversal(res) &&
     res.find_first_of("/\\") == std::string::npos) {
    return res;
  }
  return "";
}
std::string toUpper(const std::string& src) {

View File

@ -215,6 +215,15 @@ bool inRFC3986ReservedChars(const char c);
bool inRFC3986UnreservedChars(const char c);
bool inRFC2978MIMECharset(const char c);
bool inRFC2616HttpToken(const char c);
bool inRFC5987AttrChar(const char c);
// Returns true if |c| is in ISO/IEC 8859-1 character set.
bool isIso8859p1(unsigned char c);
bool isUtf8(const std::string& str);
std::string percentDecode
@ -285,7 +294,27 @@ void parsePrioritizePieceRange
int64_t defaultSize = 1048576 /* 1MiB */);
// Converts ISO/IEC 8859-1 string src to utf-8.
std::string iso8859ToUtf8(const std::string& src);
std::string iso8859p1ToUtf8(const char* src, size_t len);
std::string iso8859p1ToUtf8(const std::string& src);
// Parses Content-Disposition header field value |in| with its length
// |len| in a manner conforming to RFC 6266 and extracts filename
// value and copies it to the region pointed by |dest|. The |destlen|
// specifies the capacity of the |dest|. This function does not store
// NUL character after filename in |dest|. This function does not
// support RFC 2231 Continuation. If the function sees RFC 2231/5987
// encoding and charset, it stores its first pointer to |*charsetp|
// and its length in |*charsetlenp|. Otherwise, they are NULL and 0
// respectively. In RFC 2231/5987 encoding, percent-encoded string
// will be decoded to original form and stored in |dest|.
//
// This function returns the number of written bytes in |dest| if it
// succeeds, or -1. If there is not enough room to store filename in
// |dest|, this function returns -1. If this function returns -1, the
// |dest|, |*charsetp| and |*charsetlenp| are undefined.
int parse_content_disposition(char *dest, size_t destlen,
const char **charsetp, size_t *charsetlenp,
const char *in, size_t len);
std::string getContentDispositionFilename(const std::string& header);

View File

@ -40,6 +40,7 @@ class UtilTest:public CppUnit::TestFixture {
CPPUNIT_TEST(testIstartsWith);
// may be moved to other helper class in the future.
CPPUNIT_TEST(testGetContentDispositionFilename);
CPPUNIT_TEST(testParseContentDisposition);
CPPUNIT_TEST(testToUpper);
CPPUNIT_TEST(testToLower);
CPPUNIT_TEST(testUppercase);
@ -107,6 +108,7 @@ public:
void testIstartsWith();
// may be moved to other helper class in the future.
void testGetContentDispositionFilename();
void testParseContentDisposition();
void testToUpper();
void testToLower();
void testUppercase();
@ -816,136 +818,636 @@ void UtilTest::testIstartsWith() {
}
// Checks util::getContentDispositionFilename() against quoted,
// unquoted, path-like and RFC 5987 extended filename parameters.
void UtilTest::testGetContentDispositionFilename() {
  std::string val;

  val = "attachment; filename=\"aria2.tar.bz2\"";
  CPPUNIT_ASSERT_EQUAL(std::string("aria2.tar.bz2"),
                       util::getContentDispositionFilename(val));

  val = "attachment; filename=\"\"";
  CPPUNIT_ASSERT_EQUAL(std::string(""),
                       util::getContentDispositionFilename(val));

  // Unterminated quoted-string.
  val = "attachment; filename=\"";
  CPPUNIT_ASSERT_EQUAL(std::string(""),
                       util::getContentDispositionFilename(val));

  // Whitespace inside the quotes is part of the value.
  val = "attachment; filename= \" aria2.tar.bz2 \"";
  CPPUNIT_ASSERT_EQUAL(std::string(" aria2.tar.bz2 "),
                       util::getContentDispositionFilename(val));

  // Names containing path separators or ".." are expected to yield an
  // empty result.
  val = "attachment; filename=dir/file";
  CPPUNIT_ASSERT_EQUAL(std::string(""),
                       util::getContentDispositionFilename(val));

  val = "attachment; filename=dir\\file";
  CPPUNIT_ASSERT_EQUAL(std::string(""),
                       util::getContentDispositionFilename(val));

  val = "attachment; filename=\"dir/file\"";
  CPPUNIT_ASSERT_EQUAL(std::string(""),
                       util::getContentDispositionFilename(val));

  val = "attachment; filename=\"dir\\\\file\"";
  CPPUNIT_ASSERT_EQUAL(std::string(""),
                       util::getContentDispositionFilename(val));

  val = "attachment; filename=\"/etc/passwd\"";
  CPPUNIT_ASSERT_EQUAL(std::string(""),
                       util::getContentDispositionFilename(val));

  val = "attachment; filename=\"..\"";
  CPPUNIT_ASSERT_EQUAL(std::string(""),
                       util::getContentDispositionFilename(val));

  val = "attachment; filename=..";
  CPPUNIT_ASSERT_EQUAL(std::string(""),
                       util::getContentDispositionFilename(val));

  // Unescaping %2E%2E%2F produces "../". But since we won't unescape,
  // we just accept it as is.
  val = "attachment; filename=\"%2E%2E%2Ffoo.html\"";
  CPPUNIT_ASSERT_EQUAL(std::string("%2E%2E%2Ffoo.html"),
                       util::getContentDispositionFilename(val));

  // iso-8859-1 string will be converted to utf-8.
  val = "attachment; filename*=iso-8859-1''foo-%E4.html";
  CPPUNIT_ASSERT_EQUAL(std::string("foo-ä.html"),
                       util::getContentDispositionFilename(val));

  val = "attachment; filename*= UTF-8''foo-%c3%a4.html";
  CPPUNIT_ASSERT_EQUAL(std::string("foo-ä.html"),
                       util::getContentDispositionFilename(val));

  // iso-8859-1 string will be converted to utf-8.
  val = "attachment; filename=\"foo-%E4.html\"";
  val = util::percentDecode(val.begin(), val.end());
  CPPUNIT_ASSERT_EQUAL(std::string("foo-ä.html"),
                       util::getContentDispositionFilename(val));
}
void UtilTest::testParseContentDisposition() {
char dest[1024];
size_t destlen = sizeof(dest);
const char *cs;
size_t cslen;
std::string val;
// test cases from http://greenbytes.de/tech/tc2231/
// inlonly
val = "inline";
CPPUNIT_ASSERT_EQUAL(0, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// inlonlyquoted
val = "\"inline\"";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// inlwithasciifilename
val = "inline; filename=\"foo.html\"";
CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo.html"),
std::string(&dest[0], &dest[8]));
// inlwithfnattach
val = "inline; filename=\"Not an attachment!\"";
CPPUNIT_ASSERT_EQUAL(18, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("Not an attachment!"),
std::string(&dest[0], &dest[18]));
// inlwithasciifilenamepdf
val = "inline; filename=\"foo.pdf\"";
CPPUNIT_ASSERT_EQUAL(7, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo.pdf"),
std::string(&dest[0], &dest[7]));
// attwithasciifilename25
val = "attachment; filename=\"0000000000111111111122222\"";
CPPUNIT_ASSERT_EQUAL(25, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("0000000000111111111122222"),
std::string(&dest[0], &dest[25]));
// attwithasciifilename35
val = "attachment; filename=\"00000000001111111111222222222233333\"";
CPPUNIT_ASSERT_EQUAL(35, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("00000000001111111111222222222233333"),
std::string(&dest[0], &dest[35]));
// Tests from http://greenbytes.de/tech/tc2231/
// attwithasciifnescapedchar
CPPUNIT_ASSERT_EQUAL
(std::string("foo.html"),
util::getContentDispositionFilename("filename=\"f\\oo.html\""));
val = "attachment; filename=\"f\\oo.html\"";
CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo.html"),
std::string(&dest[0], &dest[8]));
// attwithasciifnescapedquote
val = "attachment; filename=\"\\\"quoting\\\" tested.html\"";
CPPUNIT_ASSERT_EQUAL(21, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("\"quoting\" tested.html"),
std::string(&dest[0], &dest[21]));
// attwithquotedsemicolon
val = "attachment; filename=\"Here's a semicolon;.html\"";
CPPUNIT_ASSERT_EQUAL(24, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("Here's a semicolon;.html"),
std::string(&dest[0], &dest[24]));
// attwithfilenameandextparam
val = "attachment; foo=\"bar\"; filename=\"foo.html\"";
CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo.html"),
std::string(&dest[0], &dest[8]));
// attwithfilenameandextparamescaped
val = "attachment; foo=\"\\\"\\\\\";filename=\"foo.html\"";
CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo.html"),
std::string(&dest[0], &dest[8]));
// attwithasciifilenameucase
CPPUNIT_ASSERT_EQUAL
(std::string("foo.html"),
util::getContentDispositionFilename("FILENAME=\"foo.html\""));
val = "attachment; FILENAME=\"foo.html\"";
CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo.html"),
std::string(&dest[0], &dest[8]));
// attwithasciifilenamenq
val = "attachment; filename=foo.html";
CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo.html"),
std::string(&dest[0], &dest[8]));
// attwithtokfncommanq
val = "attachment; filename=foo,bar.html";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attwithasciifilenamenqs
val = "attachment; filename=foo.html ;";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attemptyparam
val = "attachment; ;filename=foo";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attwithasciifilenamenqws
val = "attachment; filename=foo bar.html";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attwithfntokensq
val = "attachment; filename='foo.bar'";
CPPUNIT_ASSERT_EQUAL(9, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("'foo.bar'"),
std::string(&dest[0], &dest[9]));
// attwithisofnplain
// attachment; filename="foo-ä.html"
val = "attachment; filename=\"foo-%E4.html\"";
val = util::percentDecode(val.begin(), val.end());
CPPUNIT_ASSERT_EQUAL(10, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo-ä.html"),
util::iso8859p1ToUtf8(std::string(&dest[0], &dest[10])));
// attwithutf8fnplain
// attachment; filename="foo-ä.html"
val = "attachment; filename=\"foo-%C3%A4.html\"";
val = util::percentDecode(val.begin(), val.end());
CPPUNIT_ASSERT_EQUAL(11, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo-ä.html"),
util::iso8859p1ToUtf8(std::string(&dest[0], &dest[11])));
// attwithfnrawpctenca
val = "attachment; filename=\"foo-%41.html\"";
CPPUNIT_ASSERT_EQUAL(12, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo-%41.html"),
std::string(&dest[0], &dest[12]));
// attwithfnusingpct
val = "attachment; filename=\"50%.html\"";
CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("50%.html"),
std::string(&dest[0], &dest[8]));
// attwithfnrawpctencaq
val = "attachment; filename=\"foo-%\\41.html\"";
CPPUNIT_ASSERT_EQUAL(12, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo-%41.html"),
std::string(&dest[0], &dest[12]));
// attwithnamepct
val = "attachment; name=\"foo-%41.html\"";
CPPUNIT_ASSERT_EQUAL(0, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attwithfilenamepctandiso
// attachment; filename="ä-%41.html"
val = "attachment; filename=\"%E4-%2541.html\"";
val = util::percentDecode(val.begin(), val.end());
CPPUNIT_ASSERT_EQUAL(10, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("ä-%41.html"),
util::iso8859p1ToUtf8(std::string(&dest[0], &dest[10])));
// attwithfnrawpctenclong
val = "attachment; filename=\"foo-%c3%a4-%e2%82%ac.html\"";
CPPUNIT_ASSERT_EQUAL(25, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo-%c3%a4-%e2%82%ac.html"),
std::string(&dest[0], &dest[25]));
// attwithasciifilenamews1
val = "attachment; filename =\"foo.html\"";
CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo.html"),
std::string(&dest[0], &dest[8]));
// attwith2filenames
val = "attachment; filename=\"foo.html\"; filename=\"bar.html\"";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attfnbrokentoken
val = "attachment; filename=foo[1](2).html";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attfnbrokentokeniso
val = "attachment; filename=foo-%E4.html";
val = util::percentDecode(val.begin(), val.end());
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attfnbrokentokenutf
// attachment; filename=foo-ä.html
val = "attachment; filename=foo-ä.html";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attmissingdisposition
val = "filename=foo.html";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attmissingdisposition2
val = "x=y; filename=foo.html";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attmissingdisposition3
val = "\"foo; filename=bar;baz\"; filename=qux";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attmissingdisposition4
val = "filename=foo.html, filename=bar.html";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// emptydisposition
val = "; filename=foo.html";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// doublecolon
val = ": inline; attachment; filename=foo.html";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attandinline
val = "inline; attachment; filename=foo.html";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attandinline2
val = "attachment; inline; filename=foo.html";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attbrokenquotedfn
val = "attachment; filename=\"foo.html\".txt";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attbrokenquotedfn2
val = "attachment; filename=\"bar";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attbrokenquotedfn3
val = "attachment; filename=foo\"bar;baz\"qux";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attmultinstances
val = "attachment; filename=foo.html, attachment; filename=bar.html";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attmissingdelim
val = "attachment; foo=foo filename=bar";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attmissingdelim2
val = "attachment; filename=bar foo=foo ";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attmissingdelim3
val = "attachment filename=bar";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attreversed
val = "filename=foo.html; attachment";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attconfusedparam
val = "attachment; xfilename=foo.html";
CPPUNIT_ASSERT_EQUAL(0, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attabspath
val = "attachment; filename=\"/foo.html\"";
CPPUNIT_ASSERT_EQUAL(9, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("/foo.html"),
std::string(&dest[0], &dest[9]));
// attabspathwin
val = "attachment; filename=\"\\\\foo.html\"";
CPPUNIT_ASSERT_EQUAL(9, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("\\foo.html"),
std::string(&dest[0], &dest[9]));
// attcdate
val = "attachment; creation-date=\"Wed, 12 Feb 1997 16:29:51 -0500\"";
CPPUNIT_ASSERT_EQUAL(0, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// dispext
val = "foobar";
CPPUNIT_ASSERT_EQUAL(0, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// dispextbadfn
val = "attachment; example=\"filename=example.txt\"";
CPPUNIT_ASSERT_EQUAL(0, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attwithisofn2231iso
CPPUNIT_ASSERT_EQUAL
(std::string("foo-ä.html"),
util::getContentDispositionFilename("filename*=iso-8859-1''foo-%E4.html"));
val = "attachment; filename*=iso-8859-1''foo-%E4.html";
CPPUNIT_ASSERT_EQUAL(10, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("iso-8859-1"), std::string(cs, cslen));
CPPUNIT_ASSERT_EQUAL(std::string("foo-ä.html"),
util::iso8859p1ToUtf8(std::string(&dest[0], &dest[10])));
// attwithfn2231utf8
CPPUNIT_ASSERT_EQUAL
(std::string("foo-ä-€.html"),
util::getContentDispositionFilename
("filename*=UTF-8''foo-%c3%a4-%e2%82%ac.html"));
val = "attachment; filename*=UTF-8''foo-%c3%a4-%e2%82%ac.html";
CPPUNIT_ASSERT_EQUAL(15, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("UTF-8"), std::string(cs, cslen));
CPPUNIT_ASSERT_EQUAL(std::string("foo-ä-€.html"),
std::string(&dest[0], &dest[15]));
// attwithfn2231noc
val = "attachment; filename*=''foo-%c3%a4-%e2%82%ac.html";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attwithfn2231utf8comp
val = "attachment; filename*=UTF-8''foo-a%cc%88.html";
CPPUNIT_ASSERT_EQUAL(12, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
val = "foo-a%cc%88.html";
CPPUNIT_ASSERT_EQUAL(std::string(util::percentDecode(val.begin(),
val.end())),
std::string(&dest[0], &dest[12]));
// attwithfn2231utf8-bad
CPPUNIT_ASSERT_EQUAL
(std::string(""),
util::getContentDispositionFilename
("filename*=iso-8859-1''foo-%c3%a4-%e2%82%ac.html"));
val = "attachment; filename*=iso-8859-1''foo-%c3%a4-%e2%82%ac.html";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attwithfn2231iso-bad
val = "attachment; filename*=utf-8''foo-%E4.html";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attwithfn2231ws1
CPPUNIT_ASSERT_EQUAL
(std::string(""),
util::getContentDispositionFilename("filename *=UTF-8''foo-%c3%a4.html"));
val = "attachment; filename *=UTF-8''foo-%c3%a4.html";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attwithfn2231ws2
CPPUNIT_ASSERT_EQUAL
(std::string("foo-ä.html"),
util::getContentDispositionFilename("filename*= UTF-8''foo-%c3%a4.html"));
val = "attachment; filename*= UTF-8''foo-%c3%a4.html";
CPPUNIT_ASSERT_EQUAL(11, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo-ä.html"),
std::string(&dest[0], &dest[11]));
// attwithfn2231ws3
CPPUNIT_ASSERT_EQUAL
(std::string("foo-ä.html"),
util::getContentDispositionFilename("filename* =UTF-8''foo-%c3%a4.html"));
val = "attachment; filename* =UTF-8''foo-%c3%a4.html";
CPPUNIT_ASSERT_EQUAL(11, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo-ä.html"),
std::string(&dest[0], &dest[11]));
// attwithfn2231quot
CPPUNIT_ASSERT_EQUAL
(std::string(""),
util::getContentDispositionFilename
("filename*=\"UTF-8''foo-%c3%a4.html\""));
val = "attachment; filename*=\"UTF-8''foo-%c3%a4.html\"";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attwithfn2231quot2
val = "attachment; filename*=\"foo%20bar.html\"";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attwithfn2231singleqmissing
val = "attachment; filename*=UTF-8'foo-%c3%a4.html";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attwithfn2231nbadpct1
val = "attachment; filename*=UTF-8''foo%";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attwithfn2231nbadpct2
val = "attachment; filename*=UTF-8''f%oo.html";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attwithfn2231dpct
val = "attachment; filename*=UTF-8''A-%2541.html";
CPPUNIT_ASSERT_EQUAL(10, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("A-%41.html"),
std::string(&dest[0], &dest[10]));
// attwithfn2231abspathdisguised
val = "attachment; filename*=UTF-8''%5cfoo.html";
CPPUNIT_ASSERT_EQUAL(9, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("\\foo.html"),
std::string(&dest[0], &dest[9]));
// attfnboth
val = "attachment; filename=\"foo-ae.html\"; filename*=UTF-8''foo-%c3%a4.html";
CPPUNIT_ASSERT_EQUAL(11, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo-ä.html"),
std::string(&dest[0], &dest[11]));
// attfnboth2
val = "attachment; filename*=UTF-8''foo-%c3%a4.html; filename=\"foo-ae.html\"";
CPPUNIT_ASSERT_EQUAL(11, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo-ä.html"),
std::string(&dest[0], &dest[11]));
// attfnboth3
val = "attachment; filename*0*=ISO-8859-15''euro-sign%3d%a4; filename*=ISO-8859-1''currency-sign%3d%a4";
CPPUNIT_ASSERT_EQUAL(15, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("ISO-8859-1"), std::string(cs, cslen));
CPPUNIT_ASSERT_EQUAL(std::string("currency-sign=¤"),
util::iso8859p1ToUtf8(std::string(&dest[0], &dest[15])));
// attnewandfn
val = "attachment; foobar=x; filename=\"foo.html\"";
CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo.html"),
std::string(&dest[0], &dest[8]));
// attrfc2047token
val = "attachment; filename==?ISO-8859-1?Q?foo-=E4.html?=";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// attrfc2047quoted
val = "attachment; filename=\"=?ISO-8859-1?Q?foo-=E4.html?=\"";
CPPUNIT_ASSERT_EQUAL(29, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("=?ISO-8859-1?Q?foo-=E4.html?="),
std::string(&dest[0], &dest[29]));
// aria2 original testcases
// zero-length filename. token cannot be empty, so this is invalid.
val = "attachment; filename=";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// zero-length filename. quoted-string can be empty string, so this
// is ok.
val = "attachment; filename=\"\"";
CPPUNIT_ASSERT_EQUAL(0, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// empty value is not allowed
val = "attachment; filename=;";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// / is not valid char in token.
val = "attachment; filename=dir/file";
CPPUNIT_ASSERT_EQUAL(-1, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
// value-chars is *(pct-encoded / attr-char), so empty string is
// allowed.
val = "attachment; filename*=UTF-8''";
CPPUNIT_ASSERT_EQUAL(0, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("UTF-8"), std::string(cs, cslen));
val = "attachment; filename*=UTF-8''; filename=foo";
CPPUNIT_ASSERT_EQUAL(0, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("UTF-8"), std::string(cs, cslen));
val = "attachment; filename*=UTF-8'' ; filename=foo";
CPPUNIT_ASSERT_EQUAL(0, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("UTF-8"), std::string(cs, cslen));
// with language
val = "attachment; filename*=UTF-8'japanese'konnichiwa";
CPPUNIT_ASSERT_EQUAL(10, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("konnichiwa"),
std::string(&dest[0], &dest[10]));
// lws before and after "="
val = "attachment; filename = foo.html";
CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo.html"),
std::string(&dest[0], &dest[8]));
// lws before and after "=" with quoted-string
val = "attachment; filename = \"foo.html\"";
CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo.html"),
std::string(&dest[0], &dest[8]));
// lws after parm
val = "attachment; filename=foo.html ";
CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo.html"),
std::string(&dest[0], &dest[8]));
val = "attachment; filename=foo.html ; hello=world";
CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo.html"),
std::string(&dest[0], &dest[8]));
val = "attachment; filename=\"foo.html\" ";
CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo.html"),
std::string(&dest[0], &dest[8]));
val = "attachment; filename=\"foo.html\" ; hello=world";
CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo.html"),
std::string(&dest[0], &dest[8]));
val = "attachment; filename*=UTF-8''foo.html ; hello=world";
CPPUNIT_ASSERT_EQUAL(8, util::parse_content_disposition
(dest, destlen, &cs, &cslen, val.c_str(), val.size()));
CPPUNIT_ASSERT_EQUAL(std::string("foo.html"),
std::string(&dest[0], &dest[8]));
}
class Printer {