mirror of https://github.com/aria2/aria2
2010-02-19 Tatsuhiro Tsujikawa <t-tujikawa@users.sourceforge.net>
Added unit tests for util::getContentDispositionFilename() from http://greenbytes.de/tech/tc2231/. Fixed the function so that added tests are passed. * src/util.cc * test/UtilTest.cc
pull/1/head
parent
cc056289e7
commit
780aaf9c80
|
@ -1,3 +1,11 @@
|
|||
2010-02-19 Tatsuhiro Tsujikawa <t-tujikawa@users.sourceforge.net>
|
||||
|
||||
Added unit tests for util::getContentDispositionFilename() from
|
||||
http://greenbytes.de/tech/tc2231/ Fixed the function so that added
|
||||
tests are passed.
|
||||
* src/util.cc
|
||||
* test/UtilTest.cc
|
||||
|
||||
2010-02-18 Tatsuhiro Tsujikawa <t-tujikawa@users.sourceforge.net>
|
||||
|
||||
Removed setlocale() for LC_CTYPE. It may affect isxdigit in
|
||||
|
|
157
src/util.cc
157
src/util.cc
|
@ -201,6 +201,21 @@ std::string replace(const std::string& target, const std::string& oldstr, const
|
|||
return result;
|
||||
}
|
||||
|
||||
// Returns true if c is an ASCII letter ('A'-'Z' or 'a'-'z').
// The ranges are compared explicitly instead of calling a <cctype>
// function, so the result does not depend on the current locale.
bool isAlpha(const char c)
{
  const bool upper = 'A' <= c && c <= 'Z';
  const bool lower = 'a' <= c && c <= 'z';
  return upper || lower;
}
|
||||
|
||||
// Returns true if c is an ASCII decimal digit ('0'-'9').
// The range is compared explicitly instead of calling a <cctype>
// function, so the result does not depend on the current locale.
bool isDigit(const char c)
{
  if(c < '0') {
    return false;
  }
  return c <= '9';
}
|
||||
|
||||
bool isHexDigit(const char c)
|
||||
{
|
||||
return isDigit(c) || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f');
|
||||
}
|
||||
|
||||
bool inRFC3986ReservedChars(const char c)
|
||||
{
|
||||
static const char reserved[] = {
|
||||
|
@ -214,15 +229,34 @@ bool inRFC3986ReservedChars(const char c)
|
|||
bool inRFC3986UnreservedChars(const char c)
|
||||
{
|
||||
static const char unreserved[] = { '-', '.', '_', '~' };
|
||||
return
|
||||
// ALPHA
|
||||
('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') ||
|
||||
// DIGIT
|
||||
('0' <= c && c <= '9') ||
|
||||
return isAlpha(c) || isDigit(c) ||
|
||||
std::find(&unreserved[0], &unreserved[arrayLength(unreserved)], c) !=
|
||||
&unreserved[arrayLength(unreserved)];
|
||||
}
|
||||
|
||||
bool inRFC2978MIMECharset(const char c)
|
||||
{
|
||||
static const char chars[] = {
|
||||
'!', '#', '$', '%', '&',
|
||||
'\'', '+', '-', '^', '_',
|
||||
'`', '{', '}', '~'
|
||||
};
|
||||
return isAlpha(c) || isDigit(c) ||
|
||||
std::find(&chars[0], &chars[arrayLength(chars)], c) !=
|
||||
&chars[arrayLength(chars)];
|
||||
}
|
||||
|
||||
bool inRFC2616HttpToken(const char c)
|
||||
{
|
||||
static const char chars[] = {
|
||||
'!', '#', '$', '%', '&', '\'', '*', '+', '-', '.',
|
||||
'^', '_', '`', '|', '~'
|
||||
};
|
||||
return isAlpha(c) || isDigit(c) ||
|
||||
std::find(&chars[0], &chars[arrayLength(chars)], c) !=
|
||||
&chars[arrayLength(chars)];
|
||||
}
|
||||
|
||||
std::string urlencode(const unsigned char* target, size_t len) {
|
||||
std::string dest;
|
||||
for(size_t i = 0; i < len; ++i) {
|
||||
|
@ -244,9 +278,7 @@ std::string urlencode(const std::string& target)
|
|||
std::string torrentUrlencode(const unsigned char* target, size_t len) {
|
||||
std::string dest;
|
||||
for(size_t i = 0; i < len; ++i) {
|
||||
if(('0' <= target[i] && target[i] <= '9') ||
|
||||
('A' <= target[i] && target[i] <= 'Z') ||
|
||||
('a' <= target[i] && target[i] <= 'z')) {
|
||||
if(isAlpha(target[i]) || isDigit(target[i])) {
|
||||
dest += target[i];
|
||||
} else {
|
||||
dest.append(StringFormat("%%%02X", target[i]).str());
|
||||
|
@ -267,7 +299,7 @@ std::string urldecode(const std::string& target) {
|
|||
itr != target.end(); ++itr) {
|
||||
if(*itr == '%') {
|
||||
if(itr+1 != target.end() && itr+2 != target.end() &&
|
||||
isxdigit(*(itr+1)) && isxdigit(*(itr+2))) {
|
||||
isHexDigit(*(itr+1)) && isHexDigit(*(itr+2))) {
|
||||
result += parseInt(std::string(itr+1, itr+3), 16);
|
||||
itr += 2;
|
||||
} else {
|
||||
|
@ -614,12 +646,16 @@ static std::string trimBasename(const std::string& src)
|
|||
{
|
||||
static const std::string TRIMMED("\r\n\t '\"");
|
||||
std::string fn = File(trim(src, TRIMMED)).getBasename();
|
||||
std::string::iterator enditer = std::remove(fn.begin(), fn.end(), '\\');
|
||||
fn = std::string(fn.begin(), enditer);
|
||||
if(fn == ".." || fn == A2STR::DOT_C) {
|
||||
fn = A2STR::NIL;
|
||||
}
|
||||
return fn;
|
||||
}
|
||||
|
||||
// Converts ISO/IEC 8859-1 string to UTF-8 string. If there is a
|
||||
// character not in ISO/IEC 8859-1, returns empty string.
|
||||
std::string iso8859ToUtf8(const std::string& src)
|
||||
{
|
||||
std::string dest;
|
||||
|
@ -632,6 +668,8 @@ std::string iso8859ToUtf8(const std::string& src)
|
|||
dest += 0xc3;
|
||||
}
|
||||
dest += c&(~0x40);
|
||||
} else if(0x80 <= c && c <= 0x9f) {
|
||||
return A2STR::NIL;
|
||||
} else {
|
||||
dest += c;
|
||||
}
|
||||
|
@ -648,15 +686,74 @@ std::string getContentDispositionFilename(const std::string& header)
|
|||
i != params.end(); ++i) {
|
||||
std::string& param = *i;
|
||||
static const std::string keyName = "filename";
|
||||
if(!startsWith(param, keyName)) {
|
||||
if(!startsWith(toLower(param), keyName) || param.size() == keyName.size()) {
|
||||
continue;
|
||||
}
|
||||
std::string::iterator markeritr = param.begin()+keyName.size();
|
||||
for(; markeritr != param.end() && *markeritr == ' '; ++markeritr);
|
||||
if(markeritr == param.end()) {
|
||||
continue;
|
||||
}
|
||||
if(*markeritr == '=') {
|
||||
if(*markeritr == '*') {
|
||||
// See RFC2231 Section4 and draft-reschke-rfc2231-in-http.
|
||||
// Please note that this function doesn't do charset conversion
|
||||
// except that if iso-8859-1 is specified, it is converted to
|
||||
// utf-8.
|
||||
++markeritr;
|
||||
for(; markeritr != param.end() && *markeritr == ' '; ++markeritr);
|
||||
if(markeritr == param.end() || *markeritr != '=') {
|
||||
continue;
|
||||
}
|
||||
std::pair<std::string, std::string> paramPair;
|
||||
split(paramPair, param, '=');
|
||||
std::string value = paramPair.second;
|
||||
std::vector<std::string> extValues;
|
||||
split(value, std::back_inserter(extValues), "'", false, true);
|
||||
if(extValues.size() != 3) {
|
||||
continue;
|
||||
}
|
||||
bool bad = false;
|
||||
const std::string& charset = extValues[0];
|
||||
for(std::string::const_iterator j = charset.begin(); j != charset.end();
|
||||
++j) {
|
||||
// Since we first split parameter by ', we can safely assume
|
||||
// that ' is not included in charset.
|
||||
if(!inRFC2978MIMECharset(*j)) {
|
||||
bad = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(bad) {
|
||||
continue;
|
||||
}
|
||||
bad = false;
|
||||
value = extValues[2];
|
||||
for(std::string::const_iterator j = value.begin(); j != value.end(); ++j){
|
||||
if(*j == '%') {
|
||||
if(j+1 != value.end() && isHexDigit(*(j+1)) &&
|
||||
j+2 != value.end() && isHexDigit(*(j+2))) {
|
||||
j += 2;
|
||||
} else {
|
||||
bad = true;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if(*j == '*' || *j == '\'' || !inRFC2616HttpToken(*j)) {
|
||||
bad = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(bad) {
|
||||
continue;
|
||||
}
|
||||
value = trimBasename(urldecode(value));
|
||||
if(toLower(extValues[0]) == "iso-8859-1") {
|
||||
value = iso8859ToUtf8(value);
|
||||
}
|
||||
filename = value;
|
||||
break;
|
||||
} else {
|
||||
for(; markeritr != param.end() && *markeritr == ' '; ++markeritr);
|
||||
if(markeritr == param.end() || *markeritr != '=') {
|
||||
continue;
|
||||
}
|
||||
std::pair<std::string, std::string> paramPair;
|
||||
split(paramPair, param, '=');
|
||||
std::string value = paramPair.second;
|
||||
|
@ -672,35 +769,9 @@ std::string getContentDispositionFilename(const std::string& header)
|
|||
} else {
|
||||
filenameLast = value.end();
|
||||
}
|
||||
value = trimBasename(std::string(value.begin(), filenameLast));
|
||||
if(value.empty()) {
|
||||
continue;
|
||||
}
|
||||
filename = urldecode(value);
|
||||
// continue because there is a chance we can find filename*=...
|
||||
} else if(*markeritr == '*') {
|
||||
// See RFC2231 Section4 and draft-reschke-rfc2231-in-http.
|
||||
// Please note that this function doesn't do charset conversion
|
||||
// except that if iso-8859-1 is specified, it is converted to
|
||||
// utf-8.
|
||||
std::pair<std::string, std::string> paramPair;
|
||||
split(paramPair, param, '=');
|
||||
std::string value = paramPair.second;
|
||||
std::vector<std::string> extValues;
|
||||
split(value, std::back_inserter(extValues), "'", false, true);
|
||||
if(extValues.size() != 3) {
|
||||
continue;
|
||||
}
|
||||
value = trimBasename(extValues[2]);
|
||||
if(value.empty()) {
|
||||
continue;
|
||||
}
|
||||
value = urldecode(value);
|
||||
if(extValues[0] == "iso-8859-1") {
|
||||
value = iso8859ToUtf8(value);
|
||||
}
|
||||
value = trimBasename(urldecode(std::string(value.begin(), filenameLast)));
|
||||
filename = value;
|
||||
break;
|
||||
// continue because there is a chance we can find filename*=...
|
||||
}
|
||||
}
|
||||
return filename;
|
||||
|
|
|
@ -314,9 +314,6 @@ void UtilTest::testGetContentDispositionFilename() {
|
|||
CPPUNIT_ASSERT_EQUAL(std::string(),
|
||||
util::getContentDispositionFilename(currentDir));
|
||||
// RFC2231 Section4
|
||||
std::string extparam1 = "attachment; filename * = UTF-8'ja'filename";
|
||||
CPPUNIT_ASSERT_EQUAL(std::string("filename"),
|
||||
util::getContentDispositionFilename(extparam1));
|
||||
std::string extparam2 = "filename*=''aria2";
|
||||
CPPUNIT_ASSERT_EQUAL(std::string("aria2"),
|
||||
util::getContentDispositionFilename(extparam2));
|
||||
|
@ -338,12 +335,53 @@ void UtilTest::testGetContentDispositionFilename() {
|
|||
std::string extparam8 = "filename=aria2;filename*=UTF-8''hello%20world";
|
||||
CPPUNIT_ASSERT_EQUAL(std::string("hello world"),
|
||||
util::getContentDispositionFilename(extparam8));
|
||||
std::string extparam9 = "filename*=iso-8859-1''%A3";
|
||||
std::string extparam9 = "filename*=ISO-8859-1''%A3";
|
||||
std::string extparam9ans;
|
||||
extparam9ans += 0xc2;
|
||||
extparam9ans += 0xa3;
|
||||
CPPUNIT_ASSERT_EQUAL(extparam9ans,
|
||||
util::getContentDispositionFilename(extparam9));
|
||||
|
||||
// Tests from http://greenbytes.de/tech/tc2231/
|
||||
// attwithasciifnescapedchar
|
||||
CPPUNIT_ASSERT_EQUAL
|
||||
(std::string("foo.html"),
|
||||
util::getContentDispositionFilename("filename=\"f\\oo.html\""));
|
||||
// attwithasciifilenameucase
|
||||
CPPUNIT_ASSERT_EQUAL
|
||||
(std::string("foo.html"),
|
||||
util::getContentDispositionFilename("FILENAME=\"foo.html\""));
|
||||
// attwithisofn2231iso
|
||||
CPPUNIT_ASSERT_EQUAL
|
||||
(std::string("foo-ä.html"),
|
||||
util::getContentDispositionFilename("filename*=iso-8859-1''foo-%E4.html"));
|
||||
// attwithfn2231utf8
|
||||
CPPUNIT_ASSERT_EQUAL
|
||||
(std::string("foo-ä-€.html"),
|
||||
util::getContentDispositionFilename
|
||||
("filename*=UTF-8''foo-%c3%a4-%e2%82%ac.html"));
|
||||
// attwithfn2231utf8-bad
|
||||
CPPUNIT_ASSERT_EQUAL
|
||||
(std::string(""),
|
||||
util::getContentDispositionFilename
|
||||
("filename*=iso-8859-1''foo-%c3%a4-%e2%82%ac.html"));
|
||||
// attwithfn2231ws1
|
||||
CPPUNIT_ASSERT_EQUAL
|
||||
(std::string(""),
|
||||
util::getContentDispositionFilename("filename *=UTF-8''foo-%c3%a4.html"));
|
||||
// attwithfn2231ws2
|
||||
CPPUNIT_ASSERT_EQUAL
|
||||
(std::string("foo-ä.html"),
|
||||
util::getContentDispositionFilename("filename*= UTF-8''foo-%c3%a4.html"));
|
||||
// attwithfn2231ws3
|
||||
CPPUNIT_ASSERT_EQUAL
|
||||
(std::string("foo-ä.html"),
|
||||
util::getContentDispositionFilename("filename* =UTF-8''foo-%c3%a4.html"));
|
||||
// attwithfn2231quot
|
||||
CPPUNIT_ASSERT_EQUAL
|
||||
(std::string(""),
|
||||
util::getContentDispositionFilename
|
||||
("filename*=\"UTF-8''foo-%c3%a4.html\""));
|
||||
}
|
||||
|
||||
class Printer {
|
||||
|
|
Loading…
Reference in New Issue