mirror of https://github.com/aria2/aria2
2010-10-02 Tatsuhiro Tsujikawa <t-tujikawa@users.sourceforge.net>
Non-UTF8 filenames are now percent-encoded. For example, the filename for http://example.org/%90%A2%8AE will be %90%A2%8AE because it is Shift_JIS. The comments and name in .torrent file in XML-RPC response are percent-encoded if they are not UTF-8. * src/FtpNegotiationCommand.cc * src/HttpRequestCommand.cc * src/HttpResponseCommand.cc * src/XmlRpcMethodImpl.cc * src/bittorrent_helper.cc * src/util.cc * src/util.h * test/BittorrentHelperTest.cc * test/UtilTest.cc
pull/1/head
parent
6c348f0493
commit
d956ea0b70
16
ChangeLog
16
ChangeLog
|
@ -1,3 +1,19 @@
|
|||
2010-10-02 Tatsuhiro Tsujikawa <t-tujikawa@users.sourceforge.net>
|
||||
|
||||
Non-UTF8 filenames are now percent-encoded. For example, filename
|
||||
for http://example.org/%90%A2%8AE will be %90%A2%8AE because it is
|
||||
Shift_JIS. The comments and name in .torrent file in XML-RPC
|
||||
response are percent-encoded if they are not UTF-8.
|
||||
* src/FtpNegotiationCommand.cc
|
||||
* src/HttpRequestCommand.cc
|
||||
* src/HttpResponseCommand.cc
|
||||
* src/XmlRpcMethodImpl.cc
|
||||
* src/bittorrent_helper.cc
|
||||
* src/util.cc
|
||||
* src/util.h
|
||||
* test/BittorrentHelperTest.cc
|
||||
* test/UtilTest.cc
|
||||
|
||||
2010-09-26 Tatsuhiro Tsujikawa <t-tujikawa@users.sourceforge.net>
|
||||
|
||||
Renamed TripletGet as TupleGet. Renamed TripletNthType as
|
||||
|
|
|
@ -377,10 +377,9 @@ bool FtpNegotiationCommand::onFileSizeDetermined(uint64_t totalLength)
|
|||
getFileEntry()->setLength(totalLength);
|
||||
if(getFileEntry()->getPath().empty()) {
|
||||
getFileEntry()->setPath
|
||||
(util::applyDir
|
||||
(util::createSafePath
|
||||
(getDownloadContext()->getDir(),
|
||||
util::fixTaintedBasename
|
||||
(util::percentDecode(getRequest()->getFile()))));
|
||||
util::percentDecode(getRequest()->getFile())));
|
||||
}
|
||||
getRequestGroup()->preDownloadProcessing();
|
||||
if(getDownloadEngine()->getRequestGroupMan()->
|
||||
|
|
|
@ -159,9 +159,9 @@ bool HttpRequestCommand::executeInternal() {
|
|||
} else {
|
||||
if(getFileEntry()->getPath().empty()) {
|
||||
getFileEntry()->setPath
|
||||
(util::applyDir
|
||||
(util::createSafePath
|
||||
(getDownloadContext()->getDir(),
|
||||
util::fixTaintedBasename(getRequest()->getFile())));
|
||||
util::percentDecode(getRequest()->getFile())));
|
||||
}
|
||||
File ctrlfile(getFileEntry()->getPath()+
|
||||
DefaultBtProgressInfoFile::getSuffix());
|
||||
|
|
|
@ -168,9 +168,8 @@ bool HttpResponseCommand::executeInternal()
|
|||
getFileEntry()->setLength(totalLength);
|
||||
if(getFileEntry()->getPath().empty()) {
|
||||
getFileEntry()->setPath
|
||||
(util::applyDir
|
||||
(getDownloadContext()->getDir(),
|
||||
util::fixTaintedBasename(httpResponse->determinFilename())));
|
||||
(util::createSafePath
|
||||
(getDownloadContext()->getDir(), httpResponse->determinFilename()));
|
||||
}
|
||||
getFileEntry()->setContentType(httpResponse->getContentType());
|
||||
getRequestGroup()->preDownloadProcessing();
|
||||
|
|
|
@ -597,7 +597,7 @@ void gatherBitTorrentMetadata
|
|||
const SharedHandle<TorrentAttribute>& torrentAttrs)
|
||||
{
|
||||
if(!torrentAttrs->comment.empty()) {
|
||||
btDict->put(KEY_COMMENT, torrentAttrs->comment);
|
||||
btDict->put(KEY_COMMENT, util::encodeNonUtf8(torrentAttrs->comment));
|
||||
}
|
||||
if(torrentAttrs->creationDate) {
|
||||
btDict->put(KEY_CREATION_DATE, Integer::g(torrentAttrs->creationDate));
|
||||
|
@ -619,7 +619,7 @@ void gatherBitTorrentMetadata
|
|||
btDict->put(KEY_ANNOUNCE_LIST, destAnnounceList);
|
||||
if(!torrentAttrs->metadata.empty()) {
|
||||
SharedHandle<Dict> infoDict = Dict::g();
|
||||
infoDict->put(KEY_NAME, torrentAttrs->name);
|
||||
infoDict->put(KEY_NAME, util::encodeNonUtf8(torrentAttrs->name));
|
||||
btDict->put(KEY_INFO, infoDict);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -187,6 +187,7 @@ static void extractFileEntries
|
|||
const std::vector<std::string>& urlList)
|
||||
{
|
||||
std::string name;
|
||||
std::string utf8Name;
|
||||
if(overrideName.empty()) {
|
||||
std::string nameKey;
|
||||
if(infoDict->containsKey(C_NAME_UTF8)) {
|
||||
|
@ -196,17 +197,18 @@ static void extractFileEntries
|
|||
}
|
||||
const String* nameData = asString(infoDict->get(nameKey));
|
||||
if(nameData) {
|
||||
if(util::detectDirTraversal(nameData->s())) {
|
||||
utf8Name = util::encodeNonUtf8(nameData->s());
|
||||
if(util::detectDirTraversal(utf8Name)) {
|
||||
throw DL_ABORT_EX
|
||||
(StringFormat
|
||||
(MSG_DIR_TRAVERSAL_DETECTED,nameData->s().c_str()).str());
|
||||
}
|
||||
name = nameData->s();
|
||||
} else {
|
||||
name = strconcat(File(defaultName).getBasename(), ".file");
|
||||
name = utf8Name = strconcat(File(defaultName).getBasename(), ".file");
|
||||
}
|
||||
} else {
|
||||
name = overrideName;
|
||||
name = utf8Name = overrideName;
|
||||
}
|
||||
torrent->name = name;
|
||||
std::vector<SharedHandle<FileEntry> > fileEntries;
|
||||
|
@ -255,9 +257,11 @@ static void extractFileEntries
|
|||
}
|
||||
}
|
||||
std::string path = strjoin(pathelem.begin(), pathelem.end(), '/');
|
||||
if(util::detectDirTraversal(path)) {
|
||||
std::string utf8Path = strjoin(pathelem.begin(), pathelem.end(), '/',
|
||||
std::ptr_fun(util::encodeNonUtf8));
|
||||
if(util::detectDirTraversal(utf8Path)) {
|
||||
throw DL_ABORT_EX
|
||||
(StringFormat(MSG_DIR_TRAVERSAL_DETECTED, path.c_str()).str());
|
||||
(StringFormat(MSG_DIR_TRAVERSAL_DETECTED, utf8Path.c_str()).str());
|
||||
}
|
||||
std::string pePath =
|
||||
strjoin(pathelem.begin(), pathelem.end(), '/',
|
||||
|
@ -266,9 +270,8 @@ static void extractFileEntries
|
|||
std::vector<std::string> uris;
|
||||
createUri(urlList.begin(), urlList.end(),std::back_inserter(uris),pePath);
|
||||
SharedHandle<FileEntry> fileEntry
|
||||
(new FileEntry(util::applyDir(ctx->getDir(), util::escapePath(path)),
|
||||
fileLengthData->i(),
|
||||
offset, uris));
|
||||
(new FileEntry(util::applyDir(ctx->getDir(),util::escapePath(utf8Path)),
|
||||
fileLengthData->i(), offset, uris));
|
||||
fileEntry->setOriginalName(path);
|
||||
fileEntries.push_back(fileEntry);
|
||||
offset += fileEntry->getLength();
|
||||
|
@ -294,17 +297,15 @@ static void extractFileEntries
|
|||
uris.push_back(*i);
|
||||
}
|
||||
}
|
||||
|
||||
SharedHandle<FileEntry> fileEntry
|
||||
(new FileEntry(util::applyDir(ctx->getDir(), util::escapePath(name)),
|
||||
totalLength, 0,
|
||||
uris));
|
||||
(new FileEntry(util::applyDir(ctx->getDir(), util::escapePath(utf8Name)),
|
||||
totalLength, 0, uris));
|
||||
fileEntry->setOriginalName(name);
|
||||
fileEntries.push_back(fileEntry);
|
||||
}
|
||||
ctx->setFileEntries(fileEntries.begin(), fileEntries.end());
|
||||
if(torrent->mode == MULTI) {
|
||||
ctx->setBasePath(util::applyDir(ctx->getDir(), name));
|
||||
ctx->setBasePath(util::applyDir(ctx->getDir(), utf8Name));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
89
src/util.cc
89
src/util.cc
|
@ -291,6 +291,80 @@ bool inRFC2616HttpToken(const char c)
|
|||
std::find(vbegin(chars), vend(chars), c) != vend(chars);
|
||||
}
|
||||
|
||||
namespace {
// Reports whether ch lies within the closed interval [s, t].
bool in(unsigned char ch, unsigned char s, unsigned char t)
{
  return !(ch < s || t < ch);
}
}

namespace {
// Reports whether ch is a UTF-8 continuation byte (0x80-0xbf).
bool isUtf8Tail(unsigned char ch)
{
  return in(ch, 0x80, 0xbf);
}
}

// Returns true if every byte sequence in str matches the UTF-8 ABNF
// in http://tools.ietf.org/search/rfc3629#section-4, with one
// restriction: among single-byte characters only 0x20-0x7e, TAB
// (0x09), LF (0x0a) and CR (0x0d) are accepted. An empty string is
// accepted. Any other control byte, a malformed or truncated
// multi-byte sequence, or a lead byte outside the grammar makes the
// function return false.
bool isUtf8(const std::string& str)
{
  typedef std::string::size_type Index;
  const Index length = str.size();
  Index pos = 0;
  while(pos < length) {
    const unsigned char lead = str[pos];
    ++pos;
    // UTF8-1 (without ctrl chars)
    if(in(lead, 0x20, 0x7e) ||
       lead == 0x09 || lead == 0x0a || lead == 0x0d) {
      continue;
    }
    // Number of bytes that must follow the lead byte, and the range
    // allowed for the first of them. The first trailing byte is the
    // only one whose range depends on the lead byte; the remaining
    // ones are plain UTF8-tail bytes.
    Index tailCount;
    unsigned char lowest = 0x80;
    unsigned char highest = 0xbf;
    if(in(lead, 0xc2, 0xdf)) {
      // UTF8-2
      tailCount = 1;
    } else if(in(lead, 0xe0, 0xef)) {
      // UTF8-3
      tailCount = 2;
      if(lead == 0xe0) {
        lowest = 0xa0;
      } else if(lead == 0xed) {
        highest = 0x9f;
      }
    } else if(in(lead, 0xf0, 0xf4)) {
      // UTF8-4
      tailCount = 3;
      if(lead == 0xf0) {
        lowest = 0x90;
      } else if(lead == 0xf4) {
        highest = 0x8f;
      }
    } else {
      // Not a lead byte permitted by the grammar (stray tail byte,
      // 0xc0, 0xc1, 0xf5 and above, or a rejected control char).
      return false;
    }
    // Sequence is cut short by the end of the string.
    if(length - pos < tailCount) {
      return false;
    }
    if(!in(str[pos], lowest, highest)) {
      return false;
    }
    for(Index k = 1; k < tailCount; ++k) {
      if(!isUtf8Tail(str[pos+k])) {
        return false;
      }
    }
    pos += tailCount;
  }
  return true;
}
|
||||
|
||||
std::string percentEncode(const unsigned char* target, size_t len) {
|
||||
std::string dest;
|
||||
for(size_t i = 0; i < len; ++i) {
|
||||
|
@ -1406,6 +1480,21 @@ void executeHookByOptName
|
|||
}
|
||||
}
|
||||
|
||||
std::string createSafePath
|
||||
(const std::string& dir, const std::string& filename)
|
||||
{
|
||||
return util::applyDir
|
||||
(dir,
|
||||
util::isUtf8(filename)?
|
||||
util::fixTaintedBasename(filename):
|
||||
util::escapePath(util::percentEncode(filename)));
|
||||
}
|
||||
|
||||
std::string encodeNonUtf8(const std::string& s)
|
||||
{
|
||||
return util::isUtf8(s)?s:util::percentEncode(s);
|
||||
}
|
||||
|
||||
} // namespace util
|
||||
|
||||
} // namespace aria2
|
||||
|
|
|
@ -148,6 +148,8 @@ bool inRFC3986ReservedChars(const char c);
|
|||
|
||||
bool inRFC3986UnreservedChars(const char c);
|
||||
|
||||
bool isUtf8(const std::string& str);
|
||||
|
||||
std::string percentDecode(const std::string& target);
|
||||
|
||||
std::string torrentPercentEncode(const unsigned char* target, size_t len);
|
||||
|
@ -405,6 +407,10 @@ void executeHookByOptName
|
|||
void executeHookByOptName
|
||||
(const RequestGroup* group, const Option* option, const std::string& opt);
|
||||
|
||||
std::string createSafePath(const std::string& dir, const std::string& filename);
|
||||
|
||||
std::string encodeNonUtf8(const std::string& s);
|
||||
|
||||
} // namespace util
|
||||
|
||||
} // namespace aria2
|
||||
|
|
|
@ -52,6 +52,8 @@ class BittorrentHelperTest:public CppUnit::TestFixture {
|
|||
CPPUNIT_TEST(testLoadFromMemory_overrideName);
|
||||
CPPUNIT_TEST(testLoadFromMemory_multiFileDirTraversal);
|
||||
CPPUNIT_TEST(testLoadFromMemory_singleFileDirTraversal);
|
||||
CPPUNIT_TEST(testLoadFromMemory_multiFileNonUtf8Path);
|
||||
CPPUNIT_TEST(testLoadFromMemory_singleFileNonUtf8Path);
|
||||
CPPUNIT_TEST(testGetNodes);
|
||||
CPPUNIT_TEST(testGetBasePath);
|
||||
CPPUNIT_TEST(testSetFileFilter_single);
|
||||
|
@ -102,6 +104,8 @@ public:
|
|||
void testLoadFromMemory_overrideName();
|
||||
void testLoadFromMemory_multiFileDirTraversal();
|
||||
void testLoadFromMemory_singleFileDirTraversal();
|
||||
void testLoadFromMemory_multiFileNonUtf8Path();
|
||||
void testLoadFromMemory_singleFileNonUtf8Path();
|
||||
void testGetNodes();
|
||||
void testGetBasePath();
|
||||
void testSetFileFilter_single();
|
||||
|
@ -400,6 +404,50 @@ void BittorrentHelperTest::testGetFileEntries_singleFileUrlListEndsWithSlash() {
|
|||
uris1[0]);
|
||||
}
|
||||
|
||||
void BittorrentHelperTest::testLoadFromMemory_multiFileNonUtf8Path()
|
||||
{
|
||||
SharedHandle<List> path = List::g();
|
||||
path->append("path");
|
||||
path->append(util::fromHex("90a28a")+"E");
|
||||
SharedHandle<Dict> file = Dict::g();
|
||||
file->put("length", Integer::g(1024));
|
||||
file->put("path", path);
|
||||
SharedHandle<List> files = List::g();
|
||||
files->append(file);
|
||||
SharedHandle<Dict> info = Dict::g();
|
||||
info->put("files", files);
|
||||
info->put("piece length", Integer::g(1024));
|
||||
info->put("pieces", "01234567890123456789");
|
||||
info->put("name", util::fromHex("1b")+"$B%O%m!<"+util::fromHex("1b")+"(B");
|
||||
Dict dict;
|
||||
dict.put("info", info);
|
||||
SharedHandle<DownloadContext> dctx(new DownloadContext());
|
||||
loadFromMemory(bencode2::encode(&dict), dctx, "default");
|
||||
|
||||
const SharedHandle<FileEntry>& fe = dctx->getFirstFileEntry();
|
||||
CPPUNIT_ASSERT_EQUAL
|
||||
(std::string("./%1B%24B%25O%25m%21%3C%1B%28B/path/%90%A2%8AE"),
|
||||
fe->getPath());
|
||||
CPPUNIT_ASSERT_EQUAL
|
||||
(std::string("./%1B%24B%25O%25m%21%3C%1B%28B"), dctx->getBasePath());
|
||||
}
|
||||
|
||||
void BittorrentHelperTest::testLoadFromMemory_singleFileNonUtf8Path()
|
||||
{
|
||||
SharedHandle<Dict> info = Dict::g();
|
||||
info->put("piece length", Integer::g(1024));
|
||||
info->put("pieces", "01234567890123456789");
|
||||
info->put("name", util::fromHex("90a28a")+"E");
|
||||
info->put("length", Integer::g(1024));
|
||||
Dict dict;
|
||||
dict.put("info", info);
|
||||
SharedHandle<DownloadContext> dctx(new DownloadContext());
|
||||
loadFromMemory(bencode2::encode(&dict), dctx, "default");
|
||||
|
||||
const SharedHandle<FileEntry>& fe = dctx->getFirstFileEntry();
|
||||
CPPUNIT_ASSERT_EQUAL(std::string("./%90%A2%8AE"), fe->getPath());
|
||||
}
|
||||
|
||||
void BittorrentHelperTest::testLoadFromMemory()
|
||||
{
|
||||
std::string memory = "d8:announce36:http://aria.rednoah.com/announce.php13:announce-listll16:http://tracker1 el15:http://tracker2el15:http://tracker3ee7:comment17:REDNOAH.COM RULES13:creation datei1123456789e4:infod5:filesld6:lengthi284e4:pathl5:aria23:src6:aria2ceed6:lengthi100e4:pathl19:aria2-0.2.2.tar.bz2eee4:name10:aria2-test12:piece lengthi128e6:pieces60:AAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCee";
|
||||
|
|
|
@ -65,6 +65,7 @@ class UtilTest:public CppUnit::TestFixture {
|
|||
CPPUNIT_TEST(testEscapePath);
|
||||
CPPUNIT_TEST(testGetCidrPrefix);
|
||||
CPPUNIT_TEST(testInSameCidrBlock);
|
||||
CPPUNIT_TEST(testIsUtf8String);
|
||||
CPPUNIT_TEST_SUITE_END();
|
||||
private:
|
||||
|
||||
|
@ -118,6 +119,7 @@ public:
|
|||
void testEscapePath();
|
||||
void testGetCidrPrefix();
|
||||
void testInSameCidrBlock();
|
||||
void testIsUtf8String();
|
||||
};
|
||||
|
||||
|
||||
|
@ -1098,4 +1100,36 @@ void UtilTest::testInSameCidrBlock()
|
|||
CPPUNIT_ASSERT(!util::inSameCidrBlock("192.168.128.1", "192.168.0.1", 17));
|
||||
}
|
||||
|
||||
void UtilTest::testIsUtf8String()
|
||||
{
|
||||
CPPUNIT_ASSERT(util::isUtf8("ascii"));
|
||||
// "Hello World" in Japanese UTF-8
|
||||
CPPUNIT_ASSERT(util::isUtf8
|
||||
(util::fromHex("e38193e38293e381abe381a1e381afe4b896e7958c")));
|
||||
// "World" in Shift_JIS
|
||||
CPPUNIT_ASSERT(!util::isUtf8(util::fromHex("90a28a")+"E"));
|
||||
// UTF8-2
|
||||
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("c280")));
|
||||
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("dfbf")));
|
||||
// UTF8-3
|
||||
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("e0a080")));
|
||||
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("e0bf80")));
|
||||
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("e18080")));
|
||||
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("ec8080")));
|
||||
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("ed8080")));
|
||||
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("ed9f80")));
|
||||
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("ee8080")));
|
||||
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("ef8080")));
|
||||
// UTF8-4
|
||||
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("f0908080")));
|
||||
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("f0bf8080")));
|
||||
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("f1808080")));
|
||||
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("f3808080")));
|
||||
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("f4808080")));
|
||||
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("f48f8080")));
|
||||
|
||||
CPPUNIT_ASSERT(util::isUtf8(""));
|
||||
CPPUNIT_ASSERT(!util::isUtf8(util::fromHex("00")));
|
||||
}
|
||||
|
||||
} // namespace aria2
|
||||
|
|
Loading…
Reference in New Issue