diff --git a/ChangeLog b/ChangeLog index 3847493f..bc973a45 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +2010-10-02 Tatsuhiro Tsujikawa + + Non-UTF8 filenames are now percent-encoded. For example, filename + for http://example.org/%90%A2%8AE will be %90%A2%8AE because it is + Shift_JIS. The comments and name in .torrent file in XML-RPC + response are percent-encoded if they are not UTF-8. + * src/FtpNegotiationCommand.cc + * src/HttpRequestCommand.cc + * src/HttpResponseCommand.cc + * src/XmlRpcMethodImpl.cc + * src/bittorrent_helper.cc + * src/util.cc + * src/util.h + * test/BittorrentHelperTest.cc + * test/UtilTest.cc + 2010-09-26 Tatsuhiro Tsujikawa Renamed TripletGet as TupleGet. Renamed TripletNthType as diff --git a/src/FtpNegotiationCommand.cc b/src/FtpNegotiationCommand.cc index ae1e1c9d..ce6c2b6b 100644 --- a/src/FtpNegotiationCommand.cc +++ b/src/FtpNegotiationCommand.cc @@ -377,10 +377,9 @@ bool FtpNegotiationCommand::onFileSizeDetermined(uint64_t totalLength) getFileEntry()->setLength(totalLength); if(getFileEntry()->getPath().empty()) { getFileEntry()->setPath - (util::applyDir + (util::createSafePath (getDownloadContext()->getDir(), - util::fixTaintedBasename - (util::percentDecode(getRequest()->getFile())))); + util::percentDecode(getRequest()->getFile()))); } getRequestGroup()->preDownloadProcessing(); if(getDownloadEngine()->getRequestGroupMan()-> diff --git a/src/HttpRequestCommand.cc b/src/HttpRequestCommand.cc index 852f6bb1..bc446505 100644 --- a/src/HttpRequestCommand.cc +++ b/src/HttpRequestCommand.cc @@ -159,9 +159,9 @@ bool HttpRequestCommand::executeInternal() { } else { if(getFileEntry()->getPath().empty()) { getFileEntry()->setPath - (util::applyDir + (util::createSafePath (getDownloadContext()->getDir(), - util::fixTaintedBasename(getRequest()->getFile()))); + util::percentDecode(getRequest()->getFile()))); } File ctrlfile(getFileEntry()->getPath()+ DefaultBtProgressInfoFile::getSuffix()); diff --git a/src/HttpResponseCommand.cc b/src/HttpResponseCommand.cc index ae741fc3..4c160d0d 100644 --- a/src/HttpResponseCommand.cc +++ b/src/HttpResponseCommand.cc @@ -168,9 +168,8 @@ bool HttpResponseCommand::executeInternal() getFileEntry()->setLength(totalLength); if(getFileEntry()->getPath().empty()) { getFileEntry()->setPath - (util::applyDir - (getDownloadContext()->getDir(), - util::fixTaintedBasename(httpResponse->determinFilename()))); + (util::createSafePath + (getDownloadContext()->getDir(), httpResponse->determinFilename())); } getFileEntry()->setContentType(httpResponse->getContentType()); getRequestGroup()->preDownloadProcessing(); diff --git a/src/XmlRpcMethodImpl.cc b/src/XmlRpcMethodImpl.cc index 3e3d63a7..6a56e70d 100644 --- a/src/XmlRpcMethodImpl.cc +++ b/src/XmlRpcMethodImpl.cc @@ -597,7 +597,7 @@ void gatherBitTorrentMetadata const SharedHandle& torrentAttrs) { if(!torrentAttrs->comment.empty()) { - btDict->put(KEY_COMMENT, torrentAttrs->comment); + btDict->put(KEY_COMMENT, util::encodeNonUtf8(torrentAttrs->comment)); } if(torrentAttrs->creationDate) { btDict->put(KEY_CREATION_DATE, Integer::g(torrentAttrs->creationDate)); @@ -619,7 +619,7 @@ void gatherBitTorrentMetadata btDict->put(KEY_ANNOUNCE_LIST, destAnnounceList); if(!torrentAttrs->metadata.empty()) { SharedHandle infoDict = Dict::g(); - infoDict->put(KEY_NAME, torrentAttrs->name); + infoDict->put(KEY_NAME, util::encodeNonUtf8(torrentAttrs->name)); btDict->put(KEY_INFO, infoDict); } } diff --git a/src/bittorrent_helper.cc b/src/bittorrent_helper.cc index bfa204ce..a54ab525 100644 --- a/src/bittorrent_helper.cc +++ b/src/bittorrent_helper.cc @@ -187,6 +187,7 @@ static void extractFileEntries const std::vector& urlList) { std::string name; + std::string utf8Name; if(overrideName.empty()) { std::string nameKey; if(infoDict->containsKey(C_NAME_UTF8)) { @@ -196,17 +197,18 @@ static void extractFileEntries } const String* nameData = asString(infoDict->get(nameKey)); if(nameData) { - if(util::detectDirTraversal(nameData->s())) { + utf8Name = util::encodeNonUtf8(nameData->s()); + if(util::detectDirTraversal(utf8Name)) { throw DL_ABORT_EX (StringFormat (MSG_DIR_TRAVERSAL_DETECTED,nameData->s().c_str()).str()); } name = nameData->s(); } else { - name = strconcat(File(defaultName).getBasename(), ".file"); + name = utf8Name = strconcat(File(defaultName).getBasename(), ".file"); } } else { - name = overrideName; + name = utf8Name = overrideName; } torrent->name = name; std::vector > fileEntries; @@ -255,9 +257,11 @@ static void extractFileEntries } } std::string path = strjoin(pathelem.begin(), pathelem.end(), '/'); - if(util::detectDirTraversal(path)) { + std::string utf8Path = strjoin(pathelem.begin(), pathelem.end(), '/', + std::ptr_fun(util::encodeNonUtf8)); + if(util::detectDirTraversal(utf8Path)) { throw DL_ABORT_EX - (StringFormat(MSG_DIR_TRAVERSAL_DETECTED, path.c_str()).str()); + (StringFormat(MSG_DIR_TRAVERSAL_DETECTED, utf8Path.c_str()).str()); } std::string pePath = strjoin(pathelem.begin(), pathelem.end(), '/', @@ -266,9 +270,8 @@ static void extractFileEntries std::vector uris; createUri(urlList.begin(), urlList.end(),std::back_inserter(uris),pePath); SharedHandle fileEntry - (new FileEntry(util::applyDir(ctx->getDir(), util::escapePath(path)), - fileLengthData->i(), - offset, uris)); + (new FileEntry(util::applyDir(ctx->getDir(),util::escapePath(utf8Path)), + fileLengthData->i(), offset, uris)); fileEntry->setOriginalName(path); fileEntries.push_back(fileEntry); offset += fileEntry->getLength(); @@ -294,17 +297,15 @@ static void extractFileEntries uris.push_back(*i); } } - SharedHandle fileEntry - (new FileEntry(util::applyDir(ctx->getDir(), util::escapePath(name)), - totalLength, 0, - uris)); + (new FileEntry(util::applyDir(ctx->getDir(), util::escapePath(utf8Name)), + totalLength, 0, uris)); fileEntry->setOriginalName(name); fileEntries.push_back(fileEntry); } ctx->setFileEntries(fileEntries.begin(), fileEntries.end()); if(torrent->mode == MULTI) { - ctx->setBasePath(util::applyDir(ctx->getDir(), name)); + ctx->setBasePath(util::applyDir(ctx->getDir(), utf8Name)); } } diff --git a/src/util.cc b/src/util.cc index 6a0944e4..1022aa9c 100644 --- a/src/util.cc +++ b/src/util.cc @@ -291,6 +291,80 @@ bool inRFC2616HttpToken(const char c) std::find(vbegin(chars), vend(chars), c) != vend(chars); } +namespace { +bool in(unsigned char ch, unsigned char s, unsigned char t) +{ + return s <= ch && ch <= t; +} +} + +namespace { +bool isUtf8Tail(unsigned char ch) +{ + return in(ch, 0x80, 0xbf); +} +} + +bool isUtf8(const std::string& str) +{ + for(std::string::const_iterator s = str.begin(), eos = str.end(); s != eos; + ++s) { + unsigned char firstChar = *s; + // See ABNF in http://tools.ietf.org/search/rfc3629#section-4 + if(in(firstChar, 0x20, 0x7e) || + firstChar == 0x09 || firstChar == 0x0a ||firstChar == 0x0d) { + // UTF8-1 (without ctrl chars) + } else if(in(firstChar, 0xc2, 0xdf)) { + // UTF8-2 + if(++s == eos || !isUtf8Tail(*s)) { + return false; + } + } else if(0xe0 == firstChar) { + // UTF8-3 + if(++s == eos || !in(*s, 0xa0, 0xbf) || + ++s == eos || !isUtf8Tail(*s)) { + return false; + } + } else if(in(firstChar, 0xe1, 0xec) || in(firstChar, 0xee, 0xef)) { + // UTF8-3 + if(++s == eos || !isUtf8Tail(*s) || + ++s == eos || !isUtf8Tail(*s)) { + return false; + } + } else if(0xed == firstChar) { + // UTF8-3 + if(++s == eos || !in(*s, 0x80, 0x9f) || + ++s == eos || !isUtf8Tail(*s)) { + return false; + } + } else if(0xf0 == firstChar) { + // UTF8-4 + if(++s == eos || !in(*s, 0x90, 0xbf) || + ++s == eos || !isUtf8Tail(*s) || + ++s == eos || !isUtf8Tail(*s)) { + return false; + } + } else if(in(firstChar, 0xf1, 0xf3)) { + // UTF8-4 + if(++s == eos || !isUtf8Tail(*s) || + ++s == eos || !isUtf8Tail(*s) || + ++s == eos || !isUtf8Tail(*s)) { + return false; + } + } else if(0xf4 == firstChar) { + // UTF8-4 + if(++s == eos || !in(*s, 0x80, 0x8f) || + ++s == eos || !isUtf8Tail(*s) || + ++s == eos || !isUtf8Tail(*s)) { + return false; + } + } else { + return false; + } + } + return true; +} + std::string percentEncode(const unsigned char* target, size_t len) { std::string dest; for(size_t i = 0; i < len; ++i) { @@ -1406,6 +1480,21 @@ void executeHookByOptName } } +std::string createSafePath +(const std::string& dir, const std::string& filename) +{ + return util::applyDir + (dir, + util::isUtf8(filename)? + util::fixTaintedBasename(filename): + util::escapePath(util::percentEncode(filename))); +} + +std::string encodeNonUtf8(const std::string& s) +{ + return util::isUtf8(s)?s:util::percentEncode(s); +} + } // namespace util } // namespace aria2 diff --git a/src/util.h b/src/util.h index af241569..e47d6a67 100644 --- a/src/util.h +++ b/src/util.h @@ -148,6 +148,8 @@ bool inRFC3986ReservedChars(const char c); bool inRFC3986UnreservedChars(const char c); +bool isUtf8(const std::string& str); + std::string percentDecode(const std::string& target); std::string torrentPercentEncode(const unsigned char* target, size_t len); @@ -405,6 +407,10 @@ void executeHookByOptName void executeHookByOptName (const RequestGroup* group, const Option* option, const std::string& opt); +std::string createSafePath(const std::string& dir, const std::string& filename); + +std::string encodeNonUtf8(const std::string& s); + } // namespace util } // namespace aria2 diff --git a/test/BittorrentHelperTest.cc b/test/BittorrentHelperTest.cc index dbb2b388..531660b1 100644 --- a/test/BittorrentHelperTest.cc +++ b/test/BittorrentHelperTest.cc @@ -52,6 +52,8 @@ class BittorrentHelperTest:public CppUnit::TestFixture { CPPUNIT_TEST(testLoadFromMemory_overrideName); CPPUNIT_TEST(testLoadFromMemory_multiFileDirTraversal); CPPUNIT_TEST(testLoadFromMemory_singleFileDirTraversal); + CPPUNIT_TEST(testLoadFromMemory_multiFileNonUtf8Path); + CPPUNIT_TEST(testLoadFromMemory_singleFileNonUtf8Path); CPPUNIT_TEST(testGetNodes); CPPUNIT_TEST(testGetBasePath); CPPUNIT_TEST(testSetFileFilter_single); @@ -102,6 +104,8 @@ public: void testLoadFromMemory_overrideName(); void testLoadFromMemory_multiFileDirTraversal(); void testLoadFromMemory_singleFileDirTraversal(); + void testLoadFromMemory_multiFileNonUtf8Path(); + void testLoadFromMemory_singleFileNonUtf8Path(); void testGetNodes(); void testGetBasePath(); void testSetFileFilter_single(); @@ -400,6 +404,50 @@ void BittorrentHelperTest::testGetFileEntries_singleFileUrlListEndsWithSlash() { uris1[0]); } +void BittorrentHelperTest::testLoadFromMemory_multiFileNonUtf8Path() +{ + SharedHandle path = List::g(); + path->append("path"); + path->append(util::fromHex("90a28a")+"E"); + SharedHandle file = Dict::g(); + file->put("length", Integer::g(1024)); + file->put("path", path); + SharedHandle files = List::g(); + files->append(file); + SharedHandle info = Dict::g(); + info->put("files", files); + info->put("piece length", Integer::g(1024)); + info->put("pieces", "01234567890123456789"); + info->put("name", util::fromHex("1b")+"$B%O%m!<"+util::fromHex("1b")+"(B"); + Dict dict; + dict.put("info", info); + SharedHandle dctx(new DownloadContext()); + loadFromMemory(bencode2::encode(&dict), dctx, "default"); + + const SharedHandle& fe = dctx->getFirstFileEntry(); + CPPUNIT_ASSERT_EQUAL + (std::string("./%1B%24B%25O%25m%21%3C%1B%28B/path/%90%A2%8AE"), + fe->getPath()); + CPPUNIT_ASSERT_EQUAL + (std::string("./%1B%24B%25O%25m%21%3C%1B%28B"), dctx->getBasePath()); +} + +void BittorrentHelperTest::testLoadFromMemory_singleFileNonUtf8Path() +{ + SharedHandle info = Dict::g(); + info->put("piece length", Integer::g(1024)); + info->put("pieces", "01234567890123456789"); + info->put("name", util::fromHex("90a28a")+"E"); + info->put("length", Integer::g(1024)); + Dict dict; + dict.put("info", info); + SharedHandle dctx(new DownloadContext()); + loadFromMemory(bencode2::encode(&dict), dctx, "default"); + + const SharedHandle& fe = dctx->getFirstFileEntry(); + CPPUNIT_ASSERT_EQUAL(std::string("./%90%A2%8AE"), fe->getPath()); +} + void BittorrentHelperTest::testLoadFromMemory() { std::string memory = "d8:announce36:http://aria.rednoah.com/announce.php13:announce-listll16:http://tracker1 el15:http://tracker2el15:http://tracker3ee7:comment17:REDNOAH.COM RULES13:creation datei1123456789e4:infod5:filesld6:lengthi284e4:pathl5:aria23:src6:aria2ceed6:lengthi100e4:pathl19:aria2-0.2.2.tar.bz2eee4:name10:aria2-test12:piece lengthi128e6:pieces60:AAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCee"; diff --git a/test/UtilTest.cc b/test/UtilTest.cc index 6a3700da..be255fd3 100644 --- a/test/UtilTest.cc +++ b/test/UtilTest.cc @@ -65,6 +65,7 @@ class UtilTest:public CppUnit::TestFixture { CPPUNIT_TEST(testEscapePath); CPPUNIT_TEST(testGetCidrPrefix); CPPUNIT_TEST(testInSameCidrBlock); + CPPUNIT_TEST(testIsUtf8String); CPPUNIT_TEST_SUITE_END(); private: @@ -118,6 +119,7 @@ public: void testEscapePath(); void testGetCidrPrefix(); void testInSameCidrBlock(); + void testIsUtf8String(); }; @@ -1098,4 +1100,36 @@ void UtilTest::testInSameCidrBlock() CPPUNIT_ASSERT(!util::inSameCidrBlock("192.168.128.1", "192.168.0.1", 17)); } +void UtilTest::testIsUtf8String() +{ + CPPUNIT_ASSERT(util::isUtf8("ascii")); + // "Hello World" in Japanese UTF-8 + CPPUNIT_ASSERT(util::isUtf8 + (util::fromHex("e38193e38293e381abe381a1e381afe4b896e7958c"))); + // "World" in Shift_JIS + CPPUNIT_ASSERT(!util::isUtf8(util::fromHex("90a28a")+"E")); + // UTF8-2 + CPPUNIT_ASSERT(util::isUtf8(util::fromHex("c280"))); + CPPUNIT_ASSERT(util::isUtf8(util::fromHex("dfbf"))); + // UTF8-3 + CPPUNIT_ASSERT(util::isUtf8(util::fromHex("e0a080"))); + CPPUNIT_ASSERT(util::isUtf8(util::fromHex("e0bf80"))); + CPPUNIT_ASSERT(util::isUtf8(util::fromHex("e18080"))); + CPPUNIT_ASSERT(util::isUtf8(util::fromHex("ec8080"))); + CPPUNIT_ASSERT(util::isUtf8(util::fromHex("ed8080"))); + CPPUNIT_ASSERT(util::isUtf8(util::fromHex("ed9f80"))); + CPPUNIT_ASSERT(util::isUtf8(util::fromHex("ee8080"))); + CPPUNIT_ASSERT(util::isUtf8(util::fromHex("ef8080"))); + // UTF8-4 + CPPUNIT_ASSERT(util::isUtf8(util::fromHex("f0908080"))); + CPPUNIT_ASSERT(util::isUtf8(util::fromHex("f0bf8080"))); + CPPUNIT_ASSERT(util::isUtf8(util::fromHex("f1808080"))); + CPPUNIT_ASSERT(util::isUtf8(util::fromHex("f3808080"))); + CPPUNIT_ASSERT(util::isUtf8(util::fromHex("f4808080"))); + CPPUNIT_ASSERT(util::isUtf8(util::fromHex("f48f8080"))); + + CPPUNIT_ASSERT(util::isUtf8("")); + CPPUNIT_ASSERT(!util::isUtf8(util::fromHex("00"))); +} + } // namespace aria2