2010-10-02 Tatsuhiro Tsujikawa <t-tujikawa@users.sourceforge.net>

Non-UTF8 filenames are now percent-encoded.  For example, the filename
	for http://example.org/%90%A2%8AE will be %90%A2%8AE because its
	percent-decoded bytes are Shift_JIS, not valid UTF-8.  The comment
	and name of a .torrent file are percent-encoded in the XML-RPC
	response if they are not UTF-8.
	* src/FtpNegotiationCommand.cc
	* src/HttpRequestCommand.cc
	* src/HttpResponseCommand.cc
	* src/XmlRpcMethodImpl.cc
	* src/bittorrent_helper.cc
	* src/util.cc
	* src/util.h
	* test/BittorrentHelperTest.cc
	* test/UtilTest.cc
pull/1/head
Tatsuhiro Tsujikawa 2010-10-02 07:54:43 +00:00
parent 6c348f0493
commit d956ea0b70
10 changed files with 215 additions and 23 deletions

View File

@ -1,3 +1,19 @@
2010-10-02 Tatsuhiro Tsujikawa <t-tujikawa@users.sourceforge.net>
Non-UTF8 filenames are now percent-encoded. For example, the filename
for http://example.org/%90%A2%8AE will be %90%A2%8AE because its
percent-decoded bytes are Shift_JIS, not valid UTF-8. The comment and
name of a .torrent file are percent-encoded in the XML-RPC response
if they are not UTF-8.
* src/FtpNegotiationCommand.cc
* src/HttpRequestCommand.cc
* src/HttpResponseCommand.cc
* src/XmlRpcMethodImpl.cc
* src/bittorrent_helper.cc
* src/util.cc
* src/util.h
* test/BittorrentHelperTest.cc
* test/UtilTest.cc
2010-09-26 Tatsuhiro Tsujikawa <t-tujikawa@users.sourceforge.net>
Renamed TripletGet as TupleGet. Renamed TripletNthType as

View File

@ -377,10 +377,9 @@ bool FtpNegotiationCommand::onFileSizeDetermined(uint64_t totalLength)
getFileEntry()->setLength(totalLength);
if(getFileEntry()->getPath().empty()) {
getFileEntry()->setPath
(util::applyDir
(util::createSafePath
(getDownloadContext()->getDir(),
util::fixTaintedBasename
(util::percentDecode(getRequest()->getFile()))));
util::percentDecode(getRequest()->getFile())));
}
getRequestGroup()->preDownloadProcessing();
if(getDownloadEngine()->getRequestGroupMan()->

View File

@ -159,9 +159,9 @@ bool HttpRequestCommand::executeInternal() {
} else {
if(getFileEntry()->getPath().empty()) {
getFileEntry()->setPath
(util::applyDir
(util::createSafePath
(getDownloadContext()->getDir(),
util::fixTaintedBasename(getRequest()->getFile())));
util::percentDecode(getRequest()->getFile())));
}
File ctrlfile(getFileEntry()->getPath()+
DefaultBtProgressInfoFile::getSuffix());

View File

@ -168,9 +168,8 @@ bool HttpResponseCommand::executeInternal()
getFileEntry()->setLength(totalLength);
if(getFileEntry()->getPath().empty()) {
getFileEntry()->setPath
(util::applyDir
(getDownloadContext()->getDir(),
util::fixTaintedBasename(httpResponse->determinFilename())));
(util::createSafePath
(getDownloadContext()->getDir(), httpResponse->determinFilename()));
}
getFileEntry()->setContentType(httpResponse->getContentType());
getRequestGroup()->preDownloadProcessing();

View File

@ -597,7 +597,7 @@ void gatherBitTorrentMetadata
const SharedHandle<TorrentAttribute>& torrentAttrs)
{
if(!torrentAttrs->comment.empty()) {
btDict->put(KEY_COMMENT, torrentAttrs->comment);
btDict->put(KEY_COMMENT, util::encodeNonUtf8(torrentAttrs->comment));
}
if(torrentAttrs->creationDate) {
btDict->put(KEY_CREATION_DATE, Integer::g(torrentAttrs->creationDate));
@ -619,7 +619,7 @@ void gatherBitTorrentMetadata
btDict->put(KEY_ANNOUNCE_LIST, destAnnounceList);
if(!torrentAttrs->metadata.empty()) {
SharedHandle<Dict> infoDict = Dict::g();
infoDict->put(KEY_NAME, torrentAttrs->name);
infoDict->put(KEY_NAME, util::encodeNonUtf8(torrentAttrs->name));
btDict->put(KEY_INFO, infoDict);
}
}

View File

@ -187,6 +187,7 @@ static void extractFileEntries
const std::vector<std::string>& urlList)
{
std::string name;
std::string utf8Name;
if(overrideName.empty()) {
std::string nameKey;
if(infoDict->containsKey(C_NAME_UTF8)) {
@ -196,17 +197,18 @@ static void extractFileEntries
}
const String* nameData = asString(infoDict->get(nameKey));
if(nameData) {
if(util::detectDirTraversal(nameData->s())) {
utf8Name = util::encodeNonUtf8(nameData->s());
if(util::detectDirTraversal(utf8Name)) {
throw DL_ABORT_EX
(StringFormat
(MSG_DIR_TRAVERSAL_DETECTED,nameData->s().c_str()).str());
}
name = nameData->s();
} else {
name = strconcat(File(defaultName).getBasename(), ".file");
name = utf8Name = strconcat(File(defaultName).getBasename(), ".file");
}
} else {
name = overrideName;
name = utf8Name = overrideName;
}
torrent->name = name;
std::vector<SharedHandle<FileEntry> > fileEntries;
@ -255,9 +257,11 @@ static void extractFileEntries
}
}
std::string path = strjoin(pathelem.begin(), pathelem.end(), '/');
if(util::detectDirTraversal(path)) {
std::string utf8Path = strjoin(pathelem.begin(), pathelem.end(), '/',
std::ptr_fun(util::encodeNonUtf8));
if(util::detectDirTraversal(utf8Path)) {
throw DL_ABORT_EX
(StringFormat(MSG_DIR_TRAVERSAL_DETECTED, path.c_str()).str());
(StringFormat(MSG_DIR_TRAVERSAL_DETECTED, utf8Path.c_str()).str());
}
std::string pePath =
strjoin(pathelem.begin(), pathelem.end(), '/',
@ -266,9 +270,8 @@ static void extractFileEntries
std::vector<std::string> uris;
createUri(urlList.begin(), urlList.end(),std::back_inserter(uris),pePath);
SharedHandle<FileEntry> fileEntry
(new FileEntry(util::applyDir(ctx->getDir(), util::escapePath(path)),
fileLengthData->i(),
offset, uris));
(new FileEntry(util::applyDir(ctx->getDir(),util::escapePath(utf8Path)),
fileLengthData->i(), offset, uris));
fileEntry->setOriginalName(path);
fileEntries.push_back(fileEntry);
offset += fileEntry->getLength();
@ -294,17 +297,15 @@ static void extractFileEntries
uris.push_back(*i);
}
}
SharedHandle<FileEntry> fileEntry
(new FileEntry(util::applyDir(ctx->getDir(), util::escapePath(name)),
totalLength, 0,
uris));
(new FileEntry(util::applyDir(ctx->getDir(), util::escapePath(utf8Name)),
totalLength, 0, uris));
fileEntry->setOriginalName(name);
fileEntries.push_back(fileEntry);
}
ctx->setFileEntries(fileEntries.begin(), fileEntries.end());
if(torrent->mode == MULTI) {
ctx->setBasePath(util::applyDir(ctx->getDir(), name));
ctx->setBasePath(util::applyDir(ctx->getDir(), utf8Name));
}
}

View File

@ -291,6 +291,80 @@ bool inRFC2616HttpToken(const char c)
std::find(vbegin(chars), vend(chars), c) != vend(chars);
}
namespace {
// Closed-interval membership test: true when s <= ch <= t.
bool in(unsigned char ch, unsigned char s, unsigned char t)
{
  return !(ch < s || t < ch);
}
}
namespace {
// True when ch lies in the UTF-8 continuation-byte range 0x80..0xbf.
bool isUtf8Tail(unsigned char ch)
{
  return in(ch, 0x80, 0xbf);
}
}
// Returns true when every byte of str belongs to a well-formed UTF-8
// sequence as laid out by the ABNF in
// http://tools.ietf.org/search/rfc3629#section-4, with one restriction:
// among single-byte characters only 0x20..0x7e, TAB (0x09), LF (0x0a)
// and CR (0x0d) are accepted, so any other control character makes the
// function return false.  An empty string is accepted.
bool isUtf8(const std::string& str)
{
  std::string::const_iterator cur = str.begin();
  const std::string::const_iterator last = str.end();
  while(cur != last) {
    const unsigned char lead = *cur;
    ++cur;
    // UTF8-1 (without ctrl chars): nothing follows the lead byte.
    if(lead == 0x09 || lead == 0x0a || lead == 0x0d ||
       in(lead, 0x20, 0x7e)) {
      continue;
    }
    // Every multi-byte form has a second byte whose permitted range
    // depends on the lead byte, followed by zero or more plain tail
    // bytes.
    unsigned char secondLo = 0x80;
    unsigned char secondHi = 0xbf;
    int moreTails = 0;
    if(in(lead, 0xc2, 0xdf)) {
      // UTF8-2: one ordinary tail byte, defaults apply.
    } else if(in(lead, 0xe0, 0xef)) {
      // UTF8-3
      moreTails = 1;
      if(lead == 0xe0) {
        secondLo = 0xa0;
      } else if(lead == 0xed) {
        secondHi = 0x9f;
      }
    } else if(in(lead, 0xf0, 0xf4)) {
      // UTF8-4
      moreTails = 2;
      if(lead == 0xf0) {
        secondLo = 0x90;
      } else if(lead == 0xf4) {
        secondHi = 0x8f;
      }
    } else {
      // Stray tail byte, 0xc0/0xc1, 0xf5..0xff or a rejected control
      // character.
      return false;
    }
    if(cur == last || !in(*cur, secondLo, secondHi)) {
      return false;
    }
    ++cur;
    for(; moreTails > 0; --moreTails) {
      if(cur == last || !isUtf8Tail(*cur)) {
        return false;
      }
      ++cur;
    }
  }
  return true;
}
std::string percentEncode(const unsigned char* target, size_t len) {
std::string dest;
for(size_t i = 0; i < len; ++i) {
@ -1406,6 +1480,21 @@ void executeHookByOptName
}
}
// Joins filename onto dir with util::applyDir.  A filename accepted by
// util::isUtf8 is first passed through util::fixTaintedBasename; any
// other filename is percent-encoded and then passed through
// util::escapePath.
std::string createSafePath
(const std::string& dir, const std::string& filename)
{
  if(util::isUtf8(filename)) {
    return util::applyDir(dir, util::fixTaintedBasename(filename));
  }
  return util::applyDir(dir, util::escapePath(util::percentEncode(filename)));
}
// Returns s unchanged when util::isUtf8 accepts it; otherwise returns
// the result of util::percentEncode(s).
std::string encodeNonUtf8(const std::string& s)
{
  if(!util::isUtf8(s)) {
    return util::percentEncode(s);
  }
  return s;
}
} // namespace util
} // namespace aria2

View File

@ -148,6 +148,8 @@ bool inRFC3986ReservedChars(const char c);
bool inRFC3986UnreservedChars(const char c);
// Returns true if str consists solely of well-formed UTF-8 sequences.
// Control characters other than TAB, LF and CR are rejected as well.
bool isUtf8(const std::string& str);
std::string percentDecode(const std::string& target);
std::string torrentPercentEncode(const unsigned char* target, size_t len);
@ -405,6 +407,10 @@ void executeHookByOptName
void executeHookByOptName
(const RequestGroup* group, const Option* option, const std::string& opt);
// Builds a path for filename under dir.  A filename that is not
// accepted by isUtf8() is percent-encoded before being joined to dir.
std::string createSafePath(const std::string& dir, const std::string& filename);
// Returns s as is if isUtf8(s) is true; otherwise returns s
// percent-encoded.
std::string encodeNonUtf8(const std::string& s);
} // namespace util
} // namespace aria2

View File

@ -52,6 +52,8 @@ class BittorrentHelperTest:public CppUnit::TestFixture {
CPPUNIT_TEST(testLoadFromMemory_overrideName);
CPPUNIT_TEST(testLoadFromMemory_multiFileDirTraversal);
CPPUNIT_TEST(testLoadFromMemory_singleFileDirTraversal);
CPPUNIT_TEST(testLoadFromMemory_multiFileNonUtf8Path);
CPPUNIT_TEST(testLoadFromMemory_singleFileNonUtf8Path);
CPPUNIT_TEST(testGetNodes);
CPPUNIT_TEST(testGetBasePath);
CPPUNIT_TEST(testSetFileFilter_single);
@ -102,6 +104,8 @@ public:
void testLoadFromMemory_overrideName();
void testLoadFromMemory_multiFileDirTraversal();
void testLoadFromMemory_singleFileDirTraversal();
void testLoadFromMemory_multiFileNonUtf8Path();
void testLoadFromMemory_singleFileNonUtf8Path();
void testGetNodes();
void testGetBasePath();
void testSetFileFilter_single();
@ -400,6 +404,50 @@ void BittorrentHelperTest::testGetFileEntries_singleFileUrlListEndsWithSlash() {
uris1[0]);
}
// Checks a multi-file torrent whose "name" and one "path" element are
// not valid UTF-8: both are expected to show up percent-encoded in the
// first file entry's path and in the base path.
void BittorrentHelperTest::testLoadFromMemory_multiFileNonUtf8Path()
{
SharedHandle<List> path = List::g();
path->append("path");
// Bytes 90 a2 8a followed by 'E'; this is not valid UTF-8.
path->append(util::fromHex("90a28a")+"E");
SharedHandle<Dict> file = Dict::g();
file->put("length", Integer::g(1024));
file->put("path", path);
SharedHandle<List> files = List::g();
files->append(file);
SharedHandle<Dict> info = Dict::g();
info->put("files", files);
info->put("piece length", Integer::g(1024));
info->put("pieces", "01234567890123456789");
// The name embeds the control byte 0x1b, so it is not treated as UTF-8.
// NOTE(review): looks like an ISO-2022-JP escape sequence -- confirm.
info->put("name", util::fromHex("1b")+"$B%O%m!<"+util::fromHex("1b")+"(B");
Dict dict;
dict.put("info", info);
SharedHandle<DownloadContext> dctx(new DownloadContext());
loadFromMemory(bencode2::encode(&dict), dctx, "default");
const SharedHandle<FileEntry>& fe = dctx->getFirstFileEntry();
// Expected layout: ./<encoded name>/path/<encoded last path element>
CPPUNIT_ASSERT_EQUAL
(std::string("./%1B%24B%25O%25m%21%3C%1B%28B/path/%90%A2%8AE"),
fe->getPath());
// The base path carries the percent-encoded name as well.
CPPUNIT_ASSERT_EQUAL
(std::string("./%1B%24B%25O%25m%21%3C%1B%28B"), dctx->getBasePath());
}
// Checks a single-file torrent whose "name" is not valid UTF-8: the
// file entry's path is expected to contain the percent-encoded name.
void BittorrentHelperTest::testLoadFromMemory_singleFileNonUtf8Path()
{
SharedHandle<Dict> info = Dict::g();
info->put("piece length", Integer::g(1024));
info->put("pieces", "01234567890123456789");
// Bytes 90 a2 8a followed by 'E'; this is not valid UTF-8.
info->put("name", util::fromHex("90a28a")+"E");
info->put("length", Integer::g(1024));
Dict dict;
dict.put("info", info);
SharedHandle<DownloadContext> dctx(new DownloadContext());
loadFromMemory(bencode2::encode(&dict), dctx, "default");
const SharedHandle<FileEntry>& fe = dctx->getFirstFileEntry();
CPPUNIT_ASSERT_EQUAL(std::string("./%90%A2%8AE"), fe->getPath());
}
void BittorrentHelperTest::testLoadFromMemory()
{
std::string memory = "d8:announce36:http://aria.rednoah.com/announce.php13:announce-listll16:http://tracker1 el15:http://tracker2el15:http://tracker3ee7:comment17:REDNOAH.COM RULES13:creation datei1123456789e4:infod5:filesld6:lengthi284e4:pathl5:aria23:src6:aria2ceed6:lengthi100e4:pathl19:aria2-0.2.2.tar.bz2eee4:name10:aria2-test12:piece lengthi128e6:pieces60:AAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCee";

View File

@ -65,6 +65,7 @@ class UtilTest:public CppUnit::TestFixture {
CPPUNIT_TEST(testEscapePath);
CPPUNIT_TEST(testGetCidrPrefix);
CPPUNIT_TEST(testInSameCidrBlock);
CPPUNIT_TEST(testIsUtf8String);
CPPUNIT_TEST_SUITE_END();
private:
@ -118,6 +119,7 @@ public:
void testEscapePath();
void testGetCidrPrefix();
void testInSameCidrBlock();
void testIsUtf8String();
};
@ -1098,4 +1100,36 @@ void UtilTest::testInSameCidrBlock()
CPPUNIT_ASSERT(!util::inSameCidrBlock("192.168.128.1", "192.168.0.1", 17));
}
// Exercises util::isUtf8 with ASCII, multi-byte UTF-8, Shift_JIS bytes,
// the boundary lead/second-byte values of each UTF8-2/3/4 form, the
// empty string and a NUL byte.
void UtilTest::testIsUtf8String()
{
CPPUNIT_ASSERT(util::isUtf8("ascii"));
// "Hello World" in Japanese UTF-8
CPPUNIT_ASSERT(util::isUtf8
(util::fromHex("e38193e38293e381abe381a1e381afe4b896e7958c")));
// "World" in Shift_JIS
CPPUNIT_ASSERT(!util::isUtf8(util::fromHex("90a28a")+"E"));
// UTF8-2
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("c280")));
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("dfbf")));
// UTF8-3
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("e0a080")));
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("e0bf80")));
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("e18080")));
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("ec8080")));
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("ed8080")));
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("ed9f80")));
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("ee8080")));
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("ef8080")));
// UTF8-4
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("f0908080")));
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("f0bf8080")));
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("f1808080")));
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("f3808080")));
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("f4808080")));
CPPUNIT_ASSERT(util::isUtf8(util::fromHex("f48f8080")));
// The empty string is accepted.
CPPUNIT_ASSERT(util::isUtf8(""));
// A NUL byte is a control character and is rejected.
CPPUNIT_ASSERT(!util::isUtf8(util::fromHex("00")));
}
} // namespace aria2