mirror of https://github.com/aria2/aria2
Rewrite URI path component normalization
parent
f022444d81
commit
126a4bde61
156
src/uri.cc
156
src/uri.cc
|
@ -209,6 +209,148 @@ std::string construct(const UriStruct& us)
|
|||
return res;
|
||||
}
|
||||
|
||||
// Scanner states used by normalizePath(). Each state names what has been
// consumed since the most recent '/' (or since the start of the path).
enum {
  NPATH_START,    // nothing consumed yet
  NPATH_SLASH,    // the previous character was a '/'
  NPATH_SDOT,     // the current component is "." so far
  NPATH_DDOT,     // the current component is ".." so far
  NPATH_PATHCOMP  // inside an ordinary path component
};

namespace {

// Handles a completed ".." component: removes the start offset recorded
// for the ".." itself and, when present, the [start, end) pair of the
// component that precedes it. When there is no preceding component only
// the ".." start offset is removed, i.e. the ".." is simply discarded.
void dropParentComponent(std::vector<std::string::difference_type>& range)
{
  for(int i = 0; i < 3 && !range.empty(); ++i) {
    range.pop_back();
  }
}

} // namespace

// Normalizes |path|: successive '/' are collapsed into one, "."
// components are removed and ".." components remove the component before
// them (a ".." with nothing before it is dropped). The result starts
// with '/' only if |path| does.
//
// The path is scanned once. Every component that survives is recorded
// in |range| as a pair of offsets [start, end) into |path|, where the
// range includes the '/' that terminates the component. The surviving
// ranges are then compacted towards the front of |path| in place and the
// tail is erased.
std::string normalizePath(std::string path)
{
  typedef std::string::iterator Iter;
  // Offsets are kept in the iterator's own difference type so that
  // "in - begin" is never narrowed.
  typedef std::string::difference_type Offset;
  typedef std::vector<Offset> OffsetVec;

  const Iter begin = path.begin();
  const Iter end = path.end();
  int state = NPATH_START;
  bool startWithSlash = false;
  OffsetVec range;
  // 32 is arbitrary
  range.reserve(32);

  for(Iter in = begin; in != end; ++in) {
    const char c = *in;
    switch(state) {
    case NPATH_START:
    case NPATH_SLASH:
      if(c == '/') {
        if(state == NPATH_START) {
          // Leading '/': remember it; it is kept verbatim in the output.
          startWithSlash = true;
          state = NPATH_SLASH;
        }
        // Otherwise this is a duplicate '/', which is dropped.
      } else {
        // A new component begins here; record its start offset.
        range.push_back(in - begin);
        state = (c == '.') ? NPATH_SDOT : NPATH_PATHCOMP;
      }
      break;
    case NPATH_SDOT:
      if(c == '.') {
        state = NPATH_DDOT;
      } else if(c == '/') {
        // Component was exactly ".": forget its start offset.
        range.pop_back();
        state = NPATH_SLASH;
      } else {
        // Something like ".x" is an ordinary component.
        state = NPATH_PATHCOMP;
      }
      break;
    case NPATH_DDOT:
      if(c == '/') {
        // Component was exactly "..": drop it and its predecessor.
        dropParentComponent(range);
        state = NPATH_SLASH;
      } else {
        // Something like "..x" or "..." is an ordinary component.
        state = NPATH_PATHCOMP;
      }
      break;
    case NPATH_PATHCOMP:
      if(c == '/') {
        // Close the component; the end offset is just past the '/'.
        range.push_back(in + 1 - begin);
        state = NPATH_SLASH;
      }
      break;
    }
  }

  // Settle the component that was still open when the input ended.
  switch(state) {
  case NPATH_SDOT:
    range.pop_back();
    break;
  case NPATH_DDOT:
    dropParentComponent(range);
    break;
  case NPATH_PATHCOMP:
    range.push_back(end - begin);
    break;
  default:
    break;
  }

  // Compact the surviving ranges to the front of the string. A leading
  // '/' stays where it is, so output starts right after it.
  Iter out = begin;
  if(startWithSlash) {
    ++out;
  }
  for(OffsetVec::size_type i = 0; i + 1 < range.size(); i += 2) {
    const Iter first = begin + range[i];
    const Iter last = begin + range[i + 1];
    if(first == out) {
      // Already in its final position; nothing to move.
      out = last;
    } else {
      // out is always before first here, so a forward copy is safe even
      // though source and destination belong to the same string.
      out = std::copy(first, last, out);
    }
  }
  path.erase(out, end);
  return path;
}
|
||||
|
||||
namespace {
|
||||
std::string joinPath(std::string basePath,
|
||||
std::string::const_iterator newPathFirst,
|
||||
std::string::const_iterator newPathLast)
|
||||
{
|
||||
if(newPathFirst == newPathLast) {
|
||||
return basePath;
|
||||
} else if(basePath.empty() || *newPathFirst == '/') {
|
||||
return normalizePath(std::string(newPathFirst, newPathLast));
|
||||
} else if(basePath[basePath.size()-1] == '/') {
|
||||
basePath.append(newPathFirst, newPathLast);
|
||||
return normalizePath(basePath);
|
||||
} else {
|
||||
basePath += "/";
|
||||
basePath.append(newPathFirst, newPathLast);
|
||||
return normalizePath(basePath);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
std::string joinPath(const std::string& basePath, const std::string& newPath)
|
||||
{
|
||||
return joinPath(basePath, newPath.begin(), newPath.end());
|
||||
}
|
||||
|
||||
std::string joinUri(const std::string& baseUri, const std::string& uri)
|
||||
{
|
||||
UriStruct us;
|
||||
|
@ -219,11 +361,6 @@ std::string joinUri(const std::string& baseUri, const std::string& uri)
|
|||
if(!parse(bus, baseUri)) {
|
||||
return uri;
|
||||
}
|
||||
std::vector<std::string> parts;
|
||||
if(uri.empty() || uri[0] != '/') {
|
||||
util::split(bus.dir.begin(), bus.dir.end(), std::back_inserter(parts),
|
||||
'/');
|
||||
}
|
||||
std::string::const_iterator qend;
|
||||
for(qend = uri.begin(); qend != uri.end(); ++qend) {
|
||||
if(*qend == '#') {
|
||||
|
@ -236,14 +373,15 @@ std::string joinUri(const std::string& baseUri, const std::string& uri)
|
|||
break;
|
||||
}
|
||||
}
|
||||
util::split(uri.begin(), end, std::back_inserter(parts), '/');
|
||||
std::string newpath = joinPath(bus.dir, uri.begin(), end);
|
||||
bus.dir.clear();
|
||||
bus.file.clear();
|
||||
bus.query.clear();
|
||||
std::string res = construct(bus);
|
||||
res += util::joinPath(parts.begin(), parts.end());
|
||||
if((uri.begin() == end || *(end-1) == '/') && *(res.end()-1) != '/') {
|
||||
res += "/";
|
||||
if(!newpath.empty()) {
|
||||
// res always ends with '/'. Since bus.dir also starts with '/',
|
||||
// regardless of uri, newpath always starts with '/'.
|
||||
res.append(newpath.begin()+1, newpath.end());
|
||||
}
|
||||
res.append(end, qend);
|
||||
return res;
|
||||
|
|
|
@ -82,6 +82,14 @@ std::string construct(const UriStruct& us);
|
|||
|
||||
std::string joinUri(const std::string& baseUri, const std::string& uri);
|
||||
|
||||
std::string joinPath(const std::string& basePath, const std::string& newPath);
|
||||
|
||||
// Normalizes path so that: 1) it does not contain successive '/' and 2)
|
||||
// path components '.' and '..' are resolved. If there are not enough path
|
||||
// components to resolve a '..', that '..' is discarded. The resulting
|
||||
// path starts with '/' only if path starts with '/'.
|
||||
std::string normalizePath(std::string path);
|
||||
|
||||
} // namespace uri
|
||||
|
||||
} // namespace aria2
|
||||
|
|
|
@ -36,6 +36,7 @@ class UriTest:public CppUnit::TestFixture {
|
|||
CPPUNIT_TEST(testConstruct);
|
||||
CPPUNIT_TEST(testSwap);
|
||||
CPPUNIT_TEST(testJoinUri);
|
||||
CPPUNIT_TEST(testJoinPath);
|
||||
CPPUNIT_TEST_SUITE_END();
|
||||
|
||||
public:
|
||||
|
@ -66,6 +67,7 @@ public:
|
|||
void testConstruct();
|
||||
void testSwap();
|
||||
void testJoinUri();
|
||||
void testJoinPath();
|
||||
};
|
||||
|
||||
|
||||
|
@ -527,6 +529,40 @@ void UriTest::testJoinUri()
|
|||
"/file#a?q=x"));
|
||||
}
|
||||
|
||||
void UriTest::testJoinPath()
|
||||
{
|
||||
CPPUNIT_ASSERT_EQUAL(std::string("/b"), joinPath("/a", "/b"));
|
||||
CPPUNIT_ASSERT_EQUAL(std::string("/alpha/bravo"),
|
||||
joinPath("/alpha", "bravo"));
|
||||
CPPUNIT_ASSERT_EQUAL(std::string("/bravo"),
|
||||
joinPath("/a", "/alpha/../bravo"));
|
||||
CPPUNIT_ASSERT_EQUAL(std::string("/alpha/charlie/"),
|
||||
joinPath("/a", "/alpha/bravo/../charlie/"));
|
||||
CPPUNIT_ASSERT_EQUAL(std::string("/alpha/bravo/"),
|
||||
joinPath("/a", "/alpha////bravo//"));
|
||||
CPPUNIT_ASSERT_EQUAL(std::string("/alpha/bravo/"),
|
||||
joinPath("/a", "/alpha/././bravo/"));
|
||||
CPPUNIT_ASSERT_EQUAL(std::string("/alpha/bravo/"),
|
||||
joinPath("/a", "/alpha/bravo/./"));
|
||||
CPPUNIT_ASSERT_EQUAL(std::string("/alpha/bravo/"),
|
||||
joinPath("/a", "/alpha/bravo/."));
|
||||
CPPUNIT_ASSERT_EQUAL(std::string("/alpha/"),
|
||||
joinPath("/a", "/alpha/bravo/.."));
|
||||
CPPUNIT_ASSERT_EQUAL(std::string("/alpha/"),
|
||||
joinPath("/", "../alpha/"));
|
||||
CPPUNIT_ASSERT_EQUAL(std::string("/bravo/"),
|
||||
joinPath("/alpha", "../bravo/"));
|
||||
CPPUNIT_ASSERT_EQUAL(std::string("/bravo/"),
|
||||
joinPath("/alpha", "../../bravo/"));
|
||||
// If neither paths do not start with '/', the resulting path also
|
||||
// does not start with '/'.
|
||||
CPPUNIT_ASSERT_EQUAL(std::string("alpha/bravo"),
|
||||
joinPath("alpha", "bravo"));
|
||||
CPPUNIT_ASSERT_EQUAL(std::string("bravo/"),
|
||||
joinPath("alpha", "../../bravo/"));
|
||||
|
||||
}
|
||||
|
||||
} // namespace uri
|
||||
|
||||
} // namespace aria2
|
||||
|
|
Loading…
Reference in New Issue