Rewrite URI path component normalization

pull/103/head
Tatsuhiro Tsujikawa 2013-06-18 21:27:01 +09:00
parent f022444d81
commit 126a4bde61
3 changed files with 191 additions and 9 deletions

View File

@ -209,6 +209,148 @@ std::string construct(const UriStruct& us)
return res;
}
enum {
NPATH_START,
NPATH_SLASH,
NPATH_SDOT,
NPATH_DDOT,
NPATH_PATHCOMP
};
std::string normalizePath(std::string path)
{
std::string::iterator begin = path.begin(), out = begin;
int state = NPATH_START;
bool startWithSlash = false;
std::vector<int> range;
// 32 is arbitrary
range.reserve(32);
for(std::string::iterator in = begin, eoi = path.end(); in != eoi; ++in) {
switch(state) {
case NPATH_START:
switch(*in) {
case '.':
state = NPATH_SDOT;
range.push_back(in-begin);
break;
case '/':
startWithSlash = true;
state = NPATH_SLASH;
break;
default:
state = NPATH_PATHCOMP;
range.push_back(in-begin);
break;
}
break;
case NPATH_SLASH:
switch(*in) {
case '.':
state = NPATH_SDOT;
range.push_back(in-begin);
break;
case '/':
// drop duplicate '/'
break;
default:
state = NPATH_PATHCOMP;
range.push_back(in-begin);
break;
}
break;
case NPATH_SDOT:
switch(*in) {
case '.':
state = NPATH_DDOT;
break;
case '/':
// drop path component '.'
state = NPATH_SLASH;
range.pop_back();
break;
default:
state = NPATH_PATHCOMP;
break;
}
break;
case NPATH_DDOT:
switch(*in) {
case '/':
// drop previous path component before '..'
for(int i = 0; i < 3 && !range.empty(); ++i) {
range.pop_back();
}
state = NPATH_SLASH;
break;
default:
state = NPATH_PATHCOMP;
break;
}
break;
case NPATH_PATHCOMP:
if(*in == '/') {
range.push_back(in+1-begin);
state = NPATH_SLASH;
}
break;
}
}
switch(state) {
case NPATH_SDOT:
range.pop_back();
break;
case NPATH_DDOT:
for(int i = 0; i < 3 && !range.empty(); ++i) {
range.pop_back();
}
break;
case NPATH_PATHCOMP:
range.push_back(path.end()-begin);
break;
default:
break;
}
if(startWithSlash) {
++out;
}
for(int i = 0; i < (int)range.size(); i += 2) {
std::string::iterator a = begin+range[i];
std::string::iterator b = begin+range[i+1];
if(a == out) {
out = b;
} else {
out = std::copy(a, b, out);
}
}
path.erase(out, path.end());
return path;
}
namespace {
std::string joinPath(std::string basePath,
std::string::const_iterator newPathFirst,
std::string::const_iterator newPathLast)
{
if(newPathFirst == newPathLast) {
return basePath;
} else if(basePath.empty() || *newPathFirst == '/') {
return normalizePath(std::string(newPathFirst, newPathLast));
} else if(basePath[basePath.size()-1] == '/') {
basePath.append(newPathFirst, newPathLast);
return normalizePath(basePath);
} else {
basePath += "/";
basePath.append(newPathFirst, newPathLast);
return normalizePath(basePath);
}
}
} // namespace
std::string joinPath(const std::string& basePath, const std::string& newPath)
{
return joinPath(basePath, newPath.begin(), newPath.end());
}
std::string joinUri(const std::string& baseUri, const std::string& uri)
{
UriStruct us;
@ -219,11 +361,6 @@ std::string joinUri(const std::string& baseUri, const std::string& uri)
if(!parse(bus, baseUri)) {
return uri;
}
std::vector<std::string> parts;
if(uri.empty() || uri[0] != '/') {
util::split(bus.dir.begin(), bus.dir.end(), std::back_inserter(parts),
'/');
}
std::string::const_iterator qend;
for(qend = uri.begin(); qend != uri.end(); ++qend) {
if(*qend == '#') {
@ -236,14 +373,15 @@ std::string joinUri(const std::string& baseUri, const std::string& uri)
break;
}
}
util::split(uri.begin(), end, std::back_inserter(parts), '/');
std::string newpath = joinPath(bus.dir, uri.begin(), end);
bus.dir.clear();
bus.file.clear();
bus.query.clear();
std::string res = construct(bus);
res += util::joinPath(parts.begin(), parts.end());
if((uri.begin() == end || *(end-1) == '/') && *(res.end()-1) != '/') {
res += "/";
if(!newpath.empty()) {
// res always ends with '/'. Since bus.dir also starts with '/',
// regardless of uri, newpath always starts with '/'.
res.append(newpath.begin()+1, newpath.end());
}
res.append(end, qend);
return res;

View File

@ -82,6 +82,14 @@ std::string construct(const UriStruct& us);
std::string joinUri(const std::string& baseUri, const std::string& uri);
std::string joinPath(const std::string& basePath, const std::string& newPath);
// Normalizes path so that: 1) it does not contain successive / and 2)
// resolve path component '.' and '..'. If there is not enough path
// component to resolve '..', those '..' are discarded. The resulting
// path starts / only if path starts with /.
std::string normalizePath(std::string path);
} // namespace uri
} // namespace aria2

View File

@ -36,6 +36,7 @@ class UriTest:public CppUnit::TestFixture {
CPPUNIT_TEST(testConstruct);
CPPUNIT_TEST(testSwap);
CPPUNIT_TEST(testJoinUri);
CPPUNIT_TEST(testJoinPath);
CPPUNIT_TEST_SUITE_END();
public:
@ -66,6 +67,7 @@ public:
void testConstruct();
void testSwap();
void testJoinUri();
void testJoinPath();
};
@ -527,6 +529,40 @@ void UriTest::testJoinUri()
"/file#a?q=x"));
}
void UriTest::testJoinPath()
{
CPPUNIT_ASSERT_EQUAL(std::string("/b"), joinPath("/a", "/b"));
CPPUNIT_ASSERT_EQUAL(std::string("/alpha/bravo"),
joinPath("/alpha", "bravo"));
CPPUNIT_ASSERT_EQUAL(std::string("/bravo"),
joinPath("/a", "/alpha/../bravo"));
CPPUNIT_ASSERT_EQUAL(std::string("/alpha/charlie/"),
joinPath("/a", "/alpha/bravo/../charlie/"));
CPPUNIT_ASSERT_EQUAL(std::string("/alpha/bravo/"),
joinPath("/a", "/alpha////bravo//"));
CPPUNIT_ASSERT_EQUAL(std::string("/alpha/bravo/"),
joinPath("/a", "/alpha/././bravo/"));
CPPUNIT_ASSERT_EQUAL(std::string("/alpha/bravo/"),
joinPath("/a", "/alpha/bravo/./"));
CPPUNIT_ASSERT_EQUAL(std::string("/alpha/bravo/"),
joinPath("/a", "/alpha/bravo/."));
CPPUNIT_ASSERT_EQUAL(std::string("/alpha/"),
joinPath("/a", "/alpha/bravo/.."));
CPPUNIT_ASSERT_EQUAL(std::string("/alpha/"),
joinPath("/", "../alpha/"));
CPPUNIT_ASSERT_EQUAL(std::string("/bravo/"),
joinPath("/alpha", "../bravo/"));
CPPUNIT_ASSERT_EQUAL(std::string("/bravo/"),
joinPath("/alpha", "../../bravo/"));
// If neither paths do not start with '/', the resulting path also
// does not start with '/'.
CPPUNIT_ASSERT_EQUAL(std::string("alpha/bravo"),
joinPath("alpha", "bravo"));
CPPUNIT_ASSERT_EQUAL(std::string("bravo/"),
joinPath("alpha", "../../bravo/"));
}
} // namespace uri
} // namespace aria2