From 126a4bde612eb0e6c48cf2f13ebe3eac45cc71a8 Mon Sep 17 00:00:00 2001
From: Tatsuhiro Tsujikawa
Date: Tue, 18 Jun 2013 21:27:01 +0900
Subject: [PATCH] Rewrite URI path component normalization

Add uri::normalizePath(), which removes successive '/' and resolves
'.' and '..' path components in one scan over the string, and
uri::joinPath() built on top of it. joinUri() now uses joinPath()
instead of util::split() followed by util::joinPath().

---
 src/uri.cc      | 173 +++++++++++++++++++++++++++++++++++++++++++++---
 src/uri.h       |  11 +++
 test/UriTest.cc |  36 +++++++++++
 3 files changed, 211 insertions(+), 9 deletions(-)

diff --git a/src/uri.cc b/src/uri.cc
index b922b17f..75d83b92 100644
--- a/src/uri.cc
+++ b/src/uri.cc
@@ -209,6 +209,165 @@ std::string construct(const UriStruct& us)
   return res;
 }
 
+// States of the scanner in normalizePath(): what has been seen since
+// the last '/'.
+enum {
+  // nothing has been consumed yet
+  NPATH_START,
+  // the previous character was '/'
+  NPATH_SLASH,
+  // the current path component is "." so far
+  NPATH_SDOT,
+  // the current path component is ".." so far
+  NPATH_DDOT,
+  // inside any other path component
+  NPATH_PATHCOMP
+};
+
+// Scans path once. range holds offsets into path in pairs
+// [start, end) of the pieces to keep; when a kept component is
+// followed by '/', its end is placed after that '/'. While a component
+// is being scanned, its start offset sits unpaired at the back of range.
+std::string normalizePath(std::string path)
+{
+  std::string::iterator begin = path.begin(), out = begin;
+  int state = NPATH_START;
+  bool startWithSlash = false;
+  std::vector<int> range;
+  // 32 is arbitrary
+  range.reserve(32);
+  for(std::string::iterator in = begin, eoi = path.end(); in != eoi; ++in) {
+    switch(state) {
+    case NPATH_START:
+      switch(*in) {
+      case '.':
+        state = NPATH_SDOT;
+        range.push_back(in-begin);
+        break;
+      case '/':
+        startWithSlash = true;
+        state = NPATH_SLASH;
+        break;
+      default:
+        state = NPATH_PATHCOMP;
+        range.push_back(in-begin);
+        break;
+      }
+      break;
+    case NPATH_SLASH:
+      switch(*in) {
+      case '.':
+        state = NPATH_SDOT;
+        range.push_back(in-begin);
+        break;
+      case '/':
+        // drop duplicate '/'
+        break;
+      default:
+        state = NPATH_PATHCOMP;
+        range.push_back(in-begin);
+        break;
+      }
+      break;
+    case NPATH_SDOT:
+      switch(*in) {
+      case '.':
+        state = NPATH_DDOT;
+        break;
+      case '/':
+        // drop path component '.'
+        state = NPATH_SLASH;
+        range.pop_back();
+        break;
+      default:
+        state = NPATH_PATHCOMP;
+        break;
+      }
+      break;
+    case NPATH_DDOT:
+      switch(*in) {
+      case '/':
+        // drop '..' and the path component before it: the start offset
+        // of '..' plus the [start, end) pair of its predecessor, if any
+        for(int i = 0; i < 3 && !range.empty(); ++i) {
+          range.pop_back();
+        }
+        state = NPATH_SLASH;
+        break;
+      default:
+        state = NPATH_PATHCOMP;
+        break;
+      }
+      break;
+    case NPATH_PATHCOMP:
+      if(*in == '/') {
+        range.push_back(in+1-begin);
+        state = NPATH_SLASH;
+      }
+      break;
+    }
+  }
+  switch(state) {
+  case NPATH_SDOT:
+    range.pop_back();
+    break;
+  case NPATH_DDOT:
+    for(int i = 0; i < 3 && !range.empty(); ++i) {
+      range.pop_back();
+    }
+    break;
+  case NPATH_PATHCOMP:
+    range.push_back(path.end()-begin);
+    break;
+  default:
+    break;
+  }
+  // Move the kept pieces to the front of path. A leading '/' is kept
+  // by starting the output just after it.
+  if(startWithSlash) {
+    ++out;
+  }
+  for(int i = 0; i < static_cast<int>(range.size()); i += 2) {
+    std::string::iterator a = begin+range[i];
+    std::string::iterator b = begin+range[i+1];
+    if(a == out) {
+      out = b;
+    } else {
+      out = std::copy(a, b, out);
+    }
+  }
+  path.erase(out, path.end());
+  return path;
+}
+
+namespace {
+// Joins basePath and [newPathFirst, newPathLast) and returns the
+// normalized result. An empty new path returns basePath untouched;
+// a new path starting with '/' (or an empty basePath) replaces it.
+std::string joinPath(std::string basePath,
+                     std::string::const_iterator newPathFirst,
+                     std::string::const_iterator newPathLast)
+{
+  if(newPathFirst == newPathLast) {
+    return basePath;
+  } else if(basePath.empty() || *newPathFirst == '/') {
+    return normalizePath(std::string(newPathFirst, newPathLast));
+  } else if(basePath[basePath.size()-1] == '/') {
+    basePath.append(newPathFirst, newPathLast);
+    return normalizePath(basePath);
+  } else {
+    basePath += "/";
+    basePath.append(newPathFirst, newPathLast);
+    return normalizePath(basePath);
+  }
+}
+} // namespace
+
+std::string joinPath(const std::string& basePath, const std::string& newPath)
+{
+  return joinPath(basePath, newPath.begin(), newPath.end());
+}
+
 std::string joinUri(const std::string& baseUri, const std::string& uri)
 {
   UriStruct us;
@@ -219,11 +378,6 @@ std::string joinUri(const std::string& baseUri, const std::string& uri)
   if(!parse(bus, baseUri)) {
     return uri;
   }
-  std::vector<std::string> parts;
-  if(uri.empty() || uri[0] != '/') {
-    util::split(bus.dir.begin(), bus.dir.end(), std::back_inserter(parts),
-                '/');
-  }
   std::string::const_iterator qend;
   for(qend = uri.begin(); qend != uri.end(); ++qend) {
     if(*qend == '#') {
@@ -236,14 +390,15 @@ std::string joinUri(const std::string& baseUri, const std::string& uri)
       break;
     }
   }
-  util::split(uri.begin(), end, std::back_inserter(parts), '/');
+  std::string newpath = joinPath(bus.dir, uri.begin(), end);
   bus.dir.clear();
   bus.file.clear();
   bus.query.clear();
   std::string res = construct(bus);
-  res += util::joinPath(parts.begin(), parts.end());
-  if((uri.begin() == end || *(end-1) == '/') && *(res.end()-1) != '/') {
-    res += "/";
+  if(!newpath.empty()) {
+    // res always ends with '/'. Since bus.dir also starts with '/',
+    // regardless of uri, newpath always starts with '/'.
+    res.append(newpath.begin()+1, newpath.end());
   }
   res.append(end, qend);
   return res;
diff --git a/src/uri.h b/src/uri.h
index 706359f4..b4ba85d9 100644
--- a/src/uri.h
+++ b/src/uri.h
@@ -82,6 +82,17 @@ std::string construct(const UriStruct& us);
 
 std::string joinUri(const std::string& baseUri, const std::string& uri);
 
+// Joins basePath and newPath, then normalizes the result with
+// normalizePath(). If newPath is empty, basePath is returned as is.
+// If newPath starts with '/', basePath is ignored.
+std::string joinPath(const std::string& basePath, const std::string& newPath);
+
+// Normalizes path so that 1) it does not contain successive '/' and
+// 2) path components '.' and '..' are resolved. If there are not
+// enough path components to resolve '..', those '..' are discarded.
+// The resulting path starts with '/' only if path starts with '/'.
+std::string normalizePath(std::string path);
+
 } // namespace uri
 
 } // namespace aria2
diff --git a/test/UriTest.cc b/test/UriTest.cc
index 9c49f490..c5186d85 100644
--- a/test/UriTest.cc
+++ b/test/UriTest.cc
@@ -36,6 +36,7 @@ class UriTest:public CppUnit::TestFixture {
   CPPUNIT_TEST(testConstruct);
   CPPUNIT_TEST(testSwap);
   CPPUNIT_TEST(testJoinUri);
+  CPPUNIT_TEST(testJoinPath);
   CPPUNIT_TEST_SUITE_END();
 
 public:
@@ -66,6 +67,7 @@ public:
   void testConstruct();
   void testSwap();
   void testJoinUri();
+  void testJoinPath();
 };
 
 
@@ -527,6 +529,40 @@ void UriTest::testJoinUri()
                                "/file#a?q=x"));
 }
 
+void UriTest::testJoinPath()
+{
+  CPPUNIT_ASSERT_EQUAL(std::string("/b"), joinPath("/a", "/b"));
+  CPPUNIT_ASSERT_EQUAL(std::string("/alpha/bravo"),
+                       joinPath("/alpha", "bravo"));
+  CPPUNIT_ASSERT_EQUAL(std::string("/bravo"),
+                       joinPath("/a", "/alpha/../bravo"));
+  CPPUNIT_ASSERT_EQUAL(std::string("/alpha/charlie/"),
+                       joinPath("/a", "/alpha/bravo/../charlie/"));
+  CPPUNIT_ASSERT_EQUAL(std::string("/alpha/bravo/"),
+                       joinPath("/a", "/alpha////bravo//"));
+  CPPUNIT_ASSERT_EQUAL(std::string("/alpha/bravo/"),
+                       joinPath("/a", "/alpha/././bravo/"));
+  CPPUNIT_ASSERT_EQUAL(std::string("/alpha/bravo/"),
+                       joinPath("/a", "/alpha/bravo/./"));
+  CPPUNIT_ASSERT_EQUAL(std::string("/alpha/bravo/"),
+                       joinPath("/a", "/alpha/bravo/."));
+  CPPUNIT_ASSERT_EQUAL(std::string("/alpha/"),
+                       joinPath("/a", "/alpha/bravo/.."));
+  CPPUNIT_ASSERT_EQUAL(std::string("/alpha/"),
+                       joinPath("/", "../alpha/"));
+  CPPUNIT_ASSERT_EQUAL(std::string("/bravo/"),
+                       joinPath("/alpha", "../bravo/"));
+  CPPUNIT_ASSERT_EQUAL(std::string("/bravo/"),
+                       joinPath("/alpha", "../../bravo/"));
+  // If neither path starts with '/', the resulting path also does
+  // not start with '/'.
+  CPPUNIT_ASSERT_EQUAL(std::string("alpha/bravo"),
+                       joinPath("alpha", "bravo"));
+  CPPUNIT_ASSERT_EQUAL(std::string("bravo/"),
+                       joinPath("alpha", "../../bravo/"));
+
+}
+
 } // namespace uri
 
 } // namespace aria2