mirror of https://github.com/aria2/aria2
SessionSerializer: Truly unique URIs
Before, only spent URIs were sanitized so that they were not also contained within the remaining URIs. Change this so that each URI in union(remaining, spent) gets saved at most once. The order of the URIs won't be changed, with remaining URIs going first, followed by spent URIs. Also avoid copying the URI std::strings around during dupe checking, usually resulting in better performance regarding CPU and space.
pull/97/head
parent
ca7c63aa7e
commit
3b32dcb9f1
|
@ -137,18 +137,35 @@ bool writeOption(IOFile& fp, const SharedHandle<Option>& op)
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
template<typename T>
|
||||||
|
class Unique {
|
||||||
|
typedef T type;
|
||||||
|
struct PointerCmp {
|
||||||
|
inline bool operator()(const type* x, const type* y) {
|
||||||
|
return *x < *y;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
std::set<const type*, PointerCmp> known;
|
||||||
|
public:
|
||||||
|
inline bool operator()(const type& v) {
|
||||||
|
return known.insert(&v).second;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
bool writeUri(IOFile& fp, const std::string& uri)
|
bool writeUri(IOFile& fp, const std::string& uri)
|
||||||
{
|
{
|
||||||
return fp.write(uri.c_str(), uri.size()) == uri.size() &&
|
return fp.write(uri.c_str(), uri.size()) == uri.size() &&
|
||||||
fp.write("\t", 1) == 1;
|
fp.write("\t", 1) == 1;
|
||||||
}
|
}
|
||||||
} // namespace
|
|
||||||
|
|
||||||
namespace {
|
template<typename InputIterator, class UnaryPredicate>
|
||||||
template<typename InputIterator>
|
bool writeUri(IOFile& fp, InputIterator first, InputIterator last,
|
||||||
bool writeUri(IOFile& fp, InputIterator first, InputIterator last)
|
UnaryPredicate& filter)
|
||||||
{
|
{
|
||||||
for(; first != last; ++first) {
|
for(; first != last; ++first) {
|
||||||
|
if (!filter(*first)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if(!writeUri(fp, *first)) {
|
if(!writeUri(fp, *first)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -196,29 +213,27 @@ bool writeDownloadResult
|
||||||
}
|
}
|
||||||
const SharedHandle<FileEntry>& file = dr->fileEntries[0];
|
const SharedHandle<FileEntry>& file = dr->fileEntries[0];
|
||||||
// Don't save download if there are no URIs.
|
// Don't save download if there are no URIs.
|
||||||
if(file->getRemainingUris().empty() &&
|
const bool hasRemaining = !file->getRemainingUris().empty();
|
||||||
file->getSpentUris().empty()) {
|
const bool hasSpent = !file->getSpentUris().empty();
|
||||||
|
if (!hasRemaining && !hasSpent) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save spent URIs + remaining URIs. Remove URI in spent URI which
|
// Save spent URIs + remaining URIs. Remove URI in spent URI which
|
||||||
// also exists in remaining URIs.
|
// also exists in remaining URIs.
|
||||||
std::set<std::string> uriSet(file->getRemainingUris().begin(),
|
{
|
||||||
file->getRemainingUris().end());
|
Unique<std::string> unique;
|
||||||
for(std::deque<std::string>::const_iterator i =
|
if (hasRemaining && !writeUri(fp, file->getRemainingUris().begin(),
|
||||||
file->getSpentUris().begin(), eoi = file->getSpentUris().end();
|
file->getRemainingUris().end(),
|
||||||
i != eoi; ++i) {
|
unique)) {
|
||||||
if(uriSet.count(*i)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
uriSet.insert(*i);
|
|
||||||
if(!writeUri(fp, *i)) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
if (hasSpent && !writeUri(fp, file->getSpentUris().begin(),
|
||||||
if(!writeUri(fp, file->getRemainingUris().begin(),
|
file->getSpentUris().end(),
|
||||||
file->getRemainingUris().end())) {
|
unique)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if(fp.write("\n", 1) != 1) {
|
if(fp.write("\n", 1) != 1) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -61,8 +61,19 @@ void SessionSerializerTest::testSave()
|
||||||
};
|
};
|
||||||
// This URI will be discarded because same URI exists in remaining
|
// This URI will be discarded because same URI exists in remaining
|
||||||
// URIs.
|
// URIs.
|
||||||
|
drs[1]->fileEntries[0]->getRemainingUris().push_back("http://error");
|
||||||
|
drs[1]->fileEntries[0]->getRemainingUris().push_back("http://error3");
|
||||||
|
// This URI will be discarded because same URI exists in remaining
|
||||||
|
// URIs.
|
||||||
|
drs[1]->fileEntries[0]->getRemainingUris().push_back("http://error");
|
||||||
|
//
|
||||||
|
// This URI will be discarded because same URI exists in remaining
|
||||||
|
// URIs.
|
||||||
drs[1]->fileEntries[0]->getSpentUris().push_back("http://error");
|
drs[1]->fileEntries[0]->getSpentUris().push_back("http://error");
|
||||||
drs[1]->fileEntries[0]->getSpentUris().push_back("http://error2");
|
drs[1]->fileEntries[0]->getSpentUris().push_back("http://error2");
|
||||||
|
// This URI will be discarded because same URI exists in remaining
|
||||||
|
// URIs.
|
||||||
|
drs[1]->fileEntries[0]->getSpentUris().push_back("http://error");
|
||||||
|
|
||||||
drs[3]->option->put(PREF_FORCE_SAVE, A2_V_TRUE);
|
drs[3]->option->put(PREF_FORCE_SAVE, A2_V_TRUE);
|
||||||
for(size_t i = 0; i < sizeof(drs)/sizeof(drs[0]); ++i) {
|
for(size_t i = 0; i < sizeof(drs)/sizeof(drs[0]); ++i) {
|
||||||
|
@ -79,7 +90,7 @@ void SessionSerializerTest::testSave()
|
||||||
std::ifstream ss(filename.c_str(), std::ios::binary);
|
std::ifstream ss(filename.c_str(), std::ios::binary);
|
||||||
std::string line;
|
std::string line;
|
||||||
std::getline(ss, line);
|
std::getline(ss, line);
|
||||||
CPPUNIT_ASSERT_EQUAL(std::string("http://error2\thttp://error\t"), line);
|
CPPUNIT_ASSERT_EQUAL(std::string("http://error\thttp://error3\thttp://error2\t"), line);
|
||||||
std::getline(ss, line);
|
std::getline(ss, line);
|
||||||
CPPUNIT_ASSERT_EQUAL(fmt(" gid=%s", drs[1]->gid->toHex().c_str()), line);
|
CPPUNIT_ASSERT_EQUAL(fmt(" gid=%s", drs[1]->gid->toHex().c_str()), line);
|
||||||
std::getline(ss, line);
|
std::getline(ss, line);
|
||||||
|
|
Loading…
Reference in New Issue