Reduce memory consumption of line sorting

Pass the vector of lines to the sorters by reference and sort it in place, instead of passing a copy and returning another copy as the sorting result.
This also slightly improves line-sorting performance: sorting a 200 MB text file now takes 13.71 seconds instead of 14.63 seconds.

Fix #10435, close #13852
pull/13858/head
Don Ho 2023-07-01 18:10:01 +02:00
parent 02dd1d36fc
commit 9e24ec55db
5 changed files with 29 additions and 31 deletions

View File

@ -746,11 +746,10 @@ generic_string stringReplace(generic_string subject, const generic_string& searc
} }
std::vector<generic_string> stringSplit(const generic_string& input, const generic_string& delimiter) void stringSplit(const generic_string& input, const generic_string& delimiter, std::vector<generic_string>& output)
{ {
size_t start = 0U; size_t start = 0U;
size_t end = input.find(delimiter); size_t end = input.find(delimiter);
std::vector<generic_string> output;
const size_t delimiterLength = delimiter.length(); const size_t delimiterLength = delimiter.length();
while (end != std::string::npos) while (end != std::string::npos)
{ {
@ -759,7 +758,6 @@ std::vector<generic_string> stringSplit(const generic_string& input, const gener
end = input.find(delimiter, start); end = input.find(delimiter, start);
} }
output.push_back(input.substr(start, end)); output.push_back(input.substr(start, end));
return output;
} }
@ -784,7 +782,8 @@ bool str2numberVector(generic_string str2convert, std::vector<size_t>& numVect)
} }
} }
std::vector<generic_string> v = stringSplit(str2convert, TEXT(" ")); std::vector<generic_string> v;
stringSplit(str2convert, TEXT(" "), v);
for (const auto& i : v) for (const auto& i : v)
{ {
// Don't treat empty string and the number greater than 9999 // Don't treat empty string and the number greater than 9999
@ -796,19 +795,17 @@ bool str2numberVector(generic_string str2convert, std::vector<size_t>& numVect)
return true; return true;
} }
generic_string stringJoin(const std::vector<generic_string>& strings, const generic_string& separator) void stringJoin(const std::vector<generic_string>& strings, const generic_string& separator, generic_string& joinedString)
{ {
generic_string joined;
size_t length = strings.size(); size_t length = strings.size();
for (size_t i = 0; i < length; ++i) for (size_t i = 0; i < length; ++i)
{ {
joined += strings.at(i); joinedString += strings.at(i);
if (i != length - 1) if (i != length - 1)
{ {
joined += separator; joinedString += separator;
} }
} }
return joined;
} }

View File

@ -157,9 +157,9 @@ COLORREF getCtrlBgColor(HWND hWnd);
generic_string stringToUpper(generic_string strToConvert); generic_string stringToUpper(generic_string strToConvert);
generic_string stringToLower(generic_string strToConvert); generic_string stringToLower(generic_string strToConvert);
generic_string stringReplace(generic_string subject, const generic_string& search, const generic_string& replace); generic_string stringReplace(generic_string subject, const generic_string& search, const generic_string& replace);
std::vector<generic_string> stringSplit(const generic_string& input, const generic_string& delimiter); void stringSplit(const generic_string& input, const generic_string& delimiter, std::vector<generic_string>& output);
bool str2numberVector(generic_string str2convert, std::vector<size_t>& numVect); bool str2numberVector(generic_string str2convert, std::vector<size_t>& numVect);
generic_string stringJoin(const std::vector<generic_string>& strings, const generic_string& separator); void stringJoin(const std::vector<generic_string>& strings, const generic_string& separator, generic_string& joinedString);
generic_string stringTakeWhileAdmissable(const generic_string& input, const generic_string& admissable); generic_string stringTakeWhileAdmissable(const generic_string& input, const generic_string& admissable);
double stodLocale(const generic_string& str, _locale_t loc, size_t* idx = NULL); double stodLocale(const generic_string& str, _locale_t loc, size_t* idx = NULL);

View File

@ -67,7 +67,7 @@ public:
assert(_fromColumn <= _toColumn); assert(_fromColumn <= _toColumn);
}; };
virtual ~ISorter() { }; virtual ~ISorter() { };
virtual std::vector<generic_string> sort(std::vector<generic_string> lines) = 0; virtual void sort(std::vector<generic_string>& lines) = 0;
}; };
// Implementation of lexicographic sorting of lines. // Implementation of lexicographic sorting of lines.
@ -76,7 +76,7 @@ class LexicographicSorter : public ISorter
public: public:
LexicographicSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { }; LexicographicSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
std::vector<generic_string> sort(std::vector<generic_string> lines) override { void sort(std::vector<generic_string>& lines) override {
// Note that both branches here are equivalent in the sense that they always give the same answer. // Note that both branches here are equivalent in the sense that they always give the same answer.
// However, if we are *not* sorting specific columns, then we get a 40% speed improvement by not calling // However, if we are *not* sorting specific columns, then we get a 40% speed improvement by not calling
// getSortKey() so many times. // getSortKey() so many times.
@ -109,7 +109,6 @@ public:
} }
}); });
} }
return lines;
}; };
}; };
@ -119,7 +118,7 @@ class LexicographicCaseInsensitiveSorter : public ISorter
public: public:
LexicographicCaseInsensitiveSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { }; LexicographicCaseInsensitiveSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
std::vector<generic_string> sort(std::vector<generic_string> lines) override { void sort(std::vector<generic_string>& lines) override {
// Note that both branches here are equivalent in the sense that they always give the same answer. // Note that both branches here are equivalent in the sense that they always give the same answer.
// However, if we are *not* sorting specific columns, then we get a 40% speed improvement by not calling // However, if we are *not* sorting specific columns, then we get a 40% speed improvement by not calling
// getSortKey() so many times. // getSortKey() so many times.
@ -151,7 +150,6 @@ public:
} }
}); });
} }
return lines;
}; };
}; };
@ -160,7 +158,7 @@ class IntegerSorter : public ISorter
public: public:
IntegerSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { }; IntegerSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
std::vector<generic_string> sort(std::vector<generic_string> lines) override { void sort(std::vector<generic_string>& lines) override {
if (isSortingSpecificColumns()) if (isSortingSpecificColumns())
{ {
std::stable_sort(lines.begin(), lines.end(), [this](generic_string aIn, generic_string bIn) std::stable_sort(lines.begin(), lines.end(), [this](generic_string aIn, generic_string bIn)
@ -496,8 +494,6 @@ public:
} }
}); });
} }
return lines;
}; };
}; };
@ -523,7 +519,7 @@ public:
#endif #endif
} }
std::vector<generic_string> sort(std::vector<generic_string> lines) override { void sort(std::vector<generic_string>& lines) override {
// Note that empty lines are filtered out and added back manually to the output at the end. // Note that empty lines are filtered out and added back manually to the output at the end.
std::vector<std::pair<size_t, T_Num>> nonEmptyInputAsNumbers; std::vector<std::pair<size_t, T_Num>> nonEmptyInputAsNumbers;
std::vector<generic_string> empties; std::vector<generic_string> empties;
@ -581,7 +577,7 @@ public:
} }
assert(output.size() == lines.size()); assert(output.size() == lines.size());
return output; lines = output;
}; };
protected: protected:
@ -640,9 +636,8 @@ class ReverseSorter : public ISorter
public: public:
ReverseSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { }; ReverseSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
std::vector<generic_string> sort(std::vector<generic_string> lines) override { void sort(std::vector<generic_string>& lines) override {
std::reverse(lines.begin(), lines.end()); std::reverse(lines.begin(), lines.end());
return lines;
}; };
}; };
@ -655,9 +650,8 @@ public:
seed = static_cast<unsigned>(time(NULL)); seed = static_cast<unsigned>(time(NULL));
}; };
std::vector<generic_string> sort(std::vector<generic_string> lines) override { void sort(std::vector<generic_string>& lines) override {
std::shuffle(lines.begin(), lines.end(), std::default_random_engine(seed)); std::shuffle(lines.begin(), lines.end(), std::default_random_engine(seed));
return lines;
}; };
}; };

View File

@ -4821,7 +4821,9 @@ void Finder::copy()
} }
} }
} }
const generic_string toClipboard = stringJoin(lines, TEXT("\r\n")) + TEXT("\r\n"); generic_string toClipboard;
stringJoin(lines, TEXT("\r\n"), toClipboard);
toClipboard += TEXT("\r\n");
if (!toClipboard.empty()) if (!toClipboard.empty())
{ {
if (!str2Clipboard(toClipboard, _hSelf)) if (!str2Clipboard(toClipboard, _hSelf))

View File

@ -3943,7 +3943,8 @@ void ScintillaEditView::sortLines(size_t fromLine, size_t toLine, ISorter* pSort
const auto startPos = execute(SCI_POSITIONFROMLINE, fromLine); const auto startPos = execute(SCI_POSITIONFROMLINE, fromLine);
const auto endPos = execute(SCI_POSITIONFROMLINE, toLine) + execute(SCI_LINELENGTH, toLine); const auto endPos = execute(SCI_POSITIONFROMLINE, toLine) + execute(SCI_LINELENGTH, toLine);
const generic_string text = getGenericTextAsString(startPos, endPos); const generic_string text = getGenericTextAsString(startPos, endPos);
std::vector<generic_string> splitText = stringSplit(text, getEOLString()); std::vector<generic_string> splitText;
stringSplit(text, getEOLString(), splitText);
const size_t lineCount = execute(SCI_GETLINECOUNT); const size_t lineCount = execute(SCI_GETLINECOUNT);
const bool sortEntireDocument = toLine == lineCount - 1; const bool sortEntireDocument = toLine == lineCount - 1;
if (!sortEntireDocument) if (!sortEntireDocument)
@ -3954,8 +3955,10 @@ void ScintillaEditView::sortLines(size_t fromLine, size_t toLine, ISorter* pSort
} }
} }
assert(toLine - fromLine + 1 == splitText.size()); assert(toLine - fromLine + 1 == splitText.size());
const std::vector<generic_string> sortedText = pSort->sort(splitText); pSort->sort(splitText);
generic_string joined = stringJoin(sortedText, getEOLString()); generic_string joined;
stringJoin(splitText, getEOLString(), joined);
if (sortEntireDocument) if (sortEntireDocument)
{ {
assert(joined.length() == text.length()); assert(joined.length() == text.length());
@ -4272,7 +4275,8 @@ void ScintillaEditView::removeAnyDuplicateLines()
const auto startPos = execute(SCI_POSITIONFROMLINE, fromLine); const auto startPos = execute(SCI_POSITIONFROMLINE, fromLine);
const auto endPos = execute(SCI_POSITIONFROMLINE, toLine) + execute(SCI_LINELENGTH, toLine); const auto endPos = execute(SCI_POSITIONFROMLINE, toLine) + execute(SCI_LINELENGTH, toLine);
const generic_string text = getGenericTextAsString(startPos, endPos); const generic_string text = getGenericTextAsString(startPos, endPos);
std::vector<generic_string> linesVect = stringSplit(text, getEOLString()); std::vector<generic_string> linesVect;
stringSplit(text, getEOLString(), linesVect);
const size_t lineCount = execute(SCI_GETLINECOUNT); const size_t lineCount = execute(SCI_GETLINECOUNT);
const bool doingEntireDocument = toLine == lineCount - 1; const bool doingEntireDocument = toLine == lineCount - 1;
@ -4288,7 +4292,8 @@ void ScintillaEditView::removeAnyDuplicateLines()
size_t newSize = vecRemoveDuplicates(linesVect); size_t newSize = vecRemoveDuplicates(linesVect);
if (origSize != newSize) if (origSize != newSize)
{ {
generic_string joined = stringJoin(linesVect, getEOLString()); generic_string joined;
stringJoin(linesVect, getEOLString(), joined);
if (!doingEntireDocument) if (!doingEntireDocument)
{ {
joined += getEOLString(); joined += getEOLString();