From 9e24ec55db566f3feafdb4c73cf65d46095b49a2 Mon Sep 17 00:00:00 2001 From: Don Ho Date: Sat, 1 Jul 2023 18:10:01 +0200 Subject: [PATCH] Improve lines sorting memory consumption Use reference instead of copy for the sorting result. Also improve lines sorting performance slightly: Sorting a 200 MB text file takes 13.71 seconds instead of 14.63 seconds. Fix #10435, close #13852 --- PowerEditor/src/MISC/Common/Common.cpp | 15 +++++-------- PowerEditor/src/MISC/Common/Common.h | 4 ++-- PowerEditor/src/MISC/Common/Sorters.h | 22 +++++++------------ .../src/ScintillaComponent/FindReplaceDlg.cpp | 4 +++- .../ScintillaComponent/ScintillaEditView.cpp | 15 ++++++++----- 5 files changed, 29 insertions(+), 31 deletions(-) diff --git a/PowerEditor/src/MISC/Common/Common.cpp b/PowerEditor/src/MISC/Common/Common.cpp index bc10fdc02..633afbeec 100644 --- a/PowerEditor/src/MISC/Common/Common.cpp +++ b/PowerEditor/src/MISC/Common/Common.cpp @@ -746,11 +746,10 @@ generic_string stringReplace(generic_string subject, const generic_string& searc } -std::vector stringSplit(const generic_string& input, const generic_string& delimiter) +void stringSplit(const generic_string& input, const generic_string& delimiter, std::vector& output) { size_t start = 0U; size_t end = input.find(delimiter); - std::vector output; const size_t delimiterLength = delimiter.length(); while (end != std::string::npos) { @@ -759,7 +758,6 @@ std::vector stringSplit(const generic_string& input, const gener end = input.find(delimiter, start); } output.push_back(input.substr(start, end)); - return output; } @@ -784,7 +782,8 @@ bool str2numberVector(generic_string str2convert, std::vector& numVect) } } - std::vector v = stringSplit(str2convert, TEXT(" ")); + std::vector v; + stringSplit(str2convert, TEXT(" "), v); for (const auto& i : v) { // Don't treat empty string and the number greater than 9999 @@ -796,19 +795,17 @@ bool str2numberVector(generic_string str2convert, std::vector& numVect) return true; } -generic_string stringJoin(const std::vector& strings, const generic_string& separator) +void stringJoin(const std::vector& strings, const generic_string& separator, generic_string& joinedString) { - generic_string joined; size_t length = strings.size(); for (size_t i = 0; i < length; ++i) { - joined += strings.at(i); + joinedString += strings.at(i); if (i != length - 1) { - joined += separator; + joinedString += separator; } } - return joined; } diff --git a/PowerEditor/src/MISC/Common/Common.h b/PowerEditor/src/MISC/Common/Common.h index 0a60bc062..12c9201e8 100644 --- a/PowerEditor/src/MISC/Common/Common.h +++ b/PowerEditor/src/MISC/Common/Common.h @@ -157,9 +157,9 @@ COLORREF getCtrlBgColor(HWND hWnd); generic_string stringToUpper(generic_string strToConvert); generic_string stringToLower(generic_string strToConvert); generic_string stringReplace(generic_string subject, const generic_string& search, const generic_string& replace); -std::vector stringSplit(const generic_string& input, const generic_string& delimiter); +void stringSplit(const generic_string& input, const generic_string& delimiter, std::vector& output); bool str2numberVector(generic_string str2convert, std::vector& numVect); -generic_string stringJoin(const std::vector& strings, const generic_string& separator); +void stringJoin(const std::vector& strings, const generic_string& separator, generic_string& joinedString); generic_string stringTakeWhileAdmissable(const generic_string& input, const generic_string& admissable); double stodLocale(const generic_string& str, _locale_t loc, size_t* idx = NULL); diff --git a/PowerEditor/src/MISC/Common/Sorters.h b/PowerEditor/src/MISC/Common/Sorters.h index 785422b5f..c8b5acb5e 100644 --- a/PowerEditor/src/MISC/Common/Sorters.h +++ b/PowerEditor/src/MISC/Common/Sorters.h @@ -67,7 +67,7 @@ public: assert(_fromColumn <= _toColumn); }; virtual ~ISorter() { }; - virtual std::vector sort(std::vector lines) = 0; + virtual void sort(std::vector& lines) = 0; }; // Implementation of lexicographic sorting of lines. @@ -76,7 +76,7 @@ class LexicographicSorter : public ISorter public: LexicographicSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { }; - std::vector sort(std::vector lines) override { + void sort(std::vector& lines) override { // Note that both branches here are equivalent in the sense that they always give the same answer. // However, if we are *not* sorting specific columns, then we get a 40% speed improvement by not calling // getSortKey() so many times. @@ -109,7 +109,6 @@ public: } }); } - return lines; }; }; @@ -119,7 +118,7 @@ class LexicographicCaseInsensitiveSorter : public ISorter public: LexicographicCaseInsensitiveSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { }; - std::vector sort(std::vector lines) override { + void sort(std::vector& lines) override { // Note that both branches here are equivalent in the sense that they always give the same answer. // However, if we are *not* sorting specific columns, then we get a 40% speed improvement by not calling // getSortKey() so many times. @@ -151,7 +150,6 @@ public: } }); } - return lines; }; }; @@ -160,7 +158,7 @@ class IntegerSorter : public ISorter public: IntegerSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { }; - std::vector sort(std::vector lines) override { + void sort(std::vector& lines) override { if (isSortingSpecificColumns()) { std::stable_sort(lines.begin(), lines.end(), [this](generic_string aIn, generic_string bIn) @@ -496,8 +494,6 @@ public: } }); } - - return lines; }; }; @@ -523,7 +519,7 @@ public: #endif } - std::vector sort(std::vector lines) override { + void sort(std::vector& lines) override { // Note that empty lines are filtered out and added back manually to the output at the end. std::vector> nonEmptyInputAsNumbers; std::vector empties; @@ -581,7 +577,7 @@ public: } assert(output.size() == lines.size()); - return output; + lines = output; }; protected: @@ -640,9 +636,8 @@ class ReverseSorter : public ISorter public: ReverseSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { }; - std::vector sort(std::vector lines) override { + void sort(std::vector& lines) override { std::reverse(lines.begin(), lines.end()); - return lines; }; }; @@ -655,9 +650,8 @@ public: seed = static_cast(time(NULL)); }; - std::vector sort(std::vector lines) override { + void sort(std::vector& lines) override { std::shuffle(lines.begin(), lines.end(), std::default_random_engine(seed)); - return lines; }; }; diff --git a/PowerEditor/src/ScintillaComponent/FindReplaceDlg.cpp b/PowerEditor/src/ScintillaComponent/FindReplaceDlg.cpp index 57c8ebdd3..35aac3a43 100644 --- a/PowerEditor/src/ScintillaComponent/FindReplaceDlg.cpp +++ b/PowerEditor/src/ScintillaComponent/FindReplaceDlg.cpp @@ -4821,7 +4821,9 @@ void Finder::copy() } } } - const generic_string toClipboard = stringJoin(lines, TEXT("\r\n")) + TEXT("\r\n"); + generic_string toClipboard; + stringJoin(lines, TEXT("\r\n"), toClipboard); + toClipboard += TEXT("\r\n"); if (!toClipboard.empty()) { if (!str2Clipboard(toClipboard, _hSelf)) diff --git a/PowerEditor/src/ScintillaComponent/ScintillaEditView.cpp b/PowerEditor/src/ScintillaComponent/ScintillaEditView.cpp index ee1ab652e..c0073a559 100644 --- a/PowerEditor/src/ScintillaComponent/ScintillaEditView.cpp +++ b/PowerEditor/src/ScintillaComponent/ScintillaEditView.cpp @@ -3943,7 +3943,8 @@ void ScintillaEditView::sortLines(size_t fromLine, size_t toLine, ISorter* pSort const auto startPos = execute(SCI_POSITIONFROMLINE, fromLine); const auto endPos = execute(SCI_POSITIONFROMLINE, toLine) + execute(SCI_LINELENGTH, toLine); const generic_string text = getGenericTextAsString(startPos, endPos); - std::vector splitText = stringSplit(text, getEOLString()); + std::vector splitText; + stringSplit(text, getEOLString(), splitText); const size_t lineCount = execute(SCI_GETLINECOUNT); const bool sortEntireDocument = toLine == lineCount - 1; if (!sortEntireDocument) @@ -3954,8 +3955,10 @@ void ScintillaEditView::sortLines(size_t fromLine, size_t toLine, ISorter* pSort } } assert(toLine - fromLine + 1 == splitText.size()); - const std::vector sortedText = pSort->sort(splitText); - generic_string joined = stringJoin(sortedText, getEOLString()); + pSort->sort(splitText); + generic_string joined; + stringJoin(splitText, getEOLString(), joined); + if (sortEntireDocument) { assert(joined.length() == text.length()); @@ -4272,7 +4275,8 @@ void ScintillaEditView::removeAnyDuplicateLines() const auto startPos = execute(SCI_POSITIONFROMLINE, fromLine); const auto endPos = execute(SCI_POSITIONFROMLINE, toLine) + execute(SCI_LINELENGTH, toLine); const generic_string text = getGenericTextAsString(startPos, endPos); - std::vector linesVect = stringSplit(text, getEOLString()); + std::vector linesVect; + stringSplit(text, getEOLString(), linesVect); const size_t lineCount = execute(SCI_GETLINECOUNT); const bool doingEntireDocument = toLine == lineCount - 1; @@ -4288,7 +4292,8 @@ void ScintillaEditView::removeAnyDuplicateLines() size_t newSize = vecRemoveDuplicates(linesVect); if (origSize != newSize) { - generic_string joined = stringJoin(linesVect, getEOLString()); + generic_string joined; + stringJoin(linesVect, getEOLString(), joined); if (!doingEntireDocument) { joined += getEOLString();