Improve lines sorting memory consumption
Use a reference instead of a copy for the sorting result. Also improve lines sorting performance slightly: sorting a 200 MB text file takes 13.71 seconds instead of 14.63 seconds. Fix #10435, close #13852
pull/13858/head
parent
02dd1d36fc
commit
9e24ec55db
|
@ -746,11 +746,10 @@ generic_string stringReplace(generic_string subject, const generic_string& searc
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::vector<generic_string> stringSplit(const generic_string& input, const generic_string& delimiter)
|
void stringSplit(const generic_string& input, const generic_string& delimiter, std::vector<generic_string>& output)
|
||||||
{
|
{
|
||||||
size_t start = 0U;
|
size_t start = 0U;
|
||||||
size_t end = input.find(delimiter);
|
size_t end = input.find(delimiter);
|
||||||
std::vector<generic_string> output;
|
|
||||||
const size_t delimiterLength = delimiter.length();
|
const size_t delimiterLength = delimiter.length();
|
||||||
while (end != std::string::npos)
|
while (end != std::string::npos)
|
||||||
{
|
{
|
||||||
|
@ -759,7 +758,6 @@ std::vector<generic_string> stringSplit(const generic_string& input, const gener
|
||||||
end = input.find(delimiter, start);
|
end = input.find(delimiter, start);
|
||||||
}
|
}
|
||||||
output.push_back(input.substr(start, end));
|
output.push_back(input.substr(start, end));
|
||||||
return output;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -784,7 +782,8 @@ bool str2numberVector(generic_string str2convert, std::vector<size_t>& numVect)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<generic_string> v = stringSplit(str2convert, TEXT(" "));
|
std::vector<generic_string> v;
|
||||||
|
stringSplit(str2convert, TEXT(" "), v);
|
||||||
for (const auto& i : v)
|
for (const auto& i : v)
|
||||||
{
|
{
|
||||||
// Don't treat empty string and the number greater than 9999
|
// Don't treat empty string and the number greater than 9999
|
||||||
|
@ -796,19 +795,17 @@ bool str2numberVector(generic_string str2convert, std::vector<size_t>& numVect)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
generic_string stringJoin(const std::vector<generic_string>& strings, const generic_string& separator)
|
void stringJoin(const std::vector<generic_string>& strings, const generic_string& separator, generic_string& joinedString)
|
||||||
{
|
{
|
||||||
generic_string joined;
|
|
||||||
size_t length = strings.size();
|
size_t length = strings.size();
|
||||||
for (size_t i = 0; i < length; ++i)
|
for (size_t i = 0; i < length; ++i)
|
||||||
{
|
{
|
||||||
joined += strings.at(i);
|
joinedString += strings.at(i);
|
||||||
if (i != length - 1)
|
if (i != length - 1)
|
||||||
{
|
{
|
||||||
joined += separator;
|
joinedString += separator;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return joined;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -157,9 +157,9 @@ COLORREF getCtrlBgColor(HWND hWnd);
|
||||||
generic_string stringToUpper(generic_string strToConvert);
|
generic_string stringToUpper(generic_string strToConvert);
|
||||||
generic_string stringToLower(generic_string strToConvert);
|
generic_string stringToLower(generic_string strToConvert);
|
||||||
generic_string stringReplace(generic_string subject, const generic_string& search, const generic_string& replace);
|
generic_string stringReplace(generic_string subject, const generic_string& search, const generic_string& replace);
|
||||||
std::vector<generic_string> stringSplit(const generic_string& input, const generic_string& delimiter);
|
void stringSplit(const generic_string& input, const generic_string& delimiter, std::vector<generic_string>& output);
|
||||||
bool str2numberVector(generic_string str2convert, std::vector<size_t>& numVect);
|
bool str2numberVector(generic_string str2convert, std::vector<size_t>& numVect);
|
||||||
generic_string stringJoin(const std::vector<generic_string>& strings, const generic_string& separator);
|
void stringJoin(const std::vector<generic_string>& strings, const generic_string& separator, generic_string& joinedString);
|
||||||
generic_string stringTakeWhileAdmissable(const generic_string& input, const generic_string& admissable);
|
generic_string stringTakeWhileAdmissable(const generic_string& input, const generic_string& admissable);
|
||||||
double stodLocale(const generic_string& str, _locale_t loc, size_t* idx = NULL);
|
double stodLocale(const generic_string& str, _locale_t loc, size_t* idx = NULL);
|
||||||
|
|
||||||
|
|
|
@ -67,7 +67,7 @@ public:
|
||||||
assert(_fromColumn <= _toColumn);
|
assert(_fromColumn <= _toColumn);
|
||||||
};
|
};
|
||||||
virtual ~ISorter() { };
|
virtual ~ISorter() { };
|
||||||
virtual std::vector<generic_string> sort(std::vector<generic_string> lines) = 0;
|
virtual void sort(std::vector<generic_string>& lines) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Implementation of lexicographic sorting of lines.
|
// Implementation of lexicographic sorting of lines.
|
||||||
|
@ -76,7 +76,7 @@ class LexicographicSorter : public ISorter
|
||||||
public:
|
public:
|
||||||
LexicographicSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
|
LexicographicSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
|
||||||
|
|
||||||
std::vector<generic_string> sort(std::vector<generic_string> lines) override {
|
void sort(std::vector<generic_string>& lines) override {
|
||||||
// Note that both branches here are equivalent in the sense that they always give the same answer.
|
// Note that both branches here are equivalent in the sense that they always give the same answer.
|
||||||
// However, if we are *not* sorting specific columns, then we get a 40% speed improvement by not calling
|
// However, if we are *not* sorting specific columns, then we get a 40% speed improvement by not calling
|
||||||
// getSortKey() so many times.
|
// getSortKey() so many times.
|
||||||
|
@ -109,7 +109,6 @@ public:
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
return lines;
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -119,7 +118,7 @@ class LexicographicCaseInsensitiveSorter : public ISorter
|
||||||
public:
|
public:
|
||||||
LexicographicCaseInsensitiveSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
|
LexicographicCaseInsensitiveSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
|
||||||
|
|
||||||
std::vector<generic_string> sort(std::vector<generic_string> lines) override {
|
void sort(std::vector<generic_string>& lines) override {
|
||||||
// Note that both branches here are equivalent in the sense that they always give the same answer.
|
// Note that both branches here are equivalent in the sense that they always give the same answer.
|
||||||
// However, if we are *not* sorting specific columns, then we get a 40% speed improvement by not calling
|
// However, if we are *not* sorting specific columns, then we get a 40% speed improvement by not calling
|
||||||
// getSortKey() so many times.
|
// getSortKey() so many times.
|
||||||
|
@ -151,7 +150,6 @@ public:
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
return lines;
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -160,7 +158,7 @@ class IntegerSorter : public ISorter
|
||||||
public:
|
public:
|
||||||
IntegerSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
|
IntegerSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
|
||||||
|
|
||||||
std::vector<generic_string> sort(std::vector<generic_string> lines) override {
|
void sort(std::vector<generic_string>& lines) override {
|
||||||
if (isSortingSpecificColumns())
|
if (isSortingSpecificColumns())
|
||||||
{
|
{
|
||||||
std::stable_sort(lines.begin(), lines.end(), [this](generic_string aIn, generic_string bIn)
|
std::stable_sort(lines.begin(), lines.end(), [this](generic_string aIn, generic_string bIn)
|
||||||
|
@ -496,8 +494,6 @@ public:
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
return lines;
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -523,7 +519,7 @@ public:
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<generic_string> sort(std::vector<generic_string> lines) override {
|
void sort(std::vector<generic_string>& lines) override {
|
||||||
// Note that empty lines are filtered out and added back manually to the output at the end.
|
// Note that empty lines are filtered out and added back manually to the output at the end.
|
||||||
std::vector<std::pair<size_t, T_Num>> nonEmptyInputAsNumbers;
|
std::vector<std::pair<size_t, T_Num>> nonEmptyInputAsNumbers;
|
||||||
std::vector<generic_string> empties;
|
std::vector<generic_string> empties;
|
||||||
|
@ -581,7 +577,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(output.size() == lines.size());
|
assert(output.size() == lines.size());
|
||||||
return output;
|
lines = output;
|
||||||
};
|
};
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -640,9 +636,8 @@ class ReverseSorter : public ISorter
|
||||||
public:
|
public:
|
||||||
ReverseSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
|
ReverseSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
|
||||||
|
|
||||||
std::vector<generic_string> sort(std::vector<generic_string> lines) override {
|
void sort(std::vector<generic_string>& lines) override {
|
||||||
std::reverse(lines.begin(), lines.end());
|
std::reverse(lines.begin(), lines.end());
|
||||||
return lines;
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -655,9 +650,8 @@ public:
|
||||||
seed = static_cast<unsigned>(time(NULL));
|
seed = static_cast<unsigned>(time(NULL));
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector<generic_string> sort(std::vector<generic_string> lines) override {
|
void sort(std::vector<generic_string>& lines) override {
|
||||||
std::shuffle(lines.begin(), lines.end(), std::default_random_engine(seed));
|
std::shuffle(lines.begin(), lines.end(), std::default_random_engine(seed));
|
||||||
return lines;
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -4821,7 +4821,9 @@ void Finder::copy()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const generic_string toClipboard = stringJoin(lines, TEXT("\r\n")) + TEXT("\r\n");
|
generic_string toClipboard;
|
||||||
|
stringJoin(lines, TEXT("\r\n"), toClipboard);
|
||||||
|
toClipboard += TEXT("\r\n");
|
||||||
if (!toClipboard.empty())
|
if (!toClipboard.empty())
|
||||||
{
|
{
|
||||||
if (!str2Clipboard(toClipboard, _hSelf))
|
if (!str2Clipboard(toClipboard, _hSelf))
|
||||||
|
|
|
@ -3943,7 +3943,8 @@ void ScintillaEditView::sortLines(size_t fromLine, size_t toLine, ISorter* pSort
|
||||||
const auto startPos = execute(SCI_POSITIONFROMLINE, fromLine);
|
const auto startPos = execute(SCI_POSITIONFROMLINE, fromLine);
|
||||||
const auto endPos = execute(SCI_POSITIONFROMLINE, toLine) + execute(SCI_LINELENGTH, toLine);
|
const auto endPos = execute(SCI_POSITIONFROMLINE, toLine) + execute(SCI_LINELENGTH, toLine);
|
||||||
const generic_string text = getGenericTextAsString(startPos, endPos);
|
const generic_string text = getGenericTextAsString(startPos, endPos);
|
||||||
std::vector<generic_string> splitText = stringSplit(text, getEOLString());
|
std::vector<generic_string> splitText;
|
||||||
|
stringSplit(text, getEOLString(), splitText);
|
||||||
const size_t lineCount = execute(SCI_GETLINECOUNT);
|
const size_t lineCount = execute(SCI_GETLINECOUNT);
|
||||||
const bool sortEntireDocument = toLine == lineCount - 1;
|
const bool sortEntireDocument = toLine == lineCount - 1;
|
||||||
if (!sortEntireDocument)
|
if (!sortEntireDocument)
|
||||||
|
@ -3954,8 +3955,10 @@ void ScintillaEditView::sortLines(size_t fromLine, size_t toLine, ISorter* pSort
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert(toLine - fromLine + 1 == splitText.size());
|
assert(toLine - fromLine + 1 == splitText.size());
|
||||||
const std::vector<generic_string> sortedText = pSort->sort(splitText);
|
pSort->sort(splitText);
|
||||||
generic_string joined = stringJoin(sortedText, getEOLString());
|
generic_string joined;
|
||||||
|
stringJoin(splitText, getEOLString(), joined);
|
||||||
|
|
||||||
if (sortEntireDocument)
|
if (sortEntireDocument)
|
||||||
{
|
{
|
||||||
assert(joined.length() == text.length());
|
assert(joined.length() == text.length());
|
||||||
|
@ -4272,7 +4275,8 @@ void ScintillaEditView::removeAnyDuplicateLines()
|
||||||
const auto startPos = execute(SCI_POSITIONFROMLINE, fromLine);
|
const auto startPos = execute(SCI_POSITIONFROMLINE, fromLine);
|
||||||
const auto endPos = execute(SCI_POSITIONFROMLINE, toLine) + execute(SCI_LINELENGTH, toLine);
|
const auto endPos = execute(SCI_POSITIONFROMLINE, toLine) + execute(SCI_LINELENGTH, toLine);
|
||||||
const generic_string text = getGenericTextAsString(startPos, endPos);
|
const generic_string text = getGenericTextAsString(startPos, endPos);
|
||||||
std::vector<generic_string> linesVect = stringSplit(text, getEOLString());
|
std::vector<generic_string> linesVect;
|
||||||
|
stringSplit(text, getEOLString(), linesVect);
|
||||||
const size_t lineCount = execute(SCI_GETLINECOUNT);
|
const size_t lineCount = execute(SCI_GETLINECOUNT);
|
||||||
|
|
||||||
const bool doingEntireDocument = toLine == lineCount - 1;
|
const bool doingEntireDocument = toLine == lineCount - 1;
|
||||||
|
@ -4288,7 +4292,8 @@ void ScintillaEditView::removeAnyDuplicateLines()
|
||||||
size_t newSize = vecRemoveDuplicates(linesVect);
|
size_t newSize = vecRemoveDuplicates(linesVect);
|
||||||
if (origSize != newSize)
|
if (origSize != newSize)
|
||||||
{
|
{
|
||||||
generic_string joined = stringJoin(linesVect, getEOLString());
|
generic_string joined;
|
||||||
|
stringJoin(linesVect, getEOLString(), joined);
|
||||||
if (!doingEntireDocument)
|
if (!doingEntireDocument)
|
||||||
{
|
{
|
||||||
joined += getEOLString();
|
joined += getEOLString();
|
||||||
|
|
Loading…
Reference in New Issue