2022-01-04 23:07:50 +00:00
|
|
|
// Scintilla source code edit control
|
|
|
|
/** @file WordList.cxx
|
|
|
|
** Hold a list of words.
|
|
|
|
**/
|
|
|
|
// Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
|
|
|
|
// The License.txt file describes the conditions under which this software may be distributed.
|
|
|
|
|
|
|
|
#include <cstdlib>
|
|
|
|
#include <cassert>
|
|
|
|
#include <cstring>
|
|
|
|
|
Update scintilla 5.3.4 and lexilla 5.2.4 with:
https://www.scintilla.org/scintilla534.zip
Released 8 March 2023.
Add multithreaded wrap to significantly improve performance of wrapping large files.
More typesafe bindings of *Full APIs in ScintillaCall. Feature #1477.
Fix overlapping of text with line end wrap marker. Bug #2378.
Fix clipping of line end wrap symbol for SC_WRAPVISUALFLAGLOC_END_BY_TEXT.
Where a multi-byte character contains multiple styles, display each byte as a representation. This makes it easier to see and fix lexers that change styles mid-character, commonly because they use fixed size buffers.
Fix a potential crash with autocompletion list fill-ups where a SCN_CHARADDED handler retriggered an autocompletion list, but with no items that match the typed character.
lexilla523
Released 8 March 2023.
Add scripts/PromoteNew.bat script to promote .new files after checking.
Makefile: Remove 1024-byte line length limit.
Ruby: Add new lexical classes for % literals SCE_RB_STRING_W (%w non-interpolable string array), SCE_RB_STRING_I (%i non-interpolable symbol array), SCE_RB_STRING_QI (%I interpolable symbol array), and SCE_RB_STRING_QS (%s symbol). Issue #124.
Ruby: Disambiguate %= which may be a quote or modulo assignment. Issue #124, Bug #1255, Bug #2182.
Ruby: Fix additional fold level for single character in SCE_RB_STRING_QW. Issue #132.
Ruby: Set SCE_RB_HERE_QQ for unquoted and double-quoted heredocs and SCE_RB_HERE_QX for backticks-quoted heredocs. Issue #134.
Ruby: Recognise #{} inside SCE_RB_HERE_QQ and SCE_RB_HERE_QX. Issue #134.
Ruby: Improve regex and heredoc recognition. Issue #136.
Ruby: Highlight #@, #@@ and #$ style interpolation. Issue #140.
Ruby: Fix folding for multiple heredocs started on one line. Fix folding when there is a space after heredoc opening delimiter. Issue #135.
YAML: Remove 1024-byte line length limit.
https://www.scintilla.org/lexilla524.zip
Released 13 March 2023.
C++: Fix failure to recognize keywords containing upper case. Issue #149.
GDScript: Support % and $ node paths. Issue #145, Pull request #146.
Close #13338
2023-03-10 02:37:21 +00:00
|
|
|
#include <string>
|
2022-01-04 23:07:50 +00:00
|
|
|
#include <algorithm>
|
|
|
|
#include <iterator>
|
|
|
|
#include <memory>
|
|
|
|
|
|
|
|
#include "WordList.h"
|
|
|
|
|
|
|
|
using namespace Lexilla;
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
/**
 * Split @a wordlist in place into NUL-terminated words and build an array of
 * pointers to the start of each word.
 *
 * '\r' and '\n' always separate words; ' ' and '\t' do too unless
 * @a onlyLineEnds is true. When at least one word is found, every separator
 * within the first @a slen bytes is overwritten with '\0'.
 *
 * @param wordlist      Writable buffer holding the list; must not be null.
 * @param slen          Number of bytes of @a wordlist to examine.
 * @param len           Receives the number of words found.
 * @param onlyLineEnds  When true, only line ends separate words.
 * @return Array of *len word pointers followed by one extra entry pointing at
 *         wordlist[slen].
 */
std::unique_ptr<char *[]> ArrayFromWordList(char *wordlist, size_t slen, size_t *len, bool onlyLineEnds = false) {
	assert(wordlist);
	// For rapid determination of whether a character is a separator, build
	// a look up table.
	bool wordSeparator[256] = {};	// Initialise all to false.
	wordSeparator[static_cast<unsigned int>('\r')] = true;
	wordSeparator[static_cast<unsigned int>('\n')] = true;
	if (!onlyLineEnds) {
		wordSeparator[static_cast<unsigned int>(' ')] = true;
		wordSeparator[static_cast<unsigned int>('\t')] = true;
	}

	// Counting pass. It walks exactly the same bytes (0..slen) and applies the
	// same "start of word" rule as the storing pass below, so the array is
	// always large enough: a word starts at a non-separator byte that is at the
	// start of the buffer or follows a separator or a NUL byte.
	size_t words = 0;
	bool atBoundary = true;
	for (size_t k = 0; k < slen; k++) {
		const unsigned char curr = wordlist[k];
		if (wordSeparator[curr]) {
			atBoundary = true;
		} else {
			if (atBoundary)
				words++;
			atBoundary = (curr == '\0');
		}
	}

	// One extra slot for the trailing sentinel entry.
	std::unique_ptr<char *[]> keywords = std::make_unique<char *[]>(words + 1);
	size_t wordsStore = 0;
	if (words) {
		// Storing pass: separators become '\0', so a zero 'previous' means the
		// current non-separator byte begins a new word.
		unsigned char previous = '\0';
		for (size_t k = 0; k < slen; k++) {
			if (!wordSeparator[static_cast<unsigned char>(wordlist[k])]) {
				if (!previous) {
					keywords[wordsStore] = &wordlist[k];
					wordsStore++;
				}
			} else {
				wordlist[k] = '\0';
			}
			previous = wordlist[k];
		}
	}
	assert(wordsStore < (words + 1));
	// Sentinel entry: points just past the examined bytes.
	keywords[wordsStore] = &wordlist[slen];
	*len = wordsStore;
	return keywords;
}
|
|
|
|
|
|
|
|
/** Ordering predicate for sorting words: true when @a a sorts before @a b under strcmp. */
bool cmpWords(const char *a, const char *b) noexcept {
	const int order = strcmp(a, b);
	return order < 0;
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/** Construct an empty list; @a onlyLineEnds_ is stored and later passed to the word splitter by Set. */
WordList::WordList(bool onlyLineEnds_) noexcept :
	words(nullptr),
	list(nullptr),
	len(0),
	onlyLineEnds(onlyLineEnds_) {
	// Touch the first slot so static analyzers do not report 'starts' as
	// uninitialized.
	starts[0] = -1;
}
|
|
|
|
|
|
|
|
/** Release the storage held by the list. */
WordList::~WordList() {
	this->Clear();
}
|
|
|
|
|
|
|
|
/** True when the list holds at least one word. */
WordList::operator bool() const noexcept {
	const bool isEmpty = (len == 0);
	return !isEmpty;
}
|
|
|
|
|
|
|
|
/** True when the two lists differ in word count or in any word at the same index. */
bool WordList::operator!=(const WordList &other) const noexcept {
	// Lists of different sizes can never match.
	if (len != other.len)
		return true;
	// Same size: compare the words pairwise, stopping at the first mismatch.
	const bool identical = std::equal(words, words + len, other.words,
		[](const char *lhs, const char *rhs) noexcept {
			return strcmp(lhs, rhs) == 0;
		});
	return !identical;
}
|
|
|
|
|
|
|
|
int WordList::Length() const noexcept {
|
|
|
|
return static_cast<int>(len);
|
|
|
|
}
|
|
|
|
|
|
|
|
/** Free the word pointer table and the character buffer, leaving the list empty. */
void WordList::Clear() noexcept {
	// The pointer table and the character buffer are separate allocations,
	// so they can be released in either order.
	delete []words;
	words = nullptr;
	delete []list;
	list = nullptr;
	len = 0;
}
|
|
|
|
|
|
|
|
bool WordList::Set(const char *s) {
|
|
|
|
const size_t lenS = strlen(s) + 1;
|
|
|
|
std::unique_ptr<char[]> listTemp = std::make_unique<char[]>(lenS);
|
|
|
|
memcpy(listTemp.get(), s, lenS);
|
|
|
|
size_t lenTemp = 0;
|
|
|
|
std::unique_ptr<char *[]> wordsTemp = ArrayFromWordList(listTemp.get(), lenS - 1, &lenTemp, onlyLineEnds);
|
|
|
|
std::sort(wordsTemp.get(), wordsTemp.get() + lenTemp, cmpWords);
|
|
|
|
|
|
|
|
if (lenTemp == len) {
|
|
|
|
bool changed = false;
|
|
|
|
for (size_t i = 0; i < lenTemp; i++) {
|
|
|
|
if (strcmp(words[i], wordsTemp[i]) != 0) {
|
|
|
|
changed = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!changed) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Clear();
|
|
|
|
words = wordsTemp.release();
|
|
|
|
list = listTemp.release();
|
|
|
|
len = lenTemp;
|
|
|
|
std::fill(starts, std::end(starts), -1);
|
|
|
|
for (int l = static_cast<int>(len - 1); l >= 0; l--) {
|
2023-05-31 23:11:12 +00:00
|
|
|
unsigned char const indexChar = words[l][0];
|
2022-01-04 23:07:50 +00:00
|
|
|
starts[indexChar] = l;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/** Check whether a string is in the list.
|
|
|
|
* List elements are either exact matches or prefixes.
|
|
|
|
* Prefix elements start with '^' and match all strings that start with the rest of the element
|
|
|
|
* so '^GTK_' matches 'GTK_X', 'GTK_MAJOR_VERSION', and 'GTK_'.
|
|
|
|
*/
|
|
|
|
bool WordList::InList(const char *s) const noexcept {
|
|
|
|
if (!words)
|
|
|
|
return false;
|
2023-02-09 16:57:24 +00:00
|
|
|
const char first = s[0];
|
|
|
|
const unsigned char firstChar = first;
|
2022-01-04 23:07:50 +00:00
|
|
|
int j = starts[firstChar];
|
|
|
|
if (j >= 0) {
|
2023-02-09 16:57:24 +00:00
|
|
|
while (words[j][0] == first) {
|
2022-01-04 23:07:50 +00:00
|
|
|
if (s[1] == words[j][1]) {
|
|
|
|
const char *a = words[j] + 1;
|
|
|
|
const char *b = s + 1;
|
|
|
|
while (*a && *a == *b) {
|
|
|
|
a++;
|
|
|
|
b++;
|
|
|
|
}
|
|
|
|
if (!*a && !*b)
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
j = starts[static_cast<unsigned int>('^')];
|
|
|
|
if (j >= 0) {
|
|
|
|
while (words[j][0] == '^') {
|
|
|
|
const char *a = words[j] + 1;
|
|
|
|
const char *b = s;
|
|
|
|
while (*a && *a == *b) {
|
|
|
|
a++;
|
|
|
|
b++;
|
|
|
|
}
|
|
|
|
if (!*a)
|
|
|
|
return true;
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
Update scintilla 5.3.4 and lexilla 5.2.4 with:
https://www.scintilla.org/scintilla534.zip
Released 8 March 2023.
Add multithreaded wrap to significantly improve performance of wrapping large files.
More typesafe bindings of *Full APIs in ScintillaCall. Feature #1477.
Fix overlapping of text with line end wrap marker. Bug #2378.
Fix clipping of line end wrap symbol for SC_WRAPVISUALFLAGLOC_END_BY_TEXT.
Where a multi-byte character contains multiple styles, display each byte as a representation. This makes it easier to see and fix lexers that change styles mid-character, commonly because they use fixed size buffers.
Fix a potential crash with autocompletion list fill-ups where a SCN_CHARADDED handler retriggered an autocompletion list, but with no items that match the typed character.
lexilla523
Released 8 March 2023.
Add scripts/PromoteNew.bat script to promote .new files after checking.
Makefile: Remove 1024-byte line length limit.
Ruby: Add new lexical classes for % literals SCE_RB_STRING_W (%w non-interpolable string array), SCE_RB_STRING_I (%i non-interpolable symbol array), SCE_RB_STRING_QI (%I interpolable symbol array), and SCE_RB_STRING_QS (%s symbol). Issue #124.
Ruby: Disambiguate %= which may be a quote or modulo assignment. Issue #124, Bug #1255, Bug #2182.
Ruby: Fix additional fold level for single character in SCE_RB_STRING_QW. Issue #132.
Ruby: Set SCE_RB_HERE_QQ for unquoted and double-quoted heredocs and SCE_RB_HERE_QX for backticks-quoted heredocs. Issue #134.
Ruby: Recognise #{} inside SCE_RB_HERE_QQ and SCE_RB_HERE_QX. Issue #134.
Ruby: Improve regex and heredoc recognition. Issue #136.
Ruby: Highlight #@, #@@ and #$ style interpolation. Issue #140.
Ruby: Fix folding for multiple heredocs started on one line. Fix folding when there is a space after heredoc opening delimiter. Issue #135.
YAML: Remove 1024-byte line length limit.
https://www.scintilla.org/lexilla524.zip
Released 13 March 2023.
C++: Fix failure to recognize keywords containing upper case. Issue #149.
GDScript: Support % and $ node paths. Issue #145, Pull request #146.
Close #13338
2023-03-10 02:37:21 +00:00
|
|
|
/** convenience overload so can easily call with std::string.
|
|
|
|
*/
|
|
|
|
bool WordList::InList(const std::string &s) const noexcept {
|
|
|
|
return InList(s.c_str());
|
|
|
|
}
|
|
|
|
|
2022-01-04 23:07:50 +00:00
|
|
|
/** similar to InList, but word s can be a substring of keyword.
|
|
|
|
* eg. the keyword define is defined as def~ine. This means the word must start
|
|
|
|
* with def to be a keyword, but also defi, defin and define are valid.
|
|
|
|
* The marker is ~ in this case.
|
|
|
|
*/
|
|
|
|
bool WordList::InListAbbreviated(const char *s, const char marker) const noexcept {
|
|
|
|
if (!words)
|
|
|
|
return false;
|
2023-02-09 16:57:24 +00:00
|
|
|
const char first = s[0];
|
|
|
|
const unsigned char firstChar = first;
|
2022-01-04 23:07:50 +00:00
|
|
|
int j = starts[firstChar];
|
|
|
|
if (j >= 0) {
|
2023-02-09 16:57:24 +00:00
|
|
|
while (words[j][0] == first) {
|
2022-01-04 23:07:50 +00:00
|
|
|
bool isSubword = false;
|
|
|
|
int start = 1;
|
|
|
|
if (words[j][1] == marker) {
|
|
|
|
isSubword = true;
|
|
|
|
start++;
|
|
|
|
}
|
|
|
|
if (s[1] == words[j][start]) {
|
|
|
|
const char *a = words[j] + start;
|
|
|
|
const char *b = s + 1;
|
|
|
|
while (*a && *a == *b) {
|
|
|
|
a++;
|
|
|
|
if (*a == marker) {
|
|
|
|
isSubword = true;
|
|
|
|
a++;
|
|
|
|
}
|
|
|
|
b++;
|
|
|
|
}
|
|
|
|
if ((!*a || isSubword) && !*b)
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
j = starts[static_cast<unsigned int>('^')];
|
|
|
|
if (j >= 0) {
|
|
|
|
while (words[j][0] == '^') {
|
|
|
|
const char *a = words[j] + 1;
|
|
|
|
const char *b = s;
|
|
|
|
while (*a && *a == *b) {
|
|
|
|
a++;
|
|
|
|
b++;
|
|
|
|
}
|
|
|
|
if (!*a)
|
|
|
|
return true;
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
Updated to Scintilla 5.4.2 & Lexilla 5.3.1
https://www.scintilla.org/scintilla542.zip
Release 5.4.2
Released 5 March 2024.
Significantly reduce memory used for undo actions, often to a half or quarter of previous versions. Feature #1458.
Add APIs for saving and restoring undo history.
For GTK, when laying out text, detect runs with both left-to-right and right-to-left ranges and divide into an ASCII prefix and more complex suffix. Lay out the ASCII prefix in the standard manner but, for the suffix, measure the whole width and spread that over the suffix bytes. This produces more usable results where the caret moves over the ASCII prefix correctly and over the suffix reasonably but not accurately.
For ScintillaEdit on Qt, fix reference from ScintillaDocument to Document to match change in 5.4.1 using IDocumentEditable for SCI_GETDOCPOINTER and SCI_SETDOCPOINTER.
For Direct2D on Win32, use the multi-threaded option to avoid crashes when Scintilla instances created on different threads. There may be more problems with this scenario so it should be avoided. Bug #2420.
For Win32, ensure keyboard-initiated context menu appears in multi-screen situations.
https://www.scintilla.org/lexilla531.zip
Release 5.3.1
Released 5 March 2024.
Assembler: After comments, treat \r\n line ends the same as \n. This makes testing easier.
Bash: Fix folding when line changed to/from comment and previous line is comment. Issue #224.
Batch: Fix handling ':' next to keywords. Issue #222.
JavaScript: in cpp lexer, add lexer.cpp.backquoted.strings=2 mode to treat ` back-quoted strings as template literals which allow embedded ${expressions}. Issue #94.
Python: fix lexing of rb'' and rf'' strings. Issue #223, Pull request #227.
Ruby: fix lexing of methods on numeric literals like '3.times' so the '.' and method name do not appear in numeric style. Issue #225.
2024-03-06 21:05:54 +00:00
|
|
|
/** similar to InListAbbreviated, but word s can be an abridged version of a keyword.
|
2022-01-04 23:07:50 +00:00
|
|
|
* eg. the keyword is defined as "after.~:". This means the word must have a prefix (begins with) of
|
|
|
|
* "after." and suffix (ends with) of ":" to be a keyword, Hence "after.field:" , "after.form.item:" are valid.
|
|
|
|
* Similarly "~.is.valid" keyword is suffix only... hence "field.is.valid" , "form.is.valid" are valid.
|
|
|
|
* The marker is ~ in this case.
|
|
|
|
* No multiple markers check is done and wont work.
|
|
|
|
*/
|
|
|
|
bool WordList::InListAbridged(const char *s, const char marker) const noexcept {
|
|
|
|
if (!words)
|
|
|
|
return false;
|
2023-02-09 16:57:24 +00:00
|
|
|
const char first = s[0];
|
|
|
|
const unsigned char firstChar = first;
|
2022-01-04 23:07:50 +00:00
|
|
|
int j = starts[firstChar];
|
|
|
|
if (j >= 0) {
|
2023-02-09 16:57:24 +00:00
|
|
|
while (words[j][0] == first) {
|
2022-01-04 23:07:50 +00:00
|
|
|
const char *a = words[j];
|
|
|
|
const char *b = s;
|
|
|
|
while (*a && *a == *b) {
|
|
|
|
a++;
|
|
|
|
if (*a == marker) {
|
|
|
|
a++;
|
|
|
|
const size_t suffixLengthA = strlen(a);
|
|
|
|
const size_t suffixLengthB = strlen(b);
|
|
|
|
if (suffixLengthA >= suffixLengthB)
|
|
|
|
break;
|
|
|
|
b = b + suffixLengthB - suffixLengthA - 1;
|
|
|
|
}
|
|
|
|
b++;
|
|
|
|
}
|
|
|
|
if (!*a && !*b)
|
|
|
|
return true;
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
j = starts[static_cast<unsigned int>(marker)];
|
|
|
|
if (j >= 0) {
|
|
|
|
while (words[j][0] == marker) {
|
|
|
|
const char *a = words[j] + 1;
|
|
|
|
const char *b = s;
|
|
|
|
const size_t suffixLengthA = strlen(a);
|
|
|
|
const size_t suffixLengthB = strlen(b);
|
|
|
|
if (suffixLengthA > suffixLengthB) {
|
|
|
|
j++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
b = b + suffixLengthB - suffixLengthA;
|
|
|
|
|
|
|
|
while (*a && *a == *b) {
|
|
|
|
a++;
|
|
|
|
b++;
|
|
|
|
}
|
|
|
|
if (!*a && !*b)
|
|
|
|
return true;
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
const char *WordList::WordAt(int n) const noexcept {
|
|
|
|
return words[n];
|
|
|
|
}
|
|
|
|
|