You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
466 lines
13 KiB
466 lines
13 KiB
// Scintilla source code edit control
|
|
/** @file LexZig.cxx
|
|
** Lexer for Zig language.
|
|
**/
|
|
// Based on Zufu Liu's Notepad4 Zig lexer
|
|
// Modified for Scintilla by Jiri Techet, 2024
|
|
// The License.txt file describes the conditions under which this software may be distributed.
|
|
|
|
#include <cassert>
|
|
#include <cstring>
|
|
|
|
#include <string>
|
|
#include <string_view>
|
|
#include <vector>
|
|
#include <map>
|
|
#include <algorithm>
|
|
|
|
#include "ILexer.h"
|
|
#include "Scintilla.h"
|
|
#include "SciLexer.h"
|
|
|
|
#include "WordList.h"
|
|
#include "LexAccessor.h"
|
|
#include "Accessor.h"
|
|
#include "StyleContext.h"
|
|
#include "CharacterSet.h"
|
|
#include "LexerModule.h"
|
|
#include "OptionSet.h"
|
|
#include "DefaultLexer.h"
|
|
|
|
using namespace Scintilla;
|
|
using namespace Lexilla;
|
|
|
|
namespace {
|
|
// Use an unnamed namespace to protect the functions and classes from name conflicts
|
|
|
|
constexpr bool IsAGraphic(int ch) noexcept {
|
|
// excludes C0 control characters and whitespace
|
|
return ch > 32 && ch < 127;
|
|
}
|
|
|
|
constexpr bool IsIdentifierStart(int ch) noexcept {
|
|
return IsUpperOrLowerCase(ch) || ch == '_';
|
|
}
|
|
|
|
constexpr bool IsIdentifierStartEx(int ch) noexcept {
|
|
return IsIdentifierStart(ch) || ch >= 0x80;
|
|
}
|
|
|
|
constexpr bool IsNumberStart(int ch, int chNext) noexcept {
|
|
return IsADigit(ch) || (ch == '.' && IsADigit(chNext));
|
|
}
|
|
|
|
constexpr bool IsIdentifierChar(int ch) noexcept {
|
|
return IsAlphaNumeric(ch) || ch == '_';
|
|
}
|
|
|
|
constexpr bool IsNumberContinue(int chPrev, int ch, int chNext) noexcept {
|
|
return ((ch == '+' || ch == '-') && (chPrev == 'e' || chPrev == 'E'))
|
|
|| (ch == '.' && chNext != '.');
|
|
}
|
|
|
|
constexpr bool IsDecimalNumber(int chPrev, int ch, int chNext) noexcept {
|
|
return IsIdentifierChar(ch) || IsNumberContinue(chPrev, ch, chNext);
|
|
}
|
|
|
|
constexpr bool IsIdentifierCharEx(int ch) noexcept {
|
|
return IsIdentifierChar(ch) || ch >= 0x80;
|
|
}
|
|
|
|
// https://ziglang.org/documentation/master/#Escape-Sequences
|
|
struct EscapeSequence {
|
|
int outerState = SCE_ZIG_DEFAULT;
|
|
int digitsLeft = 0;
|
|
bool brace = false;
|
|
|
|
// highlight any character as escape sequence.
|
|
void resetEscapeState(int state, int chNext) noexcept {
|
|
outerState = state;
|
|
digitsLeft = 1;
|
|
brace = false;
|
|
if (chNext == 'x') {
|
|
digitsLeft = 3;
|
|
} else if (chNext == 'u') {
|
|
digitsLeft = 5;
|
|
}
|
|
}
|
|
void resetEscapeState(int state) noexcept {
|
|
outerState = state;
|
|
digitsLeft = 1;
|
|
brace = false;
|
|
}
|
|
bool atEscapeEnd(int ch) noexcept {
|
|
--digitsLeft;
|
|
return digitsLeft <= 0 || !IsAHeXDigit(ch);
|
|
}
|
|
};
|
|
|
|
enum {
|
|
ZigLineStateMaskLineComment = 1, // line comment
|
|
ZigLineStateMaskMultilineString = 1 << 1, // multiline string
|
|
};
|
|
|
|
struct FoldLineState {
|
|
int lineComment;
|
|
int multilineString;
|
|
constexpr explicit FoldLineState(int lineState) noexcept:
|
|
lineComment(lineState & ZigLineStateMaskLineComment),
|
|
multilineString((lineState >> 1) & 1) {
|
|
}
|
|
};
|
|
|
|
enum class KeywordType {
|
|
None = SCE_ZIG_DEFAULT,
|
|
Function = SCE_ZIG_FUNCTION,
|
|
};
|
|
|
|
enum {
|
|
KeywordIndex_Primary = 0,
|
|
KeywordIndex_Secondary = 1,
|
|
KeywordIndex_Tertiary = 2,
|
|
KeywordIndex_Type = 3,
|
|
};
|
|
|
|
// Options used for LexerZig
|
|
struct OptionsZig {
|
|
bool fold = false;
|
|
};
|
|
|
|
const char *const zigWordListDesc[] = {
|
|
"Primary keywords",
|
|
"Secondary keywords",
|
|
"Tertiary keywords",
|
|
"Global type definitions",
|
|
nullptr
|
|
};
|
|
|
|
struct OptionSetZig : public OptionSet<OptionsZig> {
|
|
OptionSetZig() {
|
|
DefineProperty("fold", &OptionsZig::fold);
|
|
|
|
DefineWordListSets(zigWordListDesc);
|
|
}
|
|
};
|
|
|
|
LexicalClass lexicalClasses[] = {
|
|
// Lexer ZIG SCLEX_ZIG SCE_ZIG_:
|
|
0, "SCE_ZIG_DEFAULT", "default", "White space",
|
|
1, "SCE_ZIG_COMMENTLINE", "comment line", "Comment: //",
|
|
2, "SCE_ZIG_COMMENTLINEDOC", "comment line documentation", "Comment: ///",
|
|
3, "SCE_ZIG_COMMENTLINETOP", "comment line documentation", "Comment: //!",
|
|
4, "SCE_ZIG_NUMBER", "literal numeric", "Number",
|
|
5, "SCE_ZIG_OPERATOR", "operator", "Operator",
|
|
6, "SCE_ZIG_CHARACTER", "literal string character", "Single quoted string",
|
|
7, "SCE_ZIG_STRING", "literal string", "Double quoted string",
|
|
8, "SCE_ZIG_MULTISTRING", "literal string multiline", "Multiline string introduced by two backslashes",
|
|
9, "SCE_ZIG_ESCAPECHAR", "literal string escapesequence", "Escape sequence",
|
|
10, "SCE_ZIG_IDENTIFIER", "identifier", "Identifier",
|
|
11, "SCE_ZIG_FUNCTION", "identifier", "Function definition",
|
|
12, "SCE_ZIG_BUILTIN_FUNCTION", "identifier", "Builtin function",
|
|
13, "SCE_ZIG_KW_PRIMARY", "keyword", "Primary keywords",
|
|
14, "SCE_ZIG_KW_SECONDARY", "identifier", "Secondary keywords",
|
|
15, "SCE_ZIG_KW_TERTIARY", "identifier", "Tertiary keywords",
|
|
16, "SCE_ZIG_KW_TYPE", "identifier", "Global types",
|
|
};
|
|
|
|
class LexerZig : public DefaultLexer {
|
|
WordList keywordsPrimary;
|
|
WordList keywordsSecondary;
|
|
WordList keywordsTertiary;
|
|
WordList keywordsTypes;
|
|
OptionsZig options;
|
|
OptionSetZig osZig;
|
|
public:
|
|
LexerZig(const char *languageName_, int language_) :
|
|
DefaultLexer(languageName_, language_, lexicalClasses, std::size(lexicalClasses)) {
|
|
}
|
|
// Deleted so LexerZig objects can not be copied.
|
|
LexerZig(const LexerZig &) = delete;
|
|
LexerZig(LexerZig &&) = delete;
|
|
void operator=(const LexerZig &) = delete;
|
|
void operator=(LexerZig &&) = delete;
|
|
~LexerZig() override = default;
|
|
|
|
void SCI_METHOD Release() override {
|
|
delete this;
|
|
}
|
|
int SCI_METHOD Version() const override {
|
|
return lvRelease5;
|
|
}
|
|
const char *SCI_METHOD PropertyNames() override {
|
|
return osZig.PropertyNames();
|
|
}
|
|
int SCI_METHOD PropertyType(const char *name) override {
|
|
return osZig.PropertyType(name);
|
|
}
|
|
const char *SCI_METHOD DescribeProperty(const char *name) override {
|
|
return osZig.DescribeProperty(name);
|
|
}
|
|
Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
|
|
const char *SCI_METHOD PropertyGet(const char *key) override {
|
|
return osZig.PropertyGet(key);
|
|
}
|
|
const char *SCI_METHOD DescribeWordListSets() override {
|
|
return osZig.DescribeWordListSets();
|
|
}
|
|
Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
|
|
|
|
void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
|
|
void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
|
|
|
|
void *SCI_METHOD PrivateCall(int, void *) override {
|
|
return nullptr;
|
|
}
|
|
|
|
void BacktrackToStart(const LexAccessor &styler, int stateMask, Sci_PositionU &startPos, Sci_Position &lengthDoc, int &initStyle);
|
|
Sci_PositionU LookbackNonWhite(LexAccessor &styler, Sci_PositionU startPos, int &chPrevNonWhite, int &stylePrevNonWhite);
|
|
|
|
static ILexer5 *LexerFactoryZig() {
|
|
return new LexerZig("zig", SCLEX_ZIG);
|
|
}
|
|
};
|
|
|
|
Sci_Position SCI_METHOD LexerZig::PropertySet(const char *key, const char *val) {
|
|
if (osZig.PropertySet(&options, key, val)) {
|
|
return 0;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
Sci_Position SCI_METHOD LexerZig::WordListSet(int n, const char *wl) {
|
|
WordList *wordListN = nullptr;
|
|
switch (n) {
|
|
case 0:
|
|
wordListN = &keywordsPrimary;
|
|
break;
|
|
case 1:
|
|
wordListN = &keywordsSecondary;
|
|
break;
|
|
case 2:
|
|
wordListN = &keywordsTertiary;
|
|
break;
|
|
case 3:
|
|
wordListN = &keywordsTypes;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
Sci_Position firstModification = -1;
|
|
if (wordListN && wordListN->Set(wl, false)) {
|
|
firstModification = 0;
|
|
}
|
|
return firstModification;
|
|
}
|
|
|
|
void LexerZig::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess) {
|
|
Accessor styler(pAccess, nullptr);
|
|
|
|
KeywordType kwType = KeywordType::None;
|
|
int visibleChars = 0;
|
|
int lineState = 0;
|
|
EscapeSequence escSeq;
|
|
|
|
StyleContext sc(startPos, lengthDoc, initStyle, styler);
|
|
|
|
while (sc.More()) {
|
|
switch (sc.state) {
|
|
case SCE_ZIG_OPERATOR:
|
|
sc.SetState(SCE_ZIG_DEFAULT);
|
|
break;
|
|
|
|
case SCE_ZIG_NUMBER:
|
|
if (!IsDecimalNumber(sc.chPrev, sc.ch, sc.chNext)) {
|
|
sc.SetState(SCE_ZIG_DEFAULT);
|
|
}
|
|
break;
|
|
|
|
case SCE_ZIG_IDENTIFIER:
|
|
case SCE_ZIG_BUILTIN_FUNCTION:
|
|
if (!IsIdentifierCharEx(sc.ch)) {
|
|
if (sc.state == SCE_ZIG_IDENTIFIER) {
|
|
char s[64];
|
|
sc.GetCurrent(s, sizeof(s));
|
|
if (kwType != KeywordType::None) {
|
|
sc.ChangeState(static_cast<int>(kwType));
|
|
} else if (keywordsPrimary.InList(s)) {
|
|
sc.ChangeState(SCE_ZIG_KW_PRIMARY);
|
|
kwType = KeywordType::None;
|
|
if (strcmp(s, "fn") == 0) {
|
|
kwType = KeywordType::Function;
|
|
}
|
|
} else if (keywordsSecondary.InList(s)) {
|
|
sc.ChangeState(SCE_ZIG_KW_SECONDARY);
|
|
} else if (keywordsTertiary.InList(s)) {
|
|
sc.ChangeState(SCE_ZIG_KW_TERTIARY);
|
|
} else if (keywordsTypes.InList(s)) {
|
|
sc.ChangeState(SCE_ZIG_KW_TYPE);
|
|
}
|
|
}
|
|
if (sc.state != SCE_ZIG_KW_PRIMARY) {
|
|
kwType = KeywordType::None;
|
|
}
|
|
sc.SetState(SCE_ZIG_DEFAULT);
|
|
}
|
|
break;
|
|
|
|
case SCE_ZIG_CHARACTER:
|
|
case SCE_ZIG_STRING:
|
|
case SCE_ZIG_MULTISTRING:
|
|
if (sc.atLineStart) {
|
|
sc.SetState(SCE_ZIG_DEFAULT);
|
|
} else if (sc.ch == '\\' && sc.state != SCE_ZIG_MULTISTRING) {
|
|
escSeq.resetEscapeState(sc.state, sc.chNext);
|
|
sc.SetState(SCE_ZIG_ESCAPECHAR);
|
|
sc.Forward();
|
|
if (sc.Match('u', '{')) {
|
|
escSeq.brace = true;
|
|
escSeq.digitsLeft = 9;
|
|
sc.Forward();
|
|
}
|
|
} else if ((sc.ch == '\'' && sc.state == SCE_ZIG_CHARACTER) || (sc.ch == '\"' && sc.state == SCE_ZIG_STRING)) {
|
|
sc.ForwardSetState(SCE_ZIG_DEFAULT);
|
|
} else if (sc.state != SCE_ZIG_CHARACTER) {
|
|
if (sc.ch == '{' || sc.ch == '}') {
|
|
if (sc.ch == sc.chNext) {
|
|
escSeq.resetEscapeState(sc.state);
|
|
sc.SetState(SCE_ZIG_ESCAPECHAR);
|
|
sc.Forward();
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
case SCE_ZIG_ESCAPECHAR:
|
|
if (escSeq.atEscapeEnd(sc.ch)) {
|
|
if (escSeq.brace && sc.ch == '}') {
|
|
sc.Forward();
|
|
}
|
|
sc.SetState(escSeq.outerState);
|
|
continue;
|
|
}
|
|
break;
|
|
|
|
case SCE_ZIG_COMMENTLINE:
|
|
case SCE_ZIG_COMMENTLINEDOC:
|
|
case SCE_ZIG_COMMENTLINETOP:
|
|
if (sc.atLineStart) {
|
|
sc.SetState(SCE_ZIG_DEFAULT);
|
|
}
|
|
break;
|
|
}
|
|
|
|
if (sc.state == SCE_ZIG_DEFAULT) {
|
|
if (sc.Match('/', '/')) {
|
|
if (visibleChars == 0) {
|
|
lineState = ZigLineStateMaskLineComment;
|
|
}
|
|
sc.SetState(SCE_ZIG_COMMENTLINE);
|
|
sc.Forward(2);
|
|
if (sc.ch == '!') {
|
|
sc.ChangeState(SCE_ZIG_COMMENTLINETOP);
|
|
} else if (sc.ch == '/' && sc.chNext != '/') {
|
|
sc.ChangeState(SCE_ZIG_COMMENTLINEDOC);
|
|
}
|
|
} else if (sc.Match('\\', '\\')) {
|
|
lineState = ZigLineStateMaskMultilineString;
|
|
sc.SetState(SCE_ZIG_MULTISTRING);
|
|
} else if (sc.ch == '\"') {
|
|
sc.SetState(SCE_ZIG_STRING);
|
|
} else if (sc.ch == '\'') {
|
|
sc.SetState(SCE_ZIG_CHARACTER);
|
|
} else if (IsNumberStart(sc.ch, sc.chNext)) {
|
|
sc.SetState(SCE_ZIG_NUMBER);
|
|
} else if ((sc.ch == '@' && IsIdentifierStartEx(sc.chNext)) || IsIdentifierStartEx(sc.ch)) {
|
|
sc.SetState((sc.ch == '@') ? SCE_ZIG_BUILTIN_FUNCTION : SCE_ZIG_IDENTIFIER);
|
|
} else if (IsAGraphic(sc.ch)) {
|
|
sc.SetState(SCE_ZIG_OPERATOR);
|
|
}
|
|
}
|
|
|
|
if (visibleChars == 0 && !isspacechar(sc.ch)) {
|
|
visibleChars++;
|
|
}
|
|
if (sc.atLineEnd) {
|
|
styler.SetLineState(sc.currentLine, lineState);
|
|
lineState = 0;
|
|
kwType = KeywordType::None;
|
|
visibleChars = 0;
|
|
}
|
|
sc.Forward();
|
|
}
|
|
|
|
sc.Complete();
|
|
}
|
|
|
|
void LexerZig::Fold(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess) {
|
|
if (!options.fold)
|
|
return;
|
|
|
|
Accessor styler(pAccess, nullptr);
|
|
const Sci_PositionU endPos = startPos + lengthDoc;
|
|
Sci_Position lineCurrent = styler.GetLine(startPos);
|
|
while (lineCurrent > 0) {
|
|
lineCurrent--;
|
|
startPos = styler.LineStart(lineCurrent);
|
|
initStyle = (startPos > 0) ? styler.StyleIndexAt(startPos) : 0;
|
|
if (!AnyOf(initStyle, SCE_ZIG_MULTISTRING,
|
|
SCE_ZIG_COMMENTLINE, SCE_ZIG_COMMENTLINEDOC, SCE_ZIG_COMMENTLINETOP)) {
|
|
break;
|
|
}
|
|
}
|
|
FoldLineState foldPrev(0);
|
|
int levelCurrent = SC_FOLDLEVELBASE;
|
|
if (lineCurrent > 0) {
|
|
levelCurrent = styler.LevelAt(lineCurrent - 1) >> 16;
|
|
foldPrev = FoldLineState(styler.GetLineState(lineCurrent - 1));
|
|
}
|
|
|
|
int levelNext = levelCurrent;
|
|
FoldLineState foldCurrent(styler.GetLineState(lineCurrent));
|
|
Sci_PositionU lineStartNext = styler.LineStart(lineCurrent + 1);
|
|
lineStartNext = std::min(lineStartNext, endPos);
|
|
|
|
while (startPos < endPos) {
|
|
initStyle = styler.StyleIndexAt(startPos);
|
|
|
|
if (initStyle == SCE_ZIG_OPERATOR) {
|
|
const char ch = styler[startPos];
|
|
if (ch == '{' || ch == '[' || ch == '(') {
|
|
levelNext++;
|
|
} else if (ch == '}' || ch == ']' || ch == ')') {
|
|
levelNext--;
|
|
}
|
|
}
|
|
|
|
++startPos;
|
|
if (startPos == lineStartNext) {
|
|
const FoldLineState foldNext(styler.GetLineState(lineCurrent + 1));
|
|
levelNext = std::max(levelNext, SC_FOLDLEVELBASE);
|
|
if (foldCurrent.lineComment) {
|
|
levelNext += foldNext.lineComment - foldPrev.lineComment;
|
|
} else if (foldCurrent.multilineString) {
|
|
levelNext += foldNext.multilineString - foldPrev.multilineString;
|
|
}
|
|
|
|
const int levelUse = levelCurrent;
|
|
int lev = levelUse | (levelNext << 16);
|
|
if (levelUse < levelNext) {
|
|
lev |= SC_FOLDLEVELHEADERFLAG;
|
|
}
|
|
styler.SetLevel(lineCurrent, lev);
|
|
|
|
lineCurrent++;
|
|
lineStartNext = styler.LineStart(lineCurrent + 1);
|
|
lineStartNext = std::min(lineStartNext, endPos);
|
|
levelCurrent = levelNext;
|
|
foldPrev = foldCurrent;
|
|
foldCurrent = foldNext;
|
|
}
|
|
}
|
|
}
|
|
|
|
} // unnamed namespace end
|
|
|
|
extern const LexerModule lmZig(SCLEX_ZIG, LexerZig::LexerFactoryZig, "zig", zigWordListDesc);
|