You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
465 lines
13 KiB
465 lines
13 KiB
// Scintilla source code edit control |
|
/** @file LexZig.cxx |
|
** Lexer for Zig language. |
|
**/ |
|
// Based on Zufu Liu's Notepad4 Zig lexer |
|
// Modified for Scintilla by Jiri Techet, 2024 |
|
// The License.txt file describes the conditions under which this software may be distributed. |
|
|
|
#include <cassert> |
|
#include <cstring> |
|
|
|
#include <string> |
|
#include <string_view> |
|
#include <vector> |
|
#include <map> |
|
#include <algorithm> |
|
|
|
#include "ILexer.h" |
|
#include "Scintilla.h" |
|
#include "SciLexer.h" |
|
|
|
#include "WordList.h" |
|
#include "LexAccessor.h" |
|
#include "Accessor.h" |
|
#include "StyleContext.h" |
|
#include "CharacterSet.h" |
|
#include "LexerModule.h" |
|
#include "OptionSet.h" |
|
#include "DefaultLexer.h" |
|
|
|
using namespace Scintilla; |
|
using namespace Lexilla; |
|
|
|
namespace { |
|
// Use an unnamed namespace to protect the functions and classes from name conflicts |
|
|
|
// True for printable ASCII other than space, i.e. the range '!' (33) .. '~' (126).
// Control characters, whitespace, DEL and anything >= 0x80 are rejected.
constexpr bool IsAGraphic(int ch) noexcept {
	return ch >= '!' && ch <= '~';
}
|
|
|
constexpr bool IsIdentifierStart(int ch) noexcept { |
|
return IsUpperOrLowerCase(ch) || ch == '_'; |
|
} |
|
|
|
constexpr bool IsIdentifierStartEx(int ch) noexcept { |
|
return IsIdentifierStart(ch) || ch >= 0x80; |
|
} |
|
|
|
constexpr bool IsNumberStart(int ch, int chNext) noexcept { |
|
return IsADigit(ch) || (ch == '.' && IsADigit(chNext)); |
|
} |
|
|
|
constexpr bool IsIdentifierChar(int ch) noexcept { |
|
return IsAlphaNumeric(ch) || ch == '_'; |
|
} |
|
|
|
// Decides whether a non-alphanumeric character still belongs to the numeric
// literal currently being lexed.
//   chPrev - character before ch
//   ch     - character under test
//   chNext - character after ch
// Accepted are:
//   * a sign ('+' or '-') directly after an exponent marker. Besides the decimal
//     markers 'e'/'E' this includes 'p'/'P', the exponent marker of Zig hexadecimal
//     floats (e.g. 0x103.70p-5), which would otherwise be split at the sign;
//   * a '.' that is not followed by another '.', so that a range such as 0..10
//     is not swallowed into the number.
constexpr bool IsNumberContinue(int chPrev, int ch, int chNext) noexcept {
	if (ch == '+' || ch == '-') {
		return chPrev == 'e' || chPrev == 'E' || chPrev == 'p' || chPrev == 'P';
	}
	return ch == '.' && chNext != '.';
}
|
|
|
constexpr bool IsDecimalNumber(int chPrev, int ch, int chNext) noexcept { |
|
return IsIdentifierChar(ch) || IsNumberContinue(chPrev, ch, chNext); |
|
} |
|
|
|
constexpr bool IsIdentifierCharEx(int ch) noexcept { |
|
return IsIdentifierChar(ch) || ch >= 0x80; |
|
} |
|
|
|
// https://ziglang.org/documentation/master/#Escape-Sequences |
|
struct EscapeSequence { |
|
int outerState = SCE_ZIG_DEFAULT; |
|
int digitsLeft = 0; |
|
bool brace = false; |
|
|
|
// highlight any character as escape sequence. |
|
void resetEscapeState(int state, int chNext) noexcept { |
|
outerState = state; |
|
digitsLeft = 1; |
|
brace = false; |
|
if (chNext == 'x') { |
|
digitsLeft = 3; |
|
} else if (chNext == 'u') { |
|
digitsLeft = 5; |
|
} |
|
} |
|
void resetEscapeState(int state) noexcept { |
|
outerState = state; |
|
digitsLeft = 1; |
|
brace = false; |
|
} |
|
bool atEscapeEnd(int ch) noexcept { |
|
--digitsLeft; |
|
return digitsLeft <= 0 || !IsAHeXDigit(ch); |
|
} |
|
}; |
|
|
|
// Bit flags stored in the per-line state by the lexer and read back by the folder.
enum {
	ZigLineStateMaskLineComment = 1 << 0,		// line consists of a line comment
	ZigLineStateMaskMultilineString = 1 << 1,	// line contains a multiline string
};
|
|
|
struct FoldLineState { |
|
int lineComment; |
|
int multilineString; |
|
constexpr explicit FoldLineState(int lineState) noexcept: |
|
lineComment(lineState & ZigLineStateMaskLineComment), |
|
multilineString((lineState >> 1) & 1) { |
|
} |
|
}; |
|
|
|
enum class KeywordType { |
|
None = SCE_ZIG_DEFAULT, |
|
Function = SCE_ZIG_FUNCTION, |
|
}; |
|
|
|
// Indices of the keyword lists, in the same order as zigWordListDesc.
enum {
	KeywordIndex_Primary = 0,
	KeywordIndex_Secondary,
	KeywordIndex_Tertiary,
	KeywordIndex_Type,
};
|
|
|
// Options used for LexerZig |
|
struct OptionsZig {
	// Folding is only performed when this is true; it is off by default.
	bool fold{false};
};
|
|
|
// Human readable names of the keyword lists, terminated by a null pointer.
// The order matches the KeywordIndex_* constants.
constexpr const char *zigWordListDesc[] = {
	"Primary keywords",
	"Secondary keywords",
	"Tertiary keywords",
	"Global type definitions",
	nullptr
};
|
|
|
// Registers the lexer's properties and keyword list descriptions with OptionSet.
struct OptionSetZig : public OptionSet<OptionsZig> {
	OptionSetZig() {
		// "fold" is bound to OptionsZig::fold.
		DefineProperty("fold", &OptionsZig::fold);

		DefineWordListSets(zigWordListDesc);
	}
};
|
|
|
// Style metadata handed to DefaultLexer: one entry per style, in the order
// { style number, symbolic name, tags, description }.
LexicalClass lexicalClasses[] = {
	// Lexer ZIG SCLEX_ZIG SCE_ZIG_:
	{ 0, "SCE_ZIG_DEFAULT", "default", "White space" },
	{ 1, "SCE_ZIG_COMMENTLINE", "comment line", "Comment: //" },
	{ 2, "SCE_ZIG_COMMENTLINEDOC", "comment line documentation", "Comment: ///" },
	{ 3, "SCE_ZIG_COMMENTLINETOP", "comment line documentation", "Comment: //!" },
	{ 4, "SCE_ZIG_NUMBER", "literal numeric", "Number" },
	{ 5, "SCE_ZIG_OPERATOR", "operator", "Operator" },
	{ 6, "SCE_ZIG_CHARACTER", "literal string character", "Single quoted string" },
	{ 7, "SCE_ZIG_STRING", "literal string", "Double quoted string" },
	{ 8, "SCE_ZIG_MULTISTRING", "literal string multiline", "Multiline string introduced by two backslashes" },
	{ 9, "SCE_ZIG_ESCAPECHAR", "literal string escapesequence", "Escape sequence" },
	{ 10, "SCE_ZIG_IDENTIFIER", "identifier", "Identifier" },
	{ 11, "SCE_ZIG_FUNCTION", "identifier", "Function definition" },
	{ 12, "SCE_ZIG_BUILTIN_FUNCTION", "identifier", "Builtin function" },
	{ 13, "SCE_ZIG_KW_PRIMARY", "keyword", "Primary keywords" },
	{ 14, "SCE_ZIG_KW_SECONDARY", "identifier", "Secondary keywords" },
	{ 15, "SCE_ZIG_KW_TERTIARY", "identifier", "Tertiary keywords" },
	{ 16, "SCE_ZIG_KW_TYPE", "identifier", "Global types" },
};
|
|
|
class LexerZig : public DefaultLexer { |
|
WordList keywordsPrimary; |
|
WordList keywordsSecondary; |
|
WordList keywordsTertiary; |
|
WordList keywordsTypes; |
|
OptionsZig options; |
|
OptionSetZig osZig; |
|
public: |
|
LexerZig(const char *languageName_, int language_) : |
|
DefaultLexer(languageName_, language_, lexicalClasses, std::size(lexicalClasses)) { |
|
} |
|
// Deleted so LexerZig objects can not be copied. |
|
LexerZig(const LexerZig &) = delete; |
|
LexerZig(LexerZig &&) = delete; |
|
void operator=(const LexerZig &) = delete; |
|
void operator=(LexerZig &&) = delete; |
|
~LexerZig() override = default; |
|
|
|
void SCI_METHOD Release() override { |
|
delete this; |
|
} |
|
int SCI_METHOD Version() const override { |
|
return lvRelease5; |
|
} |
|
const char *SCI_METHOD PropertyNames() override { |
|
return osZig.PropertyNames(); |
|
} |
|
int SCI_METHOD PropertyType(const char *name) override { |
|
return osZig.PropertyType(name); |
|
} |
|
const char *SCI_METHOD DescribeProperty(const char *name) override { |
|
return osZig.DescribeProperty(name); |
|
} |
|
Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override; |
|
const char *SCI_METHOD PropertyGet(const char *key) override { |
|
return osZig.PropertyGet(key); |
|
} |
|
const char *SCI_METHOD DescribeWordListSets() override { |
|
return osZig.DescribeWordListSets(); |
|
} |
|
Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override; |
|
|
|
void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; |
|
void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; |
|
|
|
void *SCI_METHOD PrivateCall(int, void *) override { |
|
return nullptr; |
|
} |
|
|
|
void BacktrackToStart(const LexAccessor &styler, int stateMask, Sci_PositionU &startPos, Sci_Position &lengthDoc, int &initStyle); |
|
Sci_PositionU LookbackNonWhite(LexAccessor &styler, Sci_PositionU startPos, int &chPrevNonWhite, int &stylePrevNonWhite); |
|
|
|
static ILexer5 *LexerFactoryZig() { |
|
return new LexerZig("zig", SCLEX_ZIG); |
|
} |
|
}; |
|
|
|
// Set property `key` to `val` through the option set.
// Returns 0 when OptionSet::PropertySet reports success and -1 otherwise.
Sci_Position SCI_METHOD LexerZig::PropertySet(const char *key, const char *val) {
	const bool accepted = osZig.PropertySet(&options, key, val);
	return accepted ? 0 : -1;
}
|
|
|
// Replace keyword list number `n` with the words in `wl`.
// `n` is one of the KeywordIndex_* constants; any other index is ignored.
// Returns 0 when WordList::Set reports success for the selected list,
// and -1 otherwise (including for an unknown index).
Sci_Position SCI_METHOD LexerZig::WordListSet(int n, const char *wl) {
	WordList *wordListN = nullptr;
	switch (n) {
	case KeywordIndex_Primary:
		wordListN = &keywordsPrimary;
		break;
	case KeywordIndex_Secondary:
		wordListN = &keywordsSecondary;
		break;
	case KeywordIndex_Tertiary:
		wordListN = &keywordsTertiary;
		break;
	case KeywordIndex_Type:
		wordListN = &keywordsTypes;
		break;
	default:
		// Not a list this lexer knows about.
		return -1;
	}
	return wordListN->Set(wl, false) ? 0 : -1;
}
|
|
|
// Assign styles to the document range starting at startPos and spanning
// lengthDoc characters, beginning in style initStyle.
//
// Each loop iteration has two phases:
//   1. the switch decides whether the style that is currently open ends at
//      the current character;
//   2. if the state is SCE_ZIG_DEFAULT afterwards, the current character is
//      examined to see whether it opens a new style.
// At every line end the accumulated line state (ZigLineStateMask* flags) is
// stored for the folder and the per-line bookkeeping is reset.
void LexerZig::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess) {
	Accessor styler(pAccess, nullptr);

	// Armed by the "fn" keyword so that the identifier following it is styled
	// as a function name.
	KeywordType kwType = KeywordType::None;
	// Becomes non-zero once a non-space character has been seen on the line.
	int visibleChars = 0;
	int lineState = 0;
	EscapeSequence escSeq;

	StyleContext sc(startPos, lengthDoc, initStyle, styler);

	while (sc.More()) {
		switch (sc.state) {
		case SCE_ZIG_OPERATOR:
			// Operators are styled one character at a time.
			sc.SetState(SCE_ZIG_DEFAULT);
			break;

		case SCE_ZIG_NUMBER:
			if (!IsDecimalNumber(sc.chPrev, sc.ch, sc.chNext)) {
				sc.SetState(SCE_ZIG_DEFAULT);
			}
			break;

		case SCE_ZIG_IDENTIFIER:
		case SCE_ZIG_BUILTIN_FUNCTION:
			if (!IsIdentifierCharEx(sc.ch)) {
				if (sc.state == SCE_ZIG_IDENTIFIER) {
					char s[64];
					sc.GetCurrent(s, sizeof(s));
					if (kwType != KeywordType::None) {
						// A preceding keyword dictates the style of this identifier.
						sc.ChangeState(static_cast<int>(kwType));
					} else if (keywordsPrimary.InList(s)) {
						sc.ChangeState(SCE_ZIG_KW_PRIMARY);
						// kwType is known to be None in this branch.
						if (strcmp(s, "fn") == 0) {
							kwType = KeywordType::Function;
						}
					} else if (keywordsSecondary.InList(s)) {
						sc.ChangeState(SCE_ZIG_KW_SECONDARY);
					} else if (keywordsTertiary.InList(s)) {
						sc.ChangeState(SCE_ZIG_KW_TERTIARY);
					} else if (keywordsTypes.InList(s)) {
						sc.ChangeState(SCE_ZIG_KW_TYPE);
					}
				}
				// Only a primary keyword may leave kwType armed for the next identifier.
				if (sc.state != SCE_ZIG_KW_PRIMARY) {
					kwType = KeywordType::None;
				}
				sc.SetState(SCE_ZIG_DEFAULT);
			}
			break;

		case SCE_ZIG_CHARACTER:
		case SCE_ZIG_STRING:
		case SCE_ZIG_MULTISTRING:
			if (sc.atLineStart) {
				// None of these literals continues onto the next line.
				sc.SetState(SCE_ZIG_DEFAULT);
			} else if (sc.ch == '\\' && sc.state != SCE_ZIG_MULTISTRING) {
				escSeq.resetEscapeState(sc.state, sc.chNext);
				sc.SetState(SCE_ZIG_ESCAPECHAR);
				sc.Forward();
				if (sc.Match('u', '{')) {
					// Braced form \u{...}: allow a longer run of hex digits and
					// remember to consume the closing brace.
					escSeq.brace = true;
					escSeq.digitsLeft = 9;
					sc.Forward();
				}
			} else if ((sc.ch == '\'' && sc.state == SCE_ZIG_CHARACTER) || (sc.ch == '\"' && sc.state == SCE_ZIG_STRING)) {
				sc.ForwardSetState(SCE_ZIG_DEFAULT);
			} else if (sc.state != SCE_ZIG_CHARACTER) {
				if (sc.ch == '{' || sc.ch == '}') {
					if (sc.ch == sc.chNext) {
						// A doubled brace ("{{" or "}}") is highlighted like an escape
						// sequence (presumably the format-string literal brace).
						escSeq.resetEscapeState(sc.state);
						sc.SetState(SCE_ZIG_ESCAPECHAR);
						sc.Forward();
					}
				}
			}
			break;

		case SCE_ZIG_ESCAPECHAR:
			if (escSeq.atEscapeEnd(sc.ch)) {
				if (escSeq.brace && sc.ch == '}') {
					sc.Forward();
				}
				sc.SetState(escSeq.outerState);
				// Re-examine the current character in the restored outer state.
				continue;
			}
			break;

		case SCE_ZIG_COMMENTLINE:
		case SCE_ZIG_COMMENTLINEDOC:
		case SCE_ZIG_COMMENTLINETOP:
			if (sc.atLineStart) {
				sc.SetState(SCE_ZIG_DEFAULT);
			}
			break;
		}

		if (sc.state == SCE_ZIG_DEFAULT) {
			if (sc.Match('/', '/')) {
				if (visibleChars == 0) {
					// Only a comment that starts the line marks it as a comment line.
					lineState = ZigLineStateMaskLineComment;
				}
				sc.SetState(SCE_ZIG_COMMENTLINE);
				sc.Forward(2);
				if (sc.ch == '!') {
					sc.ChangeState(SCE_ZIG_COMMENTLINETOP);
				} else if (sc.ch == '/' && sc.chNext != '/') {
					// Exactly three slashes: documentation comment.
					sc.ChangeState(SCE_ZIG_COMMENTLINEDOC);
				}
			} else if (sc.Match('\\', '\\')) {
				lineState = ZigLineStateMaskMultilineString;
				sc.SetState(SCE_ZIG_MULTISTRING);
			} else if (sc.ch == '\"') {
				sc.SetState(SCE_ZIG_STRING);
			} else if (sc.ch == '\'') {
				sc.SetState(SCE_ZIG_CHARACTER);
			} else if (IsNumberStart(sc.ch, sc.chNext)) {
				sc.SetState(SCE_ZIG_NUMBER);
			} else if ((sc.ch == '@' && IsIdentifierStartEx(sc.chNext)) || IsIdentifierStartEx(sc.ch)) {
				sc.SetState((sc.ch == '@') ? SCE_ZIG_BUILTIN_FUNCTION : SCE_ZIG_IDENTIFIER);
			} else if (IsAGraphic(sc.ch)) {
				// An operator cancels a pending "fn <name>" expectation. Without this,
				// in a function type such as `fn (a: i8) i8` the parameter name
				// would be styled as a function definition.
				kwType = KeywordType::None;
				sc.SetState(SCE_ZIG_OPERATOR);
			}
		}

		if (visibleChars == 0 && !isspacechar(sc.ch)) {
			visibleChars++;
		}
		if (sc.atLineEnd) {
			styler.SetLineState(sc.currentLine, lineState);
			lineState = 0;
			kwType = KeywordType::None;
			visibleChars = 0;
		}
		sc.Forward();
	}

	sc.Complete();
}
|
|
|
// Compute fold levels for the range starting at startPos and spanning
// lengthDoc characters. Does nothing unless the "fold" option is enabled.
//
// Fold levels change on bracket operators ('{', '[', '(' open; '}', ']', ')'
// close) and at the first and last line of a run of comment lines or of
// multiline-string lines, as recorded in the line state by Lex().
void LexerZig::Fold(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess) {
	if (!options.fold) {
		return;
	}

	Accessor styler(pAccess, nullptr);
	const Sci_PositionU endPos = startPos + lengthDoc;

	// Step back at least one line, and keep going while the line begins in a
	// multiline string or line comment style, so folding restarts before such a run.
	Sci_Position lineCurrent = styler.GetLine(startPos);
	while (lineCurrent > 0) {
		--lineCurrent;
		startPos = styler.LineStart(lineCurrent);
		initStyle = (startPos > 0) ? styler.StyleIndexAt(startPos) : 0;
		const bool insideRun = AnyOf(initStyle, SCE_ZIG_MULTISTRING,
			SCE_ZIG_COMMENTLINE, SCE_ZIG_COMMENTLINEDOC, SCE_ZIG_COMMENTLINETOP);
		if (!insideRun) {
			break;
		}
	}

	// Seed the level and the previous line's flags from the line above, if any.
	FoldLineState foldPrev(0);
	int levelCurrent = SC_FOLDLEVELBASE;
	if (lineCurrent > 0) {
		const Sci_Position linePrev = lineCurrent - 1;
		// The upper 16 bits of a stored level hold the level of the following line.
		levelCurrent = styler.LevelAt(linePrev) >> 16;
		foldPrev = FoldLineState(styler.GetLineState(linePrev));
	}

	int levelNext = levelCurrent;
	FoldLineState foldCurrent(styler.GetLineState(lineCurrent));
	Sci_PositionU lineStartNext = styler.LineStart(lineCurrent + 1);
	lineStartNext = std::min(lineStartNext, endPos);

	while (startPos < endPos) {
		initStyle = styler.StyleIndexAt(startPos);

		if (initStyle == SCE_ZIG_OPERATOR) {
			switch (styler[startPos]) {
			case '{':
			case '[':
			case '(':
				++levelNext;
				break;
			case '}':
			case ']':
			case ')':
				--levelNext;
				break;
			default:
				break;
			}
		}

		++startPos;
		if (startPos != lineStartNext) {
			continue;
		}

		// End of the current line reached: finalise its level.
		const FoldLineState foldNext(styler.GetLineState(lineCurrent + 1));
		levelNext = std::max(levelNext, SC_FOLDLEVELBASE);
		if (foldCurrent.lineComment) {
			// +1 on the first line of a comment run, -1 on the last one.
			levelNext += foldNext.lineComment - foldPrev.lineComment;
		} else if (foldCurrent.multilineString) {
			// Same scheme for runs of multiline-string lines.
			levelNext += foldNext.multilineString - foldPrev.multilineString;
		}

		int lev = levelCurrent | (levelNext << 16);
		if (levelCurrent < levelNext) {
			lev |= SC_FOLDLEVELHEADERFLAG;
		}
		styler.SetLevel(lineCurrent, lev);

		// Advance to the next line.
		++lineCurrent;
		lineStartNext = styler.LineStart(lineCurrent + 1);
		lineStartNext = std::min(lineStartNext, endPos);
		levelCurrent = levelNext;
		foldPrev = foldCurrent;
		foldCurrent = foldNext;
	}
}
|
|
|
} // unnamed namespace end |
|
|
|
// Module object that publishes the Zig lexer under the name "zig",
// together with its factory function and keyword list descriptions.
extern const LexerModule lmZig(
	SCLEX_ZIG,
	LexerZig::LexerFactoryZig,
	"zig",
	zigWordListDesc);
|
|
|