410 lines
12 KiB
C++
410 lines
12 KiB
C++
|
// Scintilla source code edit control
|
||
|
/** @file LexCIL.cxx
|
||
|
** Lexer for Common Intermediate Language
|
||
|
** Written by Jad Altahan (github.com/xv)
|
||
|
** CIL manual: https://www.ecma-international.org/publications/standards/Ecma-335.htm
|
||
|
**/
|
||
|
// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
|
||
|
// The License.txt file describes the conditions under which this software may be distributed.
|
||
|
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
#include <stdio.h>
|
||
|
#include <stdarg.h>
|
||
|
#include <assert.h>
|
||
|
#include <ctype.h>
|
||
|
|
||
|
#include <string>
|
||
|
#include <string_view>
|
||
|
#include <map>
|
||
|
#include <algorithm>
|
||
|
#include <functional>
|
||
|
|
||
|
#include "ILexer.h"
|
||
|
#include "Scintilla.h"
|
||
|
#include "SciLexer.h"
|
||
|
|
||
|
#include "StringCopy.h"
|
||
|
#include "WordList.h"
|
||
|
#include "LexAccessor.h"
|
||
|
#include "Accessor.h"
|
||
|
#include "StyleContext.h"
|
||
|
#include "CharacterSet.h"
|
||
|
#include "LexerModule.h"
|
||
|
#include "OptionSet.h"
|
||
|
#include "DefaultLexer.h"
|
||
|
|
||
|
using namespace Scintilla;
|
||
|
using namespace Lexilla;
|
||
|
|
||
|
namespace {
|
||
|
// Use an unnamed namespace to protect the functions and classes from name conflicts
|
||
|
|
||
|
bool IsAWordChar(const int ch) {
|
||
|
return (ch < 0x80) && (isalnum(ch) || ch == '_' || ch == '.');
|
||
|
}
|
||
|
|
||
|
bool IsOperator(const int ch) {
|
||
|
if ((ch < 0x80) && (isalnum(ch)))
|
||
|
return false;
|
||
|
|
||
|
if (strchr("!%&*+-/<=>@^|~()[]{}", ch)) {
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
constexpr bool IsStreamCommentStyle(const int style) noexcept {
|
||
|
return style == SCE_CIL_COMMENT;
|
||
|
}
|
||
|
|
||
|
struct OptionsCIL {
|
||
|
bool fold;
|
||
|
bool foldComment;
|
||
|
bool foldCommentMultiline;
|
||
|
bool foldCompact;
|
||
|
|
||
|
OptionsCIL() {
|
||
|
fold = true;
|
||
|
foldComment = false;
|
||
|
foldCommentMultiline = true;
|
||
|
foldCompact = true;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
static const char *const cilWordListDesc[] = {
|
||
|
"Primary CIL keywords",
|
||
|
"Metadata",
|
||
|
"Opcode instructions",
|
||
|
0
|
||
|
};
|
||
|
|
||
|
struct OptionSetCIL : public OptionSet<OptionsCIL> {
|
||
|
OptionSetCIL() {
|
||
|
DefineProperty("fold", &OptionsCIL::fold);
|
||
|
DefineProperty("fold.comment", &OptionsCIL::foldComment);
|
||
|
|
||
|
DefineProperty("fold.cil.comment.multiline", &OptionsCIL::foldCommentMultiline,
|
||
|
"Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
|
||
|
|
||
|
DefineProperty("fold.compact", &OptionsCIL::foldCompact);
|
||
|
|
||
|
DefineWordListSets(cilWordListDesc);
|
||
|
}
|
||
|
};
|
||
|
|
||
|
LexicalClass lexicalClasses[] = {
|
||
|
// Lexer CIL SCLEX_CIL SCE_CIL_:
|
||
|
0, "SCE_CIL_DEFAULT", "default", "White space",
|
||
|
1, "SCE_CIL_COMMENT", "comment", "Multi-line comment",
|
||
|
2, "SCE_CIL_COMMENTLINE", "comment line", "Line comment",
|
||
|
3, "SCE_CIL_WORD", "keyword", "Keyword 1",
|
||
|
4, "SCE_CIL_WORD2", "keyword", "Keyword 2",
|
||
|
5, "SCE_CIL_WORD3", "keyword", "Keyword 3",
|
||
|
6, "SCE_CIL_STRING", "literal string", "Double quoted string",
|
||
|
7, "SCE_CIL_LABEL", "label", "Code label",
|
||
|
8, "SCE_CIL_OPERATOR", "operator", "Operators",
|
||
|
9, "SCE_CIL_STRINGEOL", "error literal string", "String is not closed",
|
||
|
10, "SCE_CIL_IDENTIFIER", "identifier", "Identifiers",
|
||
|
};
|
||
|
|
||
|
}
|
||
|
|
||
|
class LexerCIL : public DefaultLexer {
|
||
|
WordList keywords, keywords2, keywords3;
|
||
|
OptionsCIL options;
|
||
|
OptionSetCIL osCIL;
|
||
|
|
||
|
public:
|
||
|
LexerCIL() : DefaultLexer("cil", SCLEX_CIL, lexicalClasses, ELEMENTS(lexicalClasses)) { }
|
||
|
|
||
|
virtual ~LexerCIL() { }
|
||
|
|
||
|
void SCI_METHOD Release() override {
|
||
|
delete this;
|
||
|
}
|
||
|
|
||
|
int SCI_METHOD Version() const override {
|
||
|
return lvRelease5;
|
||
|
}
|
||
|
|
||
|
const char * SCI_METHOD PropertyNames() override {
|
||
|
return osCIL.PropertyNames();
|
||
|
}
|
||
|
|
||
|
int SCI_METHOD PropertyType(const char *name) override {
|
||
|
return osCIL.PropertyType(name);
|
||
|
}
|
||
|
|
||
|
const char * SCI_METHOD DescribeProperty(const char *name) override {
|
||
|
return osCIL.DescribeProperty(name);
|
||
|
}
|
||
|
|
||
|
Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
|
||
|
|
||
|
const char * SCI_METHOD PropertyGet(const char* key) override {
|
||
|
return osCIL.PropertyGet(key);
|
||
|
}
|
||
|
|
||
|
const char * SCI_METHOD DescribeWordListSets() override {
|
||
|
return osCIL.DescribeWordListSets();
|
||
|
}
|
||
|
|
||
|
Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
|
||
|
|
||
|
void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
|
||
|
void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
|
||
|
|
||
|
void * SCI_METHOD PrivateCall(int, void *) override {
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
int SCI_METHOD LineEndTypesSupported() override {
|
||
|
return SC_LINE_END_TYPE_UNICODE;
|
||
|
}
|
||
|
|
||
|
int SCI_METHOD PrimaryStyleFromStyle(int style) override {
|
||
|
return style;
|
||
|
}
|
||
|
|
||
|
static ILexer5 *LexerFactoryCIL() {
|
||
|
return new LexerCIL();
|
||
|
}
|
||
|
};
|
||
|
|
||
|
Sci_Position SCI_METHOD LexerCIL::PropertySet(const char *key, const char *val) {
|
||
|
if (osCIL.PropertySet(&options, key, val)) {
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
Sci_Position SCI_METHOD LexerCIL::WordListSet(int n, const char *wl) {
|
||
|
WordList *wordListN = 0;
|
||
|
|
||
|
switch (n) {
|
||
|
case 0:
|
||
|
wordListN = &keywords;
|
||
|
break;
|
||
|
case 1:
|
||
|
wordListN = &keywords2;
|
||
|
break;
|
||
|
case 2:
|
||
|
wordListN = &keywords3;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
Sci_Position firstModification = -1;
|
||
|
|
||
|
if (wordListN) {
|
||
|
WordList wlNew;
|
||
|
wlNew.Set(wl);
|
||
|
|
||
|
if (*wordListN != wlNew) {
|
||
|
wordListN->Set(wl);
|
||
|
firstModification = 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return firstModification;
|
||
|
}
|
||
|
|
||
|
void SCI_METHOD LexerCIL::Lex(Sci_PositionU startPos, Sci_Position length,
|
||
|
int initStyle, IDocument *pAccess) {
|
||
|
if (initStyle == SCE_CIL_STRINGEOL) {
|
||
|
initStyle = SCE_CIL_DEFAULT;
|
||
|
}
|
||
|
|
||
|
Accessor styler(pAccess, NULL);
|
||
|
StyleContext sc(startPos, length, initStyle, styler);
|
||
|
|
||
|
bool identAtLineStart = false, // Checks if an identifier is at line start (ignoring spaces)
|
||
|
canStyleLabels = false; // Checks if conditions are met to style SCE_CIL_LABEL
|
||
|
|
||
|
for (; sc.More(); sc.Forward()) {
|
||
|
if (sc.atLineStart) {
|
||
|
if (sc.state == SCE_CIL_STRING) {
|
||
|
sc.SetState(SCE_CIL_STRING);
|
||
|
}
|
||
|
|
||
|
identAtLineStart = true;
|
||
|
}
|
||
|
|
||
|
// Handle string line continuation
|
||
|
if (sc.ch == '\\' && (sc.chNext == '\n' || sc.chNext == '\r') &&
|
||
|
(sc.state == SCE_CIL_STRING)) {
|
||
|
sc.Forward();
|
||
|
|
||
|
if (sc.ch == '\r' && sc.chNext == '\n') {
|
||
|
sc.Forward();
|
||
|
}
|
||
|
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
switch (sc.state) {
|
||
|
case SCE_CIL_OPERATOR:
|
||
|
sc.SetState(SCE_CIL_DEFAULT);
|
||
|
break;
|
||
|
case SCE_CIL_IDENTIFIER:
|
||
|
if (!IsAWordChar(sc.ch)) {
|
||
|
if (canStyleLabels && (sc.ch == ':' && sc.chNext != ':')) {
|
||
|
sc.ChangeState(SCE_CIL_LABEL);
|
||
|
sc.ForwardSetState(SCE_CIL_DEFAULT);
|
||
|
} else {
|
||
|
char kwSize[100];
|
||
|
sc.GetCurrent(kwSize, sizeof(kwSize));
|
||
|
int style = SCE_CIL_IDENTIFIER;
|
||
|
|
||
|
if (keywords.InList(kwSize)) {
|
||
|
style = SCE_CIL_WORD;
|
||
|
} else if (keywords2.InList(kwSize)) {
|
||
|
style = SCE_CIL_WORD2;
|
||
|
} else if (keywords3.InList(kwSize)) {
|
||
|
style = SCE_CIL_WORD3;
|
||
|
}
|
||
|
|
||
|
sc.ChangeState(style);
|
||
|
sc.SetState(SCE_CIL_DEFAULT);
|
||
|
}
|
||
|
}
|
||
|
break;
|
||
|
case SCE_CIL_COMMENT:
|
||
|
if (sc.Match('*', '/')) {
|
||
|
sc.Forward();
|
||
|
sc.ForwardSetState(SCE_CIL_DEFAULT);
|
||
|
}
|
||
|
break;
|
||
|
case SCE_CIL_COMMENTLINE:
|
||
|
if (sc.atLineStart) {
|
||
|
sc.SetState(SCE_CIL_DEFAULT);
|
||
|
}
|
||
|
break;
|
||
|
case SCE_CIL_STRING:
|
||
|
if (sc.ch == '\\') {
|
||
|
if (sc.chNext == '"' || sc.chNext == '\\') {
|
||
|
sc.Forward();
|
||
|
}
|
||
|
} else if (sc.ch == '"') {
|
||
|
sc.ForwardSetState(SCE_CIL_DEFAULT);
|
||
|
} else if (sc.atLineEnd) {
|
||
|
sc.ChangeState(SCE_CIL_STRINGEOL);
|
||
|
sc.ForwardSetState(SCE_CIL_DEFAULT);
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if (sc.state == SCE_CIL_DEFAULT) {
|
||
|
// String
|
||
|
if (sc.ch == '"') {
|
||
|
sc.SetState(SCE_CIL_STRING);
|
||
|
}
|
||
|
// Keyword
|
||
|
else if (IsAWordChar(sc.ch)) {
|
||
|
// Allow setting SCE_CIL_LABEL style only if the label is the
|
||
|
// first token in the line and does not start with a dot or a digit
|
||
|
canStyleLabels = identAtLineStart && !(sc.ch == '.' || IsADigit(sc.ch));
|
||
|
sc.SetState(SCE_CIL_IDENTIFIER);
|
||
|
}
|
||
|
// Multi-line comment
|
||
|
else if (sc.Match('/', '*')) {
|
||
|
sc.SetState(SCE_CIL_COMMENT);
|
||
|
sc.Forward();
|
||
|
}
|
||
|
// Line comment
|
||
|
else if (sc.Match('/', '/')) {
|
||
|
sc.SetState(SCE_CIL_COMMENTLINE);
|
||
|
}
|
||
|
// Operators
|
||
|
else if (IsOperator(sc.ch)) {
|
||
|
sc.SetState(SCE_CIL_OPERATOR);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (!IsASpace(sc.ch)) {
|
||
|
identAtLineStart = false;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
sc.Complete();
|
||
|
}
|
||
|
|
||
|
void SCI_METHOD LexerCIL::Fold(Sci_PositionU startPos, Sci_Position length,
|
||
|
int initStyle, IDocument *pAccess) {
|
||
|
if (!options.fold) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
LexAccessor styler(pAccess);
|
||
|
|
||
|
const Sci_PositionU endPos = startPos + length;
|
||
|
Sci_Position lineCurrent = styler.GetLine(startPos);
|
||
|
|
||
|
int levelCurrent = SC_FOLDLEVELBASE;
|
||
|
if (lineCurrent > 0)
|
||
|
levelCurrent = styler.LevelAt(lineCurrent - 1) >> 16;
|
||
|
|
||
|
int style = initStyle;
|
||
|
int styleNext = styler.StyleAt(startPos);
|
||
|
int levelNext = levelCurrent;
|
||
|
int visibleChars = 0;
|
||
|
|
||
|
char chNext = styler[startPos];
|
||
|
|
||
|
for (Sci_PositionU i = startPos; i < endPos; i++) {
|
||
|
const char ch = chNext;
|
||
|
int stylePrev = style;
|
||
|
|
||
|
chNext = styler.SafeGetCharAt(i + 1);
|
||
|
style = styleNext;
|
||
|
styleNext = styler.StyleAt(i + 1);
|
||
|
|
||
|
const bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
|
||
|
|
||
|
if (options.foldComment &&
|
||
|
options.foldCommentMultiline && IsStreamCommentStyle(style)) {
|
||
|
if (!IsStreamCommentStyle(stylePrev)) {
|
||
|
levelNext++;
|
||
|
} else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
|
||
|
levelNext--;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (style == SCE_CIL_OPERATOR) {
|
||
|
if (ch == '{') {
|
||
|
levelNext++;
|
||
|
} else if (ch == '}') {
|
||
|
levelNext--;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (!IsASpace(ch)) {
|
||
|
visibleChars++;
|
||
|
}
|
||
|
|
||
|
if (atEOL || (i == endPos - 1)) {
|
||
|
int lev = levelCurrent | levelNext << 16;
|
||
|
if (visibleChars == 0 && options.foldCompact)
|
||
|
lev |= SC_FOLDLEVELWHITEFLAG;
|
||
|
if (levelCurrent < levelNext)
|
||
|
lev |= SC_FOLDLEVELHEADERFLAG;
|
||
|
if (lev != styler.LevelAt(lineCurrent)) {
|
||
|
styler.SetLevel(lineCurrent, lev);
|
||
|
}
|
||
|
|
||
|
lineCurrent++;
|
||
|
levelCurrent = levelNext;
|
||
|
|
||
|
if (options.foldCompact &&
|
||
|
i == static_cast<Sci_PositionU>(styler.Length() - 1)) {
|
||
|
styler.SetLevel(lineCurrent, lev | SC_FOLDLEVELWHITEFLAG);
|
||
|
}
|
||
|
|
||
|
visibleChars = 0;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
LexerModule lmCIL(SCLEX_CIL, LexerCIL::LexerFactoryCIL, "cil", cilWordListDesc);
|