notepad-plus-plus/lexilla/lexers/LexAsm.cxx

481 lines
14 KiB
C++

// Scintilla source code edit control
/** @file LexAsm.cxx
** Lexer for Assembler, just for the MASM syntax
** Written by The Black Horus
** Enhancements and NASM stuff by Kein-Hong Man, 2003-10
** SCE_ASM_COMMENTBLOCK and SCE_ASM_CHARACTER are for future GNU as colouring
** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
**/
// Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdarg.h>
#include <assert.h>
#include <ctype.h>
#include <string>
#include <string_view>
#include <map>
#include <set>
#include <functional>
#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"
#include "WordList.h"
#include "LexAccessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "LexerModule.h"
#include "OptionSet.h"
#include "DefaultLexer.h"
using namespace Scintilla;
using namespace Lexilla;
static inline bool IsAWordChar(const int ch) {
return (ch < 0x80) && (isalnum(ch) || ch == '.' ||
ch == '_' || ch == '?');
}
static inline bool IsAWordStart(const int ch) {
return (ch < 0x80) && (isalnum(ch) || ch == '_' || ch == '.' ||
ch == '%' || ch == '@' || ch == '$' || ch == '?');
}
static inline bool IsAsmOperator(const int ch) {
if ((ch < 0x80) && (isalnum(ch)))
return false;
// '.' left out as it is used to make up numbers
if (ch == '*' || ch == '/' || ch == '-' || ch == '+' ||
ch == '(' || ch == ')' || ch == '=' || ch == '^' ||
ch == '[' || ch == ']' || ch == '<' || ch == '&' ||
ch == '>' || ch == ',' || ch == '|' || ch == '~' ||
ch == '%' || ch == ':')
return true;
return false;
}
static bool IsStreamCommentStyle(int style) {
return style == SCE_ASM_COMMENTDIRECTIVE || style == SCE_ASM_COMMENTBLOCK;
}
static inline int LowerCase(int c) {
if (c >= 'A' && c <= 'Z')
return 'a' + c - 'A';
return c;
}
// An individual named option for use in an OptionSet
// Options used for LexerAsm
struct OptionsAsm {
std::string delimiter;
bool fold;
bool foldSyntaxBased;
bool foldCommentMultiline;
bool foldCommentExplicit;
std::string foldExplicitStart;
std::string foldExplicitEnd;
bool foldExplicitAnywhere;
bool foldCompact;
std::string commentChar;
OptionsAsm() {
delimiter = "";
fold = false;
foldSyntaxBased = true;
foldCommentMultiline = false;
foldCommentExplicit = false;
foldExplicitStart = "";
foldExplicitEnd = "";
foldExplicitAnywhere = false;
foldCompact = true;
commentChar = "";
}
};
static const char * const asmWordListDesc[] = {
"CPU instructions",
"FPU instructions",
"Registers",
"Directives",
"Directive operands",
"Extended instructions",
"Directives4Foldstart",
"Directives4Foldend",
0
};
struct OptionSetAsm : public OptionSet<OptionsAsm> {
OptionSetAsm() {
DefineProperty("lexer.asm.comment.delimiter", &OptionsAsm::delimiter,
"Character used for COMMENT directive's delimiter, replacing the standard \"~\".");
DefineProperty("fold", &OptionsAsm::fold);
DefineProperty("fold.asm.syntax.based", &OptionsAsm::foldSyntaxBased,
"Set this property to 0 to disable syntax based folding.");
DefineProperty("fold.asm.comment.multiline", &OptionsAsm::foldCommentMultiline,
"Set this property to 1 to enable folding multi-line comments.");
DefineProperty("fold.asm.comment.explicit", &OptionsAsm::foldCommentExplicit,
"This option enables folding explicit fold points when using the Asm lexer. "
"Explicit fold points allows adding extra folding by placing a ;{ comment at the start and a ;} "
"at the end of a section that should fold.");
DefineProperty("fold.asm.explicit.start", &OptionsAsm::foldExplicitStart,
"The string to use for explicit fold start points, replacing the standard ;{.");
DefineProperty("fold.asm.explicit.end", &OptionsAsm::foldExplicitEnd,
"The string to use for explicit fold end points, replacing the standard ;}.");
DefineProperty("fold.asm.explicit.anywhere", &OptionsAsm::foldExplicitAnywhere,
"Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
DefineProperty("fold.compact", &OptionsAsm::foldCompact);
DefineProperty("lexer.as.comment.character", &OptionsAsm::commentChar,
"Overrides the default comment character (which is ';' for asm and '#' for as).");
DefineWordListSets(asmWordListDesc);
}
};
class LexerAsm : public DefaultLexer {
WordList cpuInstruction;
WordList mathInstruction;
WordList registers;
WordList directive;
WordList directiveOperand;
WordList extInstruction;
WordList directives4foldstart;
WordList directives4foldend;
OptionsAsm options;
OptionSetAsm osAsm;
int commentChar;
public:
LexerAsm(const char *languageName_, int language_, int commentChar_) : DefaultLexer(languageName_, language_) {
commentChar = commentChar_;
}
virtual ~LexerAsm() {
}
void SCI_METHOD Release() override {
delete this;
}
int SCI_METHOD Version() const override {
return lvRelease5;
}
const char * SCI_METHOD PropertyNames() override {
return osAsm.PropertyNames();
}
int SCI_METHOD PropertyType(const char *name) override {
return osAsm.PropertyType(name);
}
const char * SCI_METHOD DescribeProperty(const char *name) override {
return osAsm.DescribeProperty(name);
}
Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
const char * SCI_METHOD PropertyGet(const char *key) override {
return osAsm.PropertyGet(key);
}
const char * SCI_METHOD DescribeWordListSets() override {
return osAsm.DescribeWordListSets();
}
Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
void * SCI_METHOD PrivateCall(int, void *) override {
return 0;
}
static ILexer5 *LexerFactoryAsm() {
return new LexerAsm("asm", SCLEX_ASM, ';');
}
static ILexer5 *LexerFactoryAs() {
return new LexerAsm("as", SCLEX_AS, '#');
}
};
Sci_Position SCI_METHOD LexerAsm::PropertySet(const char *key, const char *val) {
if (osAsm.PropertySet(&options, key, val)) {
return 0;
}
return -1;
}
Sci_Position SCI_METHOD LexerAsm::WordListSet(int n, const char *wl) {
WordList *wordListN = 0;
switch (n) {
case 0:
wordListN = &cpuInstruction;
break;
case 1:
wordListN = &mathInstruction;
break;
case 2:
wordListN = &registers;
break;
case 3:
wordListN = &directive;
break;
case 4:
wordListN = &directiveOperand;
break;
case 5:
wordListN = &extInstruction;
break;
case 6:
wordListN = &directives4foldstart;
break;
case 7:
wordListN = &directives4foldend;
break;
}
Sci_Position firstModification = -1;
if (wordListN) {
WordList wlNew;
wlNew.Set(wl);
if (*wordListN != wlNew) {
wordListN->Set(wl);
firstModification = 0;
}
}
return firstModification;
}
void SCI_METHOD LexerAsm::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
LexAccessor styler(pAccess);
const char commentCharacter = options.commentChar.empty() ?
commentChar : options.commentChar.front();
// Do not leak onto next line
if (initStyle == SCE_ASM_STRINGEOL)
initStyle = SCE_ASM_DEFAULT;
StyleContext sc(startPos, length, initStyle, styler);
for (; sc.More(); sc.Forward())
{
// Prevent SCE_ASM_STRINGEOL from leaking back to previous line
if (sc.atLineStart && (sc.state == SCE_ASM_STRING)) {
sc.SetState(SCE_ASM_STRING);
} else if (sc.atLineStart && (sc.state == SCE_ASM_CHARACTER)) {
sc.SetState(SCE_ASM_CHARACTER);
}
// Handle line continuation generically.
if (sc.ch == '\\') {
if (sc.chNext == '\n' || sc.chNext == '\r') {
sc.Forward();
if (sc.ch == '\r' && sc.chNext == '\n') {
sc.Forward();
}
continue;
}
}
// Determine if the current state should terminate.
if (sc.state == SCE_ASM_OPERATOR) {
if (!IsAsmOperator(sc.ch)) {
sc.SetState(SCE_ASM_DEFAULT);
}
} else if (sc.state == SCE_ASM_NUMBER) {
if (!IsAWordChar(sc.ch)) {
sc.SetState(SCE_ASM_DEFAULT);
}
} else if (sc.state == SCE_ASM_IDENTIFIER) {
if (!IsAWordChar(sc.ch) ) {
char s[100];
sc.GetCurrentLowered(s, sizeof(s));
bool IsDirective = false;
if (cpuInstruction.InList(s)) {
sc.ChangeState(SCE_ASM_CPUINSTRUCTION);
} else if (mathInstruction.InList(s)) {
sc.ChangeState(SCE_ASM_MATHINSTRUCTION);
} else if (registers.InList(s)) {
sc.ChangeState(SCE_ASM_REGISTER);
} else if (directive.InList(s)) {
sc.ChangeState(SCE_ASM_DIRECTIVE);
IsDirective = true;
} else if (directiveOperand.InList(s)) {
sc.ChangeState(SCE_ASM_DIRECTIVEOPERAND);
} else if (extInstruction.InList(s)) {
sc.ChangeState(SCE_ASM_EXTINSTRUCTION);
}
sc.SetState(SCE_ASM_DEFAULT);
if (IsDirective && !strcmp(s, "comment")) {
char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0];
while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) {
sc.ForwardSetState(SCE_ASM_DEFAULT);
}
if (sc.ch == delimiter) {
sc.SetState(SCE_ASM_COMMENTDIRECTIVE);
}
}
}
} else if (sc.state == SCE_ASM_COMMENTDIRECTIVE) {
char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0];
if (sc.ch == delimiter) {
while (!sc.atLineEnd) {
sc.Forward();
}
sc.SetState(SCE_ASM_DEFAULT);
}
} else if (sc.state == SCE_ASM_COMMENT ) {
if (sc.atLineEnd) {
sc.SetState(SCE_ASM_DEFAULT);
}
} else if (sc.state == SCE_ASM_STRING) {
if (sc.ch == '\\') {
if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
sc.Forward();
}
} else if (sc.ch == '\"') {
sc.ForwardSetState(SCE_ASM_DEFAULT);
} else if (sc.atLineEnd) {
sc.ChangeState(SCE_ASM_STRINGEOL);
sc.ForwardSetState(SCE_ASM_DEFAULT);
}
} else if (sc.state == SCE_ASM_CHARACTER) {
if (sc.ch == '\\') {
if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
sc.Forward();
}
} else if (sc.ch == '\'') {
sc.ForwardSetState(SCE_ASM_DEFAULT);
} else if (sc.atLineEnd) {
sc.ChangeState(SCE_ASM_STRINGEOL);
sc.ForwardSetState(SCE_ASM_DEFAULT);
}
}
// Determine if a new state should be entered.
if (sc.state == SCE_ASM_DEFAULT) {
if (sc.ch == commentCharacter) {
sc.SetState(SCE_ASM_COMMENT);
} else if (IsASCII(sc.ch) && (isdigit(sc.ch) || (sc.ch == '.' && IsASCII(sc.chNext) && isdigit(sc.chNext)))) {
sc.SetState(SCE_ASM_NUMBER);
} else if (IsAWordStart(sc.ch)) {
sc.SetState(SCE_ASM_IDENTIFIER);
} else if (sc.ch == '\"') {
sc.SetState(SCE_ASM_STRING);
} else if (sc.ch == '\'') {
sc.SetState(SCE_ASM_CHARACTER);
} else if (IsAsmOperator(sc.ch)) {
sc.SetState(SCE_ASM_OPERATOR);
}
}
}
sc.Complete();
}
// Store both the current line's fold level and the next lines in the
// level store to make it easy to pick up with each increment
// and to make it possible to fiddle the current level for "else".
void SCI_METHOD LexerAsm::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
if (!options.fold)
return;
LexAccessor styler(pAccess);
Sci_PositionU endPos = startPos + length;
int visibleChars = 0;
Sci_Position lineCurrent = styler.GetLine(startPos);
int levelCurrent = SC_FOLDLEVELBASE;
if (lineCurrent > 0)
levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
int levelNext = levelCurrent;
char chNext = styler[startPos];
int styleNext = styler.StyleAt(startPos);
int style = initStyle;
char word[100];
int wordlen = 0;
const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
for (Sci_PositionU i = startPos; i < endPos; i++) {
char ch = chNext;
chNext = styler.SafeGetCharAt(i + 1);
int stylePrev = style;
style = styleNext;
styleNext = styler.StyleAt(i + 1);
bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
if (options.foldCommentMultiline && IsStreamCommentStyle(style)) {
if (!IsStreamCommentStyle(stylePrev)) {
levelNext++;
} else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
// Comments don't end at end of line and the next character may be unstyled.
levelNext--;
}
}
if (options.foldCommentExplicit && ((style == SCE_ASM_COMMENT) || options.foldExplicitAnywhere)) {
if (userDefinedFoldMarkers) {
if (styler.Match(i, options.foldExplicitStart.c_str())) {
levelNext++;
} else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
levelNext--;
}
} else {
if (ch == ';') {
if (chNext == '{') {
levelNext++;
} else if (chNext == '}') {
levelNext--;
}
}
}
}
if (options.foldSyntaxBased && (style == SCE_ASM_DIRECTIVE)) {
word[wordlen++] = static_cast<char>(LowerCase(ch));
if (wordlen == 100) { // prevent overflow
word[0] = '\0';
wordlen = 1;
}
if (styleNext != SCE_ASM_DIRECTIVE) { // reading directive ready
word[wordlen] = '\0';
wordlen = 0;
if (directives4foldstart.InList(word)) {
levelNext++;
} else if (directives4foldend.InList(word)){
levelNext--;
}
}
}
if (!IsASpace(ch))
visibleChars++;
if (atEOL || (i == endPos-1)) {
int levelUse = levelCurrent;
int lev = levelUse | levelNext << 16;
if (visibleChars == 0 && options.foldCompact)
lev |= SC_FOLDLEVELWHITEFLAG;
if (levelUse < levelNext)
lev |= SC_FOLDLEVELHEADERFLAG;
if (lev != styler.LevelAt(lineCurrent)) {
styler.SetLevel(lineCurrent, lev);
}
lineCurrent++;
levelCurrent = levelNext;
if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length() - 1))) {
// There is an empty line at end of file so give it same level and empty
styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
}
visibleChars = 0;
}
}
}
LexerModule lmAsm(SCLEX_ASM, LexerAsm::LexerFactoryAsm, "asm", asmWordListDesc);
LexerModule lmAs(SCLEX_AS, LexerAsm::LexerFactoryAs, "as", asmWordListDesc);