You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
notepad-plus-plus/lexilla/lexers/LexAsm.cxx

480 lines
14 KiB

// Scintilla source code edit control
/** @file LexAsm.cxx
** Lexer for Assembler, just for the MASM syntax
** Written by The Black Horus
** Enhancements and NASM stuff by Kein-Hong Man, 2003-10
** SCE_ASM_COMMENTBLOCK and SCE_ASM_CHARACTER are for future GNU as colouring
** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
**/
// Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
#include <cstdlib>
#include <cassert>
#include <cstring>
#include <cctype>
#include <cstdio>
#include <cstdarg>
#include <string>
#include <string_view>
#include <map>
#include <set>
#include <functional>
#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"
#include "WordList.h"
#include "LexAccessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "LexerModule.h"
#include "OptionSet.h"
#include "DefaultLexer.h"
using namespace Scintilla;
using namespace Lexilla;
namespace {
bool IsAWordChar(const int ch) noexcept {
return (ch < 0x80) && (isalnum(ch) || ch == '.' ||
ch == '_' || ch == '?');
}
bool IsAWordStart(const int ch) noexcept {
return (ch < 0x80) && (isalnum(ch) || ch == '_' || ch == '.' ||
ch == '%' || ch == '@' || ch == '$' || ch == '?');
}
bool IsAsmOperator(const int ch) noexcept {
if ((ch < 0x80) && (isalnum(ch)))
return false;
// '.' left out as it is used to make up numbers
if (ch == '*' || ch == '/' || ch == '-' || ch == '+' ||
ch == '(' || ch == ')' || ch == '=' || ch == '^' ||
ch == '[' || ch == ']' || ch == '<' || ch == '&' ||
ch == '>' || ch == ',' || ch == '|' || ch == '~' ||
ch == '%' || ch == ':')
return true;
return false;
}
constexpr bool IsStreamCommentStyle(int style) noexcept {
return style == SCE_ASM_COMMENTDIRECTIVE || style == SCE_ASM_COMMENTBLOCK;
}
// An individual named option for use in an OptionSet
// Options used for LexerAsm
struct OptionsAsm {
std::string delimiter;
bool fold;
bool foldSyntaxBased;
bool foldCommentMultiline;
bool foldCommentExplicit;
std::string foldExplicitStart;
std::string foldExplicitEnd;
bool foldExplicitAnywhere;
bool foldCompact;
std::string commentChar;
OptionsAsm() {
delimiter = "";
fold = false;
foldSyntaxBased = true;
foldCommentMultiline = false;
foldCommentExplicit = false;
foldExplicitStart = "";
foldExplicitEnd = "";
foldExplicitAnywhere = false;
foldCompact = true;
commentChar = "";
}
};
const char *const asmWordListDesc[] = {
"CPU instructions",
"FPU instructions",
"Registers",
"Directives",
"Directive operands",
"Extended instructions",
"Directives4Foldstart",
"Directives4Foldend",
nullptr
};
struct OptionSetAsm : public OptionSet<OptionsAsm> {
OptionSetAsm() {
DefineProperty("lexer.asm.comment.delimiter", &OptionsAsm::delimiter,
"Character used for COMMENT directive's delimiter, replacing the standard \"~\".");
DefineProperty("fold", &OptionsAsm::fold);
DefineProperty("fold.asm.syntax.based", &OptionsAsm::foldSyntaxBased,
"Set this property to 0 to disable syntax based folding.");
DefineProperty("fold.asm.comment.multiline", &OptionsAsm::foldCommentMultiline,
"Set this property to 1 to enable folding multi-line comments.");
DefineProperty("fold.asm.comment.explicit", &OptionsAsm::foldCommentExplicit,
"This option enables folding explicit fold points when using the Asm lexer. "
"Explicit fold points allows adding extra folding by placing a ;{ comment at the start and a ;} "
"at the end of a section that should fold.");
DefineProperty("fold.asm.explicit.start", &OptionsAsm::foldExplicitStart,
"The string to use for explicit fold start points, replacing the standard ;{.");
DefineProperty("fold.asm.explicit.end", &OptionsAsm::foldExplicitEnd,
"The string to use for explicit fold end points, replacing the standard ;}.");
DefineProperty("fold.asm.explicit.anywhere", &OptionsAsm::foldExplicitAnywhere,
"Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
DefineProperty("fold.compact", &OptionsAsm::foldCompact);
DefineProperty("lexer.as.comment.character", &OptionsAsm::commentChar,
"Overrides the default comment character (which is ';' for asm and '#' for as).");
DefineWordListSets(asmWordListDesc);
}
};
class LexerAsm : public DefaultLexer {
WordList cpuInstruction;
WordList mathInstruction;
WordList registers;
WordList directive;
WordList directiveOperand;
WordList extInstruction;
WordList directives4foldstart;
WordList directives4foldend;
OptionsAsm options;
OptionSetAsm osAsm;
int commentChar;
public:
LexerAsm(const char *languageName_, int language_, int commentChar_) : DefaultLexer(languageName_, language_) {
commentChar = commentChar_;
}
virtual ~LexerAsm() {
}
void SCI_METHOD Release() override {
delete this;
}
int SCI_METHOD Version() const override {
return lvRelease5;
}
const char * SCI_METHOD PropertyNames() override {
return osAsm.PropertyNames();
}
int SCI_METHOD PropertyType(const char *name) override {
return osAsm.PropertyType(name);
}
const char * SCI_METHOD DescribeProperty(const char *name) override {
return osAsm.DescribeProperty(name);
}
Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
const char * SCI_METHOD PropertyGet(const char *key) override {
return osAsm.PropertyGet(key);
}
const char * SCI_METHOD DescribeWordListSets() override {
return osAsm.DescribeWordListSets();
}
Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
void * SCI_METHOD PrivateCall(int, void *) override {
return nullptr;
}
static ILexer5 *LexerFactoryAsm() {
return new LexerAsm("asm", SCLEX_ASM, ';');
}
static ILexer5 *LexerFactoryAs() {
return new LexerAsm("as", SCLEX_AS, '#');
}
};
Sci_Position SCI_METHOD LexerAsm::PropertySet(const char *key, const char *val) {
if (osAsm.PropertySet(&options, key, val)) {
return 0;
}
return -1;
}
Sci_Position SCI_METHOD LexerAsm::WordListSet(int n, const char *wl) {
WordList *wordListN = nullptr;
switch (n) {
case 0:
wordListN = &cpuInstruction;
break;
case 1:
wordListN = &mathInstruction;
break;
case 2:
wordListN = &registers;
break;
case 3:
wordListN = &directive;
break;
case 4:
wordListN = &directiveOperand;
break;
case 5:
wordListN = &extInstruction;
break;
case 6:
wordListN = &directives4foldstart;
break;
case 7:
wordListN = &directives4foldend;
break;
}
Sci_Position firstModification = -1;
if (wordListN) {
if (wordListN->Set(wl, true)) {
firstModification = 0;
}
}
return firstModification;
}
void SCI_METHOD LexerAsm::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
LexAccessor styler(pAccess);
const char commentCharacter = options.commentChar.empty() ?
commentChar : options.commentChar.front();
// Do not leak onto next line
if (initStyle == SCE_ASM_STRINGEOL)
initStyle = SCE_ASM_DEFAULT;
StyleContext sc(startPos, length, initStyle, styler);
for (; sc.More(); sc.Forward())
{
Updated to Scintilla 5.4.2 & Lexilla 5.3.1 https://www.scintilla.org/scintilla542.zip Release 5.4.2 Released 5 March 2024. Significantly reduce memory used for undo actions, often to a half or quarter of previous versions. Feature #1458. Add APIs for saving and restoring undo history. For GTK, when laying out text, detect runs with both left-to-right and right-to-left ranges and divide into an ASCII prefix and more complex suffix. Lay out the ASCII prefix in the standard manner but, for the suffix, measure the whole width and spread that over the suffix bytes. This produces more usable results where the caret moves over the ASCII prefix correctly and over the suffix reasonably but not accurately. For ScintillaEdit on Qt, fix reference from ScintillaDocument to Document to match change in 5.4.1 using IDocumentEditable for SCI_GETDOCPOINTER and SCI_SETDOCPOINTER. For Direct2D on Win32, use the multi-threaded option to avoid crashes when Scintilla instances created on different threads. There may be more problems with this scenario so it should be avoided. Bug #2420. For Win32, ensure keyboard-initiated context menu appears in multi-screen situations. https://www.scintilla.org/lexilla531.zip Release 5.3.1 Released 5 March 2024. Assembler: After comments, treat \r\n line ends the same as \n. This makes testing easier. Bash: Fix folding when line changed to/from comment and previous line is comment. Issue #224. Batch: Fix handling ':' next to keywords. Issue #222. JavaScript: in cpp lexer, add lexer.cpp.backquoted.strings=2 mode to treat ` back-quoted strings as template literals which allow embedded ${expressions}. Issue #94. Python: fix lexing of rb'' and rf'' strings. Issue #223, Pull request #227. Ruby: fix lexing of methods on numeric literals like '3.times' so the '.' and method name do not appear in numeric style. Issue #225.
9 months ago
if (sc.atLineStart) {
switch (sc.state) {
case SCE_ASM_STRING:
case SCE_ASM_CHARACTER:
// Prevent SCE_ASM_STRINGEOL from leaking back to previous line
sc.SetState(sc.state);
break;
case SCE_ASM_COMMENT:
sc.SetState(SCE_ASM_DEFAULT);
break;
default:
break;
}
}
// Handle line continuation generically.
if (sc.ch == '\\') {
if (sc.chNext == '\n' || sc.chNext == '\r') {
sc.Forward();
if (sc.ch == '\r' && sc.chNext == '\n') {
sc.Forward();
}
continue;
}
}
// Determine if the current state should terminate.
if (sc.state == SCE_ASM_OPERATOR) {
if (!IsAsmOperator(sc.ch)) {
sc.SetState(SCE_ASM_DEFAULT);
}
} else if (sc.state == SCE_ASM_NUMBER) {
if (!IsAWordChar(sc.ch)) {
sc.SetState(SCE_ASM_DEFAULT);
}
} else if (sc.state == SCE_ASM_IDENTIFIER) {
if (!IsAWordChar(sc.ch) ) {
char s[100];
sc.GetCurrentLowered(s, sizeof(s));
bool IsDirective = false;
if (cpuInstruction.InList(s)) {
sc.ChangeState(SCE_ASM_CPUINSTRUCTION);
} else if (mathInstruction.InList(s)) {
sc.ChangeState(SCE_ASM_MATHINSTRUCTION);
} else if (registers.InList(s)) {
sc.ChangeState(SCE_ASM_REGISTER);
} else if (directive.InList(s)) {
sc.ChangeState(SCE_ASM_DIRECTIVE);
IsDirective = true;
} else if (directiveOperand.InList(s)) {
sc.ChangeState(SCE_ASM_DIRECTIVEOPERAND);
} else if (extInstruction.InList(s)) {
sc.ChangeState(SCE_ASM_EXTINSTRUCTION);
}
sc.SetState(SCE_ASM_DEFAULT);
if (IsDirective && !strcmp(s, "comment")) {
const char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0];
while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) {
sc.ForwardSetState(SCE_ASM_DEFAULT);
}
if (sc.ch == delimiter) {
sc.SetState(SCE_ASM_COMMENTDIRECTIVE);
}
}
}
} else if (sc.state == SCE_ASM_COMMENTDIRECTIVE) {
const char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0];
if (sc.ch == delimiter) {
Updated to Scintilla 5.4.2 & Lexilla 5.3.1 https://www.scintilla.org/scintilla542.zip Release 5.4.2 Released 5 March 2024. Significantly reduce memory used for undo actions, often to a half or quarter of previous versions. Feature #1458. Add APIs for saving and restoring undo history. For GTK, when laying out text, detect runs with both left-to-right and right-to-left ranges and divide into an ASCII prefix and more complex suffix. Lay out the ASCII prefix in the standard manner but, for the suffix, measure the whole width and spread that over the suffix bytes. This produces more usable results where the caret moves over the ASCII prefix correctly and over the suffix reasonably but not accurately. For ScintillaEdit on Qt, fix reference from ScintillaDocument to Document to match change in 5.4.1 using IDocumentEditable for SCI_GETDOCPOINTER and SCI_SETDOCPOINTER. For Direct2D on Win32, use the multi-threaded option to avoid crashes when Scintilla instances created on different threads. There may be more problems with this scenario so it should be avoided. Bug #2420. For Win32, ensure keyboard-initiated context menu appears in multi-screen situations. https://www.scintilla.org/lexilla531.zip Release 5.3.1 Released 5 March 2024. Assembler: After comments, treat \r\n line ends the same as \n. This makes testing easier. Bash: Fix folding when line changed to/from comment and previous line is comment. Issue #224. Batch: Fix handling ':' next to keywords. Issue #222. JavaScript: in cpp lexer, add lexer.cpp.backquoted.strings=2 mode to treat ` back-quoted strings as template literals which allow embedded ${expressions}. Issue #94. Python: fix lexing of rb'' and rf'' strings. Issue #223, Pull request #227. Ruby: fix lexing of methods on numeric literals like '3.times' so the '.' and method name do not appear in numeric style. Issue #225.
9 months ago
while (!sc.MatchLineEnd()) {
sc.Forward();
}
sc.SetState(SCE_ASM_DEFAULT);
}
} else if (sc.state == SCE_ASM_STRING) {
if (sc.ch == '\\') {
if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
sc.Forward();
}
} else if (sc.ch == '\"') {
sc.ForwardSetState(SCE_ASM_DEFAULT);
} else if (sc.atLineEnd) {
sc.ChangeState(SCE_ASM_STRINGEOL);
sc.ForwardSetState(SCE_ASM_DEFAULT);
}
} else if (sc.state == SCE_ASM_CHARACTER) {
if (sc.ch == '\\') {
if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
sc.Forward();
}
} else if (sc.ch == '\'') {
sc.ForwardSetState(SCE_ASM_DEFAULT);
} else if (sc.atLineEnd) {
sc.ChangeState(SCE_ASM_STRINGEOL);
sc.ForwardSetState(SCE_ASM_DEFAULT);
}
}
// Determine if a new state should be entered.
if (sc.state == SCE_ASM_DEFAULT) {
if (sc.ch == commentCharacter) {
sc.SetState(SCE_ASM_COMMENT);
} else if (IsASCII(sc.ch) && (isdigit(sc.ch) || (sc.ch == '.' && IsASCII(sc.chNext) && isdigit(sc.chNext)))) {
sc.SetState(SCE_ASM_NUMBER);
} else if (IsAWordStart(sc.ch)) {
sc.SetState(SCE_ASM_IDENTIFIER);
} else if (sc.ch == '\"') {
sc.SetState(SCE_ASM_STRING);
} else if (sc.ch == '\'') {
sc.SetState(SCE_ASM_CHARACTER);
} else if (IsAsmOperator(sc.ch)) {
sc.SetState(SCE_ASM_OPERATOR);
}
}
}
sc.Complete();
}
// Store both the current line's fold level and the next lines in the
// level store to make it easy to pick up with each increment
// and to make it possible to fiddle the current level for "else".
void SCI_METHOD LexerAsm::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
if (!options.fold)
return;
LexAccessor styler(pAccess);
const Sci_PositionU endPos = startPos + length;
int visibleChars = 0;
Sci_Position lineCurrent = styler.GetLine(startPos);
int levelCurrent = SC_FOLDLEVELBASE;
if (lineCurrent > 0)
levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
int levelNext = levelCurrent;
char chNext = styler[startPos];
int styleNext = styler.StyleAt(startPos);
int style = initStyle;
char word[100]{};
int wordlen = 0;
const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
for (Sci_PositionU i = startPos; i < endPos; i++) {
const char ch = chNext;
chNext = styler.SafeGetCharAt(i + 1);
const int stylePrev = style;
style = styleNext;
styleNext = styler.StyleAt(i + 1);
const bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
if (options.foldCommentMultiline && IsStreamCommentStyle(style)) {
if (!IsStreamCommentStyle(stylePrev)) {
levelNext++;
} else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
// Comments don't end at end of line and the next character may be unstyled.
levelNext--;
}
}
if (options.foldCommentExplicit && ((style == SCE_ASM_COMMENT) || options.foldExplicitAnywhere)) {
if (userDefinedFoldMarkers) {
if (styler.Match(i, options.foldExplicitStart.c_str())) {
levelNext++;
} else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
levelNext--;
}
} else {
if (ch == ';') {
if (chNext == '{') {
levelNext++;
} else if (chNext == '}') {
levelNext--;
}
}
}
}
if (options.foldSyntaxBased && (style == SCE_ASM_DIRECTIVE)) {
word[wordlen++] = MakeLowerCase(ch);
if (wordlen == 100) { // prevent overflow
word[0] = '\0';
wordlen = 1;
}
if (styleNext != SCE_ASM_DIRECTIVE) { // reading directive ready
word[wordlen] = '\0';
wordlen = 0;
if (directives4foldstart.InList(word)) {
levelNext++;
} else if (directives4foldend.InList(word)){
levelNext--;
}
}
}
if (!IsASpace(ch))
visibleChars++;
if (atEOL || (i == endPos-1)) {
const int levelUse = levelCurrent;
int lev = levelUse | levelNext << 16;
if (visibleChars == 0 && options.foldCompact)
lev |= SC_FOLDLEVELWHITEFLAG;
if (levelUse < levelNext)
lev |= SC_FOLDLEVELHEADERFLAG;
if (lev != styler.LevelAt(lineCurrent)) {
styler.SetLevel(lineCurrent, lev);
}
lineCurrent++;
levelCurrent = levelNext;
if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length() - 1))) {
// There is an empty line at end of file so give it same level and empty
styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
}
visibleChars = 0;
}
}
}
}
extern const LexerModule lmAsm(SCLEX_ASM, LexerAsm::LexerFactoryAsm, "asm", asmWordListDesc);
extern const LexerModule lmAs(SCLEX_AS, LexerAsm::LexerFactoryAs, "as", asmWordListDesc);