// Scintilla source code edit control /** @file LexAsm.cxx ** Lexer for Assembler, just for the MASM syntax ** Written by The Black Horus ** Enhancements and NASM stuff by Kein-Hong Man, 2003-10 ** SCE_ASM_COMMENTBLOCK and SCE_ASM_CHARACTER are for future GNU as colouring ** Converted to lexer object and added further folding features/properties by "Udo Lechner" **/ // Copyright 1998-2003 by Neil Hodgson // The License.txt file describes the conditions under which this software may be distributed. #include #include #include #include #include #include #include #include #include #include #include #include "ILexer.h" #include "Scintilla.h" #include "SciLexer.h" #include "WordList.h" #include "LexAccessor.h" #include "StyleContext.h" #include "CharacterSet.h" #include "LexerModule.h" #include "OptionSet.h" #include "DefaultLexer.h" using namespace Scintilla; using namespace Lexilla; namespace { bool IsAWordChar(const int ch) noexcept { return (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_' || ch == '?'); } bool IsAWordStart(const int ch) noexcept { return (ch < 0x80) && (isalnum(ch) || ch == '_' || ch == '.' || ch == '%' || ch == '@' || ch == '$' || ch == '?'); } bool IsAsmOperator(const int ch) noexcept { if ((ch < 0x80) && (isalnum(ch))) return false; // '.' left out as it is used to make up numbers if (ch == '*' || ch == '/' || ch == '-' || ch == '+' || ch == '(' || ch == ')' || ch == '=' || ch == '^' || ch == '[' || ch == ']' || ch == '<' || ch == '&' || ch == '>' || ch == ',' || ch == '|' || ch == '~' || ch == '%' || ch == ':') return true; return false; } constexpr bool IsStreamCommentStyle(int style) noexcept { return style == SCE_ASM_COMMENTDIRECTIVE || style == SCE_ASM_COMMENTBLOCK; } // An individual named option for use in an OptionSet // Options used for LexerAsm struct OptionsAsm { std::string delimiter; bool fold; bool foldSyntaxBased; bool foldCommentMultiline; bool foldCommentExplicit; std::string foldExplicitStart; std::string foldExplicitEnd; bool foldExplicitAnywhere; bool foldCompact; std::string commentChar; OptionsAsm() { delimiter = ""; fold = false; foldSyntaxBased = true; foldCommentMultiline = false; foldCommentExplicit = false; foldExplicitStart = ""; foldExplicitEnd = ""; foldExplicitAnywhere = false; foldCompact = true; commentChar = ""; } }; const char *const asmWordListDesc[] = { "CPU instructions", "FPU instructions", "Registers", "Directives", "Directive operands", "Extended instructions", "Directives4Foldstart", "Directives4Foldend", nullptr }; struct OptionSetAsm : public OptionSet { OptionSetAsm() { DefineProperty("lexer.asm.comment.delimiter", &OptionsAsm::delimiter, "Character used for COMMENT directive's delimiter, replacing the standard \"~\"."); DefineProperty("fold", &OptionsAsm::fold); DefineProperty("fold.asm.syntax.based", &OptionsAsm::foldSyntaxBased, "Set this property to 0 to disable syntax based folding."); DefineProperty("fold.asm.comment.multiline", &OptionsAsm::foldCommentMultiline, "Set this property to 1 to enable folding multi-line comments."); DefineProperty("fold.asm.comment.explicit", &OptionsAsm::foldCommentExplicit, "This option enables folding explicit fold points when using the Asm lexer. " "Explicit fold points allows adding extra folding by placing a ;{ comment at the start and a ;} " "at the end of a section that should fold."); DefineProperty("fold.asm.explicit.start", &OptionsAsm::foldExplicitStart, "The string to use for explicit fold start points, replacing the standard ;{."); DefineProperty("fold.asm.explicit.end", &OptionsAsm::foldExplicitEnd, "The string to use for explicit fold end points, replacing the standard ;}."); DefineProperty("fold.asm.explicit.anywhere", &OptionsAsm::foldExplicitAnywhere, "Set this property to 1 to enable explicit fold points anywhere, not just in line comments."); DefineProperty("fold.compact", &OptionsAsm::foldCompact); DefineProperty("lexer.as.comment.character", &OptionsAsm::commentChar, "Overrides the default comment character (which is ';' for asm and '#' for as)."); DefineWordListSets(asmWordListDesc); } }; class LexerAsm : public DefaultLexer { WordList cpuInstruction; WordList mathInstruction; WordList registers; WordList directive; WordList directiveOperand; WordList extInstruction; WordList directives4foldstart; WordList directives4foldend; OptionsAsm options; OptionSetAsm osAsm; int commentChar; public: LexerAsm(const char *languageName_, int language_, int commentChar_) : DefaultLexer(languageName_, language_) { commentChar = commentChar_; } virtual ~LexerAsm() { } void SCI_METHOD Release() override { delete this; } int SCI_METHOD Version() const override { return lvRelease5; } const char * SCI_METHOD PropertyNames() override { return osAsm.PropertyNames(); } int SCI_METHOD PropertyType(const char *name) override { return osAsm.PropertyType(name); } const char * SCI_METHOD DescribeProperty(const char *name) override { return osAsm.DescribeProperty(name); } Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override; const char * SCI_METHOD PropertyGet(const char *key) override { return osAsm.PropertyGet(key); } const char * SCI_METHOD DescribeWordListSets() override { return osAsm.DescribeWordListSets(); } Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override; void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; void * SCI_METHOD PrivateCall(int, void *) override { return nullptr; } static ILexer5 *LexerFactoryAsm() { return new LexerAsm("asm", SCLEX_ASM, ';'); } static ILexer5 *LexerFactoryAs() { return new LexerAsm("as", SCLEX_AS, '#'); } }; Sci_Position SCI_METHOD LexerAsm::PropertySet(const char *key, const char *val) { if (osAsm.PropertySet(&options, key, val)) { return 0; } return -1; } Sci_Position SCI_METHOD LexerAsm::WordListSet(int n, const char *wl) { WordList *wordListN = nullptr; switch (n) { case 0: wordListN = &cpuInstruction; break; case 1: wordListN = &mathInstruction; break; case 2: wordListN = ®isters; break; case 3: wordListN = &directive; break; case 4: wordListN = &directiveOperand; break; case 5: wordListN = &extInstruction; break; case 6: wordListN = &directives4foldstart; break; case 7: wordListN = &directives4foldend; break; } Sci_Position firstModification = -1; if (wordListN) { if (wordListN->Set(wl, true)) { firstModification = 0; } } return firstModification; } void SCI_METHOD LexerAsm::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) { LexAccessor styler(pAccess); const char commentCharacter = options.commentChar.empty() ? commentChar : options.commentChar.front(); // Do not leak onto next line if (initStyle == SCE_ASM_STRINGEOL) initStyle = SCE_ASM_DEFAULT; StyleContext sc(startPos, length, initStyle, styler); for (; sc.More(); sc.Forward()) { if (sc.atLineStart) { switch (sc.state) { case SCE_ASM_STRING: case SCE_ASM_CHARACTER: // Prevent SCE_ASM_STRINGEOL from leaking back to previous line sc.SetState(sc.state); break; case SCE_ASM_COMMENT: sc.SetState(SCE_ASM_DEFAULT); break; default: break; } } // Handle line continuation generically. if (sc.ch == '\\') { if (sc.chNext == '\n' || sc.chNext == '\r') { sc.Forward(); if (sc.ch == '\r' && sc.chNext == '\n') { sc.Forward(); } continue; } } // Determine if the current state should terminate. if (sc.state == SCE_ASM_OPERATOR) { if (!IsAsmOperator(sc.ch)) { sc.SetState(SCE_ASM_DEFAULT); } } else if (sc.state == SCE_ASM_NUMBER) { if (!IsAWordChar(sc.ch)) { sc.SetState(SCE_ASM_DEFAULT); } } else if (sc.state == SCE_ASM_IDENTIFIER) { if (!IsAWordChar(sc.ch) ) { char s[100]; sc.GetCurrentLowered(s, sizeof(s)); bool IsDirective = false; if (cpuInstruction.InList(s)) { sc.ChangeState(SCE_ASM_CPUINSTRUCTION); } else if (mathInstruction.InList(s)) { sc.ChangeState(SCE_ASM_MATHINSTRUCTION); } else if (registers.InList(s)) { sc.ChangeState(SCE_ASM_REGISTER); } else if (directive.InList(s)) { sc.ChangeState(SCE_ASM_DIRECTIVE); IsDirective = true; } else if (directiveOperand.InList(s)) { sc.ChangeState(SCE_ASM_DIRECTIVEOPERAND); } else if (extInstruction.InList(s)) { sc.ChangeState(SCE_ASM_EXTINSTRUCTION); } sc.SetState(SCE_ASM_DEFAULT); if (IsDirective && !strcmp(s, "comment")) { const char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0]; while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) { sc.ForwardSetState(SCE_ASM_DEFAULT); } if (sc.ch == delimiter) { sc.SetState(SCE_ASM_COMMENTDIRECTIVE); } } } } else if (sc.state == SCE_ASM_COMMENTDIRECTIVE) { const char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0]; if (sc.ch == delimiter) { while (!sc.MatchLineEnd()) { sc.Forward(); } sc.SetState(SCE_ASM_DEFAULT); } } else if (sc.state == SCE_ASM_STRING) { if (sc.ch == '\\') { if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') { sc.Forward(); } } else if (sc.ch == '\"') { sc.ForwardSetState(SCE_ASM_DEFAULT); } else if (sc.atLineEnd) { sc.ChangeState(SCE_ASM_STRINGEOL); sc.ForwardSetState(SCE_ASM_DEFAULT); } } else if (sc.state == SCE_ASM_CHARACTER) { if (sc.ch == '\\') { if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') { sc.Forward(); } } else if (sc.ch == '\'') { sc.ForwardSetState(SCE_ASM_DEFAULT); } else if (sc.atLineEnd) { sc.ChangeState(SCE_ASM_STRINGEOL); sc.ForwardSetState(SCE_ASM_DEFAULT); } } // Determine if a new state should be entered. if (sc.state == SCE_ASM_DEFAULT) { if (sc.ch == commentCharacter) { sc.SetState(SCE_ASM_COMMENT); } else if (IsASCII(sc.ch) && (isdigit(sc.ch) || (sc.ch == '.' && IsASCII(sc.chNext) && isdigit(sc.chNext)))) { sc.SetState(SCE_ASM_NUMBER); } else if (IsAWordStart(sc.ch)) { sc.SetState(SCE_ASM_IDENTIFIER); } else if (sc.ch == '\"') { sc.SetState(SCE_ASM_STRING); } else if (sc.ch == '\'') { sc.SetState(SCE_ASM_CHARACTER); } else if (IsAsmOperator(sc.ch)) { sc.SetState(SCE_ASM_OPERATOR); } } } sc.Complete(); } // Store both the current line's fold level and the next lines in the // level store to make it easy to pick up with each increment // and to make it possible to fiddle the current level for "else". void SCI_METHOD LexerAsm::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) { if (!options.fold) return; LexAccessor styler(pAccess); const Sci_PositionU endPos = startPos + length; int visibleChars = 0; Sci_Position lineCurrent = styler.GetLine(startPos); int levelCurrent = SC_FOLDLEVELBASE; if (lineCurrent > 0) levelCurrent = styler.LevelAt(lineCurrent-1) >> 16; int levelNext = levelCurrent; char chNext = styler[startPos]; int styleNext = styler.StyleAt(startPos); int style = initStyle; char word[100]{}; int wordlen = 0; const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty(); for (Sci_PositionU i = startPos; i < endPos; i++) { const char ch = chNext; chNext = styler.SafeGetCharAt(i + 1); const int stylePrev = style; style = styleNext; styleNext = styler.StyleAt(i + 1); const bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); if (options.foldCommentMultiline && IsStreamCommentStyle(style)) { if (!IsStreamCommentStyle(stylePrev)) { levelNext++; } else if (!IsStreamCommentStyle(styleNext) && !atEOL) { // Comments don't end at end of line and the next character may be unstyled. levelNext--; } } if (options.foldCommentExplicit && ((style == SCE_ASM_COMMENT) || options.foldExplicitAnywhere)) { if (userDefinedFoldMarkers) { if (styler.Match(i, options.foldExplicitStart.c_str())) { levelNext++; } else if (styler.Match(i, options.foldExplicitEnd.c_str())) { levelNext--; } } else { if (ch == ';') { if (chNext == '{') { levelNext++; } else if (chNext == '}') { levelNext--; } } } } if (options.foldSyntaxBased && (style == SCE_ASM_DIRECTIVE)) { word[wordlen++] = MakeLowerCase(ch); if (wordlen == 100) { // prevent overflow word[0] = '\0'; wordlen = 1; } if (styleNext != SCE_ASM_DIRECTIVE) { // reading directive ready word[wordlen] = '\0'; wordlen = 0; if (directives4foldstart.InList(word)) { levelNext++; } else if (directives4foldend.InList(word)){ levelNext--; } } } if (!IsASpace(ch)) visibleChars++; if (atEOL || (i == endPos-1)) { const int levelUse = levelCurrent; int lev = levelUse | levelNext << 16; if (visibleChars == 0 && options.foldCompact) lev |= SC_FOLDLEVELWHITEFLAG; if (levelUse < levelNext) lev |= SC_FOLDLEVELHEADERFLAG; if (lev != styler.LevelAt(lineCurrent)) { styler.SetLevel(lineCurrent, lev); } lineCurrent++; levelCurrent = levelNext; if (atEOL && (i == static_cast(styler.Length() - 1))) { // There is an empty line at end of file so give it same level and empty styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG); } visibleChars = 0; } } } } extern const LexerModule lmAsm(SCLEX_ASM, LexerAsm::LexerFactoryAsm, "asm", asmWordListDesc); extern const LexerModule lmAs(SCLEX_AS, LexerAsm::LexerFactoryAs, "as", asmWordListDesc);