/** @file LexRaku.cxx ** Lexer for Raku ** ** Copyright (c) 2019 Mark Reay **/ // Copyright 1998-2005 by Neil Hodgson // The License.txt file describes the conditions under which this software may be distributed. /* * Raku (Perl6) Lexer for Scintilla * --------------------------------- * --------------------------------- * 06-Dec-2019: More Unicode support: * - Added a full scope of allowed numbers and letters * 29-Nov-2019: More highlighting / implemented basic folding: * - Operators (blanket cover, no sequence checking) * - Class / Grammar name highlighting * - Folding: * - Comments: line / multi-line * - POD sections * - Code blocks {} * 26-Nov-2019: Basic syntax highlighting covering the following: * - Comments, both line and embedded (multi-line) * - POD, no inline highlighting as yet... * - Heredoc block string, with variable highlighting (with qq) * - Strings, with variable highlighting (with ") * - Q Language, including adverbs (also basic q and qq) * - Regex, including adverbs * - Numbers * - Bareword / identifiers * - Types * - Variables: mu, positional, associative, callable * TODO: * - POD inline * - Better operator sequence coverage */ #include #include #include #include #include #include #include #include #include #include #include #include "ILexer.h" #include "Scintilla.h" #include "SciLexer.h" #include "WordList.h" #include "LexAccessor.h" #include "StyleContext.h" #include "CharacterSet.h" #include "CharacterCategory.h" #include "LexerModule.h" #include "OptionSet.h" #include "DefaultLexer.h" using namespace Scintilla; using namespace Lexilla; namespace { // anonymous namespace to isolate any name clashes /*----------------------------------------------------------------------------* * --- DEFINITIONS: OPTIONS / CONSTANTS --- *----------------------------------------------------------------------------*/ // Number types #define RAKUNUM_BINARY 1 // order is significant: 1-3 cannot have a dot #define RAKUNUM_OCTAL 2 #define RAKUNUM_FLOAT_EXP 3 // exponent part only #define RAKUNUM_HEX 4 // may be a hex float #define RAKUNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings #define RAKUNUM_VECTOR 6 #define RAKUNUM_V_VECTOR 7 #define RAKUNUM_VERSION 8 // can contain multiple '.'s #define RAKUNUM_BAD 9 // Regex / Q string types #define RAKUTYPE_REGEX_NORM 0 // 0 char ident #define RAKUTYPE_REGEX_S 1 // order is significant: #define RAKUTYPE_REGEX_M 2 // 1 char ident #define RAKUTYPE_REGEX_Y 3 // 1 char ident #define RAKUTYPE_REGEX 4 // > RAKUTYPE_REGEX == 2 char identifiers #define RAKUTYPE_REGEX_RX 5 // 2 char ident #define RAKUTYPE_REGEX_TR 6 // 2 char ident #define RAKUTYPE_QLANG 7 // < RAKUTYPE_QLANG == RAKUTYPE_REGEX_? #define RAKUTYPE_STR_WQ 8 // 0 char ident < word quote > #define RAKUTYPE_STR_Q 9 // 1 char ident #define RAKUTYPE_STR_QX 10 // 2 char ident #define RAKUTYPE_STR_QW 11 // 2 char ident #define RAKUTYPE_STR_QQ 12 // 2 char ident #define RAKUTYPE_STR_QQX 13 // 3 char ident #define RAKUTYPE_STR_QQW 14 // 3 char ident #define RAKUTYPE_STR_QQWW 15 // 4 char ident // Delimiter types #define RAKUDELIM_BRACKET 0 // bracket: regex, Q language #define RAKUDELIM_QUOTE 1 // quote: normal string // rakuWordLists: keywords as defined in config const char *const rakuWordLists[] = { "Keywords and identifiers", "Functions", "Types basic", "Types composite", "Types domain-specific", "Types exception", "Adverbs", nullptr, }; // Options and defaults struct OptionsRaku { bool fold; bool foldCompact; bool foldComment; bool foldCommentMultiline; bool foldCommentPOD; OptionsRaku() { fold = true; foldCompact = false; foldComment = true; foldCommentMultiline = true; foldCommentPOD = true; } }; // init options and words struct OptionSetRaku : public OptionSet { OptionSetRaku() { DefineProperty("fold", &OptionsRaku::fold); DefineProperty("fold.comment", &OptionsRaku::foldComment); DefineProperty("fold.compact", &OptionsRaku::foldCompact); DefineProperty("fold.raku.comment.multiline", &OptionsRaku::foldCommentMultiline, "Set this property to 0 to disable folding multi-line comments when fold.comment=1."); DefineProperty("fold.raku.comment.pod", &OptionsRaku::foldCommentPOD, "Set this property to 0 to disable folding POD comments when fold.comment=1."); // init word lists DefineWordListSets(rakuWordLists); } }; // Delimiter pair struct DelimPair { int opener; // opener char int closer[2]; // closer chars bool interpol; // can variables be interpolated? short count; // delimiter char count DelimPair() { opener = 0; closer[0] = 0; closer[1] = 0; interpol = false; count = 0; } bool isCloser(int ch) const { return ch == closer[0] || ch == closer[1]; } }; /*----------------------------------------------------------------------------* * --- FUNCTIONS --- *----------------------------------------------------------------------------*/ /* * IsANewLine * - returns true if this is a new line char */ constexpr bool IsANewLine(int ch) noexcept { return ch == '\r' || ch == '\n'; } /* * IsAWhitespace * - returns true if this is a whitespace (or newline) char */ bool IsAWhitespace(int ch) noexcept { return IsASpaceOrTab(ch) || IsANewLine(ch); } /* * IsAlphabet * - returns true if this is an alphabetical char */ constexpr bool IsAlphabet(int ch) noexcept { return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'); } /* * IsCommentLine * - returns true if this is a comment line * - tests: SCE_RAKU_COMMENTLINE or SCE_RAKU_COMMENTEMBED * modified from: LexPerl.cxx */ bool IsCommentLine(Sci_Position line, LexAccessor &styler, int type = SCE_RAKU_COMMENTLINE) { Sci_Position pos = styler.LineStart(line); Sci_Position eol_pos = styler.LineStart(line + 1) - 1; for (Sci_Position i = pos; i < eol_pos; i++) { char ch = styler[i]; int style = styler.StyleAt(i); if (type == SCE_RAKU_COMMENTEMBED) { if (i == (eol_pos - 1) && style == type) return true; } else { // make sure the line is NOT a SCE_RAKU_COMMENTEMBED if (ch == '#' && style == type && styler[i+1] != '`' ) return true; else if (!IsASpaceOrTab(ch)) return false; } } return false; } /* * ContainsQTo * - returns true if this range contains ":to" in style SCE_RAKU_ADVERB indicating the start * of a SCE_RAKU_HEREDOC_Q or SCE_RAKU_HEREDOC_QQ. */ bool ContainsQTo(Sci_Position start, Sci_Position end, LexAccessor &styler) { std::string adverb; for (Sci_Position i = start; i < end; i++) { if (styler.StyleAt(i) == SCE_RAKU_ADVERB) { adverb.push_back(styler[i]); } } return adverb.find(":to") != std::string::npos; } /* * GetBracketCloseChar * - returns the end bracket char: opposite of start * - see: http://www.unicode.org/Public/5.1.0/ucd/BidiMirroring.txt (first section) * - Categories are general matches for valid BiDi types * - Most closer chars are opener + 1 */ int GetBracketCloseChar(const int ch) noexcept { const CharacterCategory cc = CategoriseCharacter(ch); switch (cc) { case ccSm: switch (ch) { case 0x3C: return 0x3E; // LESS-THAN SIGN case 0x2208: return 0x220B; // ELEMENT OF case 0x2209: return 0x220C; // NOT AN ELEMENT OF case 0x220A: return 0x220D; // SMALL ELEMENT OF case 0x2215: return 0x29F5; // DIVISION SLASH case 0x2243: return 0x22CD; // ASYMPTOTICALLY EQUAL TO case 0x2298: return 0x29B8; // CIRCLED DIVISION SLASH case 0x22A6: return 0x2ADE; // ASSERTION case 0x22A8: return 0x2AE4; // TRUE case 0x22A9: return 0x2AE3; // FORCES case 0x22AB: return 0x2AE5; // DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE case 0x22F2: return 0x22FA; // ELEMENT OF WITH LONG HORIZONTAL STROKE case 0x22F3: return 0x22FB; // ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE case 0x22F4: return 0x22FC; // SMALL ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE case 0x22F6: return 0x22FD; // ELEMENT OF WITH OVERBAR case 0x22F7: return 0x22FE; // SMALL ELEMENT OF WITH OVERBAR case 0xFF1C: return 0xFF1E; // FULLWIDTH LESS-THAN SIGN } break; case ccPs: switch (ch) { case 0x5B: return 0x5D; // LEFT SQUARE BRACKET case 0x7B: return 0x7D; // LEFT CURLY BRACKET case 0x298D: return 0x2990; // LEFT SQUARE BRACKET WITH TICK IN TOP CORNER case 0x298F: return 0x298E; // LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER case 0xFF3B: return 0xFF3D; // FULLWIDTH LEFT SQUARE BRACKET case 0xFF5B: return 0xFF5D; // FULLWIDTH LEFT CURLY BRACKET } break; case ccPi: break; default: return 0; } return ch + 1; } /* * IsValidQuoteOpener * - */ bool IsValidQuoteOpener(const int ch, DelimPair &dp, int type = RAKUDELIM_BRACKET) noexcept { dp.closer[0] = 0; dp.closer[1] = 0; dp.interpol = true; if (type == RAKUDELIM_QUOTE) { switch (ch) { // Opener Closer Description case '\'': dp.closer[0] = '\''; // APOSTROPHE dp.interpol = false; break; case '"': dp.closer[0] = '"'; // QUOTATION MARK break; case 0x2018: dp.closer[0] = 0x2019; // LEFT SINGLE QUOTATION MARK dp.interpol = false; break; case 0x201C: dp.closer[0] = 0x201D; // LEFT DOUBLE QUOTATION MARK break; case 0x201D: dp.closer[0] = 0x201C; // RIGHT DOUBLE QUOTATION MARK break; case 0x201E: dp.closer[0] = 0x201C; // DOUBLE LOW-9 QUOTATION MARK dp.closer[1] = 0x201D; break; case 0xFF62: dp.closer[0] = 0xFF63; // HALFWIDTH LEFT CORNER BRACKET dp.interpol = false; break; default: return false; } } else if (type == RAKUDELIM_BRACKET) { dp.closer[0] = GetBracketCloseChar(ch); } dp.opener = ch; dp.count = 1; return dp.closer[0] > 0; } /* * IsBracketOpenChar * - true if this is a valid start bracket character */ bool IsBracketOpenChar(int ch) noexcept { return GetBracketCloseChar(ch) > 0; } /* * IsValidRegOrQAdjacent * - returns true if ch is a valid character to put directly after Q / q * * ref: Q Language: https://docs.raku.org/language/quoting */ bool IsValidRegOrQAdjacent(int ch) noexcept { return !(IsAlphaNumeric(ch) || ch == '_' || ch == '(' || ch == ')' || ch == '\'' ); } /* * IsValidRegOrQPrecede * - returns true if ch is a valid preceding character to put directly before Q / q * * ref: Q Language: https://docs.raku.org/language/quoting */ bool IsValidRegOrQPrecede(int ch) noexcept { return !(IsAlphaNumeric(ch) || ch == '_'); } /* * MatchCharInRange * - returns true if the mach character is found in range (of length) * - ignoreDelim (default false) */ bool MatchCharInRange(StyleContext &sc, const Sci_Position length, const int match, bool ignoreDelim = false) { Sci_Position len = 0; int chPrev = sc.chPrev; while (++len < length) { const int ch = sc.GetRelativeCharacter(len); if (ch == match && (ignoreDelim || chPrev != '\\')) return true; } return false; } /* * PrevNonWhitespaceChar * - returns the last non-whitespace char */ int PrevNonWhitespaceChar(StyleContext &sc) { Sci_Position rel = 0; Sci_Position max_back = 0 - sc.currentPos; while (--rel > max_back) { const int ch = sc.GetRelativeCharacter(rel); if (!IsAWhitespace(ch)) return ch; } return 0; // no matching char } /* * IsQLangStartAtScPos * - returns true if this is a valid Q Language sc position * - ref: https://docs.raku.org/language/quoting * - Q :adverb :adverb //; * - q,qx,qw,qq,qqx,qqw,qqww :adverb /:adverb /; */ bool IsQLangStartAtScPos(StyleContext &sc, int &type, const Sci_Position length) { const bool valid_adj = IsValidRegOrQAdjacent(sc.chNext); const int chFw2 = sc.GetRelativeCharacter(2); const int chFw3 = sc.GetRelativeCharacter(3); type = -1; if (IsValidRegOrQPrecede(sc.chPrev)) { if (sc.ch == 'Q' && valid_adj) { type = RAKUTYPE_QLANG; } else if (sc.ch == 'q') { switch (sc.chNext) { case 'x': type = RAKUTYPE_STR_QX; break; case 'w': type = RAKUTYPE_STR_QW; break; case 'q': if (chFw2 == 'x') { type = RAKUTYPE_STR_QQX; } else if (chFw2 == 'w') { if (chFw3 == 'w') { type = RAKUTYPE_STR_QQWW; } else { type = RAKUTYPE_STR_QQW; } } else { type = RAKUTYPE_STR_QQ; } break; default: type = RAKUTYPE_STR_Q; } } else if (sc.ch == '<' && MatchCharInRange(sc, length, '>')) { type = RAKUTYPE_STR_WQ; // < word quote > } } return type >= 0; } /* * IsRegexStartAtScPos * - returns true if this is a valid Regex sc position * - ref: https://docs.raku.org/language/regexes * - Regex: (rx/s/m/tr/y) :adverb /:adverb /; * - regex R :adverb //; * - /:adverb /; */ bool IsRegexStartAtScPos(StyleContext &sc, int &type, CharacterSet &set) { const bool valid_adj = IsValidRegOrQAdjacent(sc.chNext); type = -1; if (IsValidRegOrQPrecede(sc.chPrev)) { switch (sc.ch) { case 'r': if (sc.chNext == 'x') type = RAKUTYPE_REGEX_RX; break; case 't': case 'T': if (sc.chNext == 'r' || sc.chNext == 'R') type = RAKUTYPE_REGEX_TR; break; case 'm': if (valid_adj) type = RAKUTYPE_REGEX_M; break; case 's': case 'S': if (valid_adj) type = RAKUTYPE_REGEX_S; break; case 'y': if (valid_adj) type = RAKUTYPE_REGEX_Y; break; case '/': if (set.Contains(PrevNonWhitespaceChar(sc))) type = RAKUTYPE_REGEX_NORM; } } return type >= 0; } /* * IsValidIdentPrecede * - returns if ch is a valid preceding char to put directly before an identifier */ bool IsValidIdentPrecede(int ch) noexcept { return !(IsAlphaNumeric(ch) || ch == '_' || ch == '@' || ch == '$' || ch == '%'); } /* * IsValidDelimiter * - returns if ch is a valid delimiter (most chars are valid) * * ref: Q Language: https://docs.raku.org/language/quoting */ bool IsValidDelimiter(int ch) noexcept { return !(IsAlphaNumeric(ch) || ch == ':'); } /* * GetDelimiterCloseChar * - returns the corresponding close char for a given delimiter (could be the same char) */ int GetDelimiterCloseChar(int ch) noexcept { int ch_end = GetBracketCloseChar(ch); if (ch_end == 0 && IsValidDelimiter(ch)) { ch_end = ch; } return ch_end; } /* * GetRepeatCharCount * - returns the occurrence count of match */ Sci_Position GetRepeatCharCount(StyleContext &sc, int chMatch, Sci_Position length) { Sci_Position cnt = 0; while (cnt < length) { if (sc.GetRelativeCharacter(cnt) != chMatch) { break; } cnt++; } return cnt; } /* * LengthToDelimiter * - returns the length until the end of a delimited string section * - Ignores nested delimiters (if opener != closer) * - no trailing char after last closer (default false) */ Sci_Position LengthToDelimiter(StyleContext &sc, const DelimPair &dp, Sci_Position length, bool noTrailing = false) { short cnt_open = 0; // count open bracket short cnt_close = 0; // count close bracket bool is_escape = false; // has been escaped using '\'? Sci_Position len = 0; // count characters int chOpener = dp.opener; // look for nested opener / closer if (dp.opener == dp.closer[0]) chOpener = 0; // no opening delimiter (no nesting possible) while (len < length) { const int chPrev = sc.GetRelativeCharacter(len - 1); const int ch = sc.GetRelativeCharacter(len); const int chNext = sc.GetRelativeCharacter(len+1); if (cnt_open == 0 && cnt_close == dp.count) { return len; // end condition has been met } else if (is_escape) { is_escape = false; } else if (ch == '\\') { is_escape = true; } else { if (ch == chOpener) { cnt_open++; // open nested bracket } else if (dp.isCloser(ch)) { if ( cnt_open > 0 ) { cnt_open--; // close nested bracket } else if (dp.count > 1 && cnt_close < (dp.count - 1)) { if (cnt_close > 1) { if (dp.isCloser(chPrev)) { cnt_close++; } else { // reset if previous char was not close cnt_close = 0; } } else { cnt_close++; } } else if (!noTrailing || (IsAWhitespace(chNext))) { cnt_close++; // found last close if (cnt_close > 1 && !dp.isCloser(chPrev)) { cnt_close = 0; // reset if previous char was not close } } else { cnt_close = 0; // non handled close: reset } } else if (IsANewLine(ch)) { cnt_open = 0; // reset after each line cnt_close = 0; } } len++; } return -1; // end condition has NOT been met } /* * LengthToEndHeredoc * - returns the length until the end of a heredoc section * - delimiter string MUST begin on a new line */ Sci_Position LengthToEndHeredoc(const StyleContext &sc, LexAccessor &styler, const Sci_Position length, const char *delim) { bool on_new_ln = false; int i = 0; // str index for (int n = 0; n < length; n++) { const char ch = styler.SafeGetCharAt(sc.currentPos + n, 0); if (on_new_ln) { if (delim[i] == '\0') return n; // at end of str, match found! if (ch != delim[i++]) i = 0; // no char match, reset 'i'ndex } if (i == 0) // detect new line on_new_ln = IsANewLine(ch); } return -1; // no match found } /* * LengthToNextChar * - returns the length until the next character */ Sci_Position LengthToNextChar(StyleContext &sc, const Sci_Position length) { Sci_Position len = 0; while (++len < length) { const int ch = sc.GetRelativeCharacter(len); if (!IsASpaceOrTab(ch) && !IsANewLine(ch)) { break; } } return len; } /* * GetRelativeString * - gets a relative string and sets it in &str * - resets string before setting */ void GetRelativeString(StyleContext &sc, Sci_Position offset, Sci_Position length, std::string &str) { Sci_Position pos = offset; str.clear(); while (pos < length) { str += sc.GetRelativeCharacter(pos++); } } } // end anonymous namespace /*----------------------------------------------------------------------------* * --- class: LexerRaku --- *----------------------------------------------------------------------------*/ //class LexerRaku : public ILexerWithMetaData { class LexerRaku : public DefaultLexer { CharacterSet setWord; CharacterSet setSigil; CharacterSet setTwigil; CharacterSet setOperator; CharacterSet setSpecialVar; WordList regexIdent; // identifiers that specify a regex OptionsRaku options; // Options from config OptionSetRaku osRaku; WordList keywords; // Word Lists from config WordList functions; WordList typesBasic; WordList typesComposite; WordList typesDomainSpecific; WordList typesExceptions; WordList adverbs; public: // Defined as explicit, so that constructor can not be copied explicit LexerRaku() : DefaultLexer("raku", SCLEX_RAKU), setWord(CharacterSet::setAlphaNum, "-_", 0x80), setSigil(CharacterSet::setNone, "$&%@"), setTwigil(CharacterSet::setNone, "!*.:<=?^~"), setOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;<>,?!.~"), setSpecialVar(CharacterSet::setNone, "_/!") { regexIdent.Set("regex rule token"); } // Deleted so LexerRaku objects can not be copied. LexerRaku(const LexerRaku &) = delete; LexerRaku(LexerRaku &&) = delete; void operator=(const LexerRaku &) = delete; void operator=(LexerRaku &&) = delete; virtual ~LexerRaku() { } void SCI_METHOD Release() noexcept override { delete this; } int SCI_METHOD Version() const noexcept override { return lvRelease5; } const char *SCI_METHOD PropertyNames() override { return osRaku.PropertyNames(); } int SCI_METHOD PropertyType(const char *name) override { return osRaku.PropertyType(name); } const char *SCI_METHOD DescribeProperty(const char *name) override { return osRaku.DescribeProperty(name); } Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override; const char *SCI_METHOD PropertyGet(const char *key) override { return osRaku.PropertyGet(key); } const char *SCI_METHOD DescribeWordListSets() override { return osRaku.DescribeWordListSets(); } Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override; void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; static ILexer5 *LexerFactoryRaku() { return new LexerRaku(); } protected: bool IsOperatorChar(const int ch); bool IsWordChar(const int ch, bool allowNumber = true); bool IsWordStartChar(const int ch); bool IsNumberChar(const int ch, int base = 10); bool ProcessRegexTwinCapture(StyleContext &sc, const Sci_Position length, int &type, const DelimPair &dp); void ProcessStringVars(StyleContext &sc, const Sci_Position length, const int varState); bool ProcessValidRegQlangStart(StyleContext &sc, Sci_Position length, const int type, WordList &wordsAdverbs, DelimPair &dp); Sci_Position LengthToNonWordChar(StyleContext &sc, Sci_Position length, char *s, const int size, Sci_Position offset = 0); }; /*----------------------------------------------------------------------------* * --- METHODS: LexerRaku --- *----------------------------------------------------------------------------*/ /* * LexerRaku::IsOperatorChar * - Test for both ASCII and Unicode operators * see: https://docs.raku.org/language/unicode_entry */ bool LexerRaku::IsOperatorChar(const int ch) { if (ch > 0x7F) { switch (ch) { // Unicode ASCII Equiv. case 0x2208: // (elem) case 0x2209: // !(elem) case 0x220B: // (cont) case 0x220C: // !(cont) case 0x2216: // (-) case 0x2229: // (&) case 0x222A: // (|) case 0x2282: // (<) case 0x2283: // (>) case 0x2284: // !(<) case 0x2285: // !(>) case 0x2286: // (<=) case 0x2287: // (>=) case 0x2288: // !(<=) case 0x2289: // !(>=) case 0x228D: // (.) case 0x228E: // (+) case 0x2296: // (^) return true; } } return setOperator.Contains(ch); } /* * LexerRaku::IsWordChar * - Test for both ASCII and Unicode identifier characters * see: https://docs.raku.org/language/unicode_ascii * also: ftp://ftp.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt * FIXME: *still* may not contain all valid characters */ bool LexerRaku::IsWordChar(const int ch, bool allowNumber) { // Unicode numbers should not appear in word identifiers if (ch > 0x7F) { const CharacterCategory cc = CategoriseCharacter(ch); switch (cc) { // Letters case ccLu: case ccLl: case ccLt: case ccLm: case ccLo: return true; default: return false; } } else if (allowNumber && IsADigit(ch)) { return true; // an ASCII number type } return setWord.Contains(ch); } /* * LexerRaku::IsWordStartChar * - Test for both ASCII and Unicode identifier "start / first" characters */ bool LexerRaku::IsWordStartChar(const int ch) { return ch != '-' && IsWordChar(ch, false); // no numbers allowed } /* * LexerRaku::IsNumberChar * - Test for both ASCII and Unicode identifier number characters * see: https://docs.raku.org/language/unicode_ascii * also: ftp://ftp.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt * FILTERED by Unicode letters that are NUMBER * and NOT PARENTHESIZED or CIRCLED * FIXME: *still* may not contain all valid number characters */ bool LexerRaku::IsNumberChar(const int ch, int base) { if (ch > 0x7F) { const CharacterCategory cc = CategoriseCharacter(ch); switch (cc) { // Numbers case ccNd: case ccNl: case ccNo: return true; default: return false; } } return IsADigit(ch, base); } /* * LexerRaku::PropertySet * - */ Sci_Position SCI_METHOD LexerRaku::PropertySet(const char *key, const char *val) { if (osRaku.PropertySet(&options, key, val)) return 0; return -1; } /* * LexerRaku::WordListSet * - */ Sci_Position SCI_METHOD LexerRaku::WordListSet(int n, const char *wl) { WordList *wordListN = nullptr; switch (n) { case 0: wordListN = &keywords; break; case 1: wordListN = &functions; break; case 2: wordListN = &typesBasic; break; case 3: wordListN = &typesComposite; break; case 4: wordListN = &typesDomainSpecific; break; case 5: wordListN = &typesExceptions; break; case 6: wordListN = &adverbs; break; } Sci_Position firstModification = -1; if (wordListN) { if (wordListN->Set(wl)) { firstModification = 0; } } return firstModification; } /* * LexerRaku::ProcessRegexTwinCapture * - processes the transition between a regex pair (two sets of delimiters) * - moves to first new delimiter, if a bracket * - returns true when valid delimiter start found (if bracket) */ bool LexerRaku::ProcessRegexTwinCapture(StyleContext &sc, const Sci_Position length, int &type, const DelimPair &dp) { if (type == RAKUTYPE_REGEX_S || type == RAKUTYPE_REGEX_TR || type == RAKUTYPE_REGEX_Y) { type = -1; // clear type // move past chRegQClose if it was the previous char if (dp.isCloser(sc.chPrev)) sc.Forward(); // no processing needed for non-bracket if (dp.isCloser(dp.opener)) return true; // move to next opening bracket const Sci_Position len = LengthToNextChar(sc, length); if (sc.GetRelativeCharacter(len) == dp.opener) { sc.Forward(len); return true; } } return false; } /* * LexerRaku::ProcessStringVars * - processes a string and highlights any valid variables */ void LexerRaku::ProcessStringVars(StyleContext &sc, const Sci_Position length, const int varState) { const int state = sc.state; for (Sci_Position pos = 0; pos < length; pos++) { if (sc.state == varState && !IsWordChar(sc.ch)) { sc.SetState(state); } else if (sc.chPrev != '\\' && (sc.ch == '$' || sc.ch == '@') && IsWordStartChar(sc.chNext)) { sc.SetState(varState); } sc.Forward(); // Next character } } /* * LexerRaku::ProcessValidRegQlangStart * - processes a section of the document range from after a Regex / Q delimiter * - returns true on success * - sets: adverbs, chOpen, chClose, chCount * ref: https://docs.raku.org/language/regexes */ bool LexerRaku::ProcessValidRegQlangStart(StyleContext &sc, Sci_Position length, const int type, WordList &wordsAdverbs, DelimPair &dp) { Sci_Position startPos = sc.currentPos; Sci_Position startLen = length; const int target_state = sc.state; int state = SCE_RAKU_DEFAULT; std::string str; // find our opening delimiter (and occurrences) / save any adverbs dp.opener = 0; // adverbs can be after the first delimiter bool got_all_adverbs = false; // in Regex statements bool got_ident = false; // regex can have an identifier: 'regex R' sc.SetState(state); // set state default to avoid pre-highlights while ((dp.opener == 0 || !got_all_adverbs) && sc.More()) { // move to the next non-space character const bool was_space = IsAWhitespace(sc.ch); if (!got_all_adverbs && was_space) { sc.Forward(LengthToNextChar(sc, length)); } length = startLen - (sc.currentPos - startPos); // update length remaining // parse / eat an identifier (if type == RAKUTYPE_REGEX) if (dp.opener == 0 && !got_ident && type == RAKUTYPE_REGEX && IsAlphabet(sc.ch)) { // eat identifier / account for special adverb :sym bool got_sym = false; while (sc.More()) { sc.SetState(SCE_RAKU_IDENTIFIER); while (sc.More() && (IsAlphaNumeric(sc.chNext) || sc.chNext == '_' || sc.chNext == '-')) { sc.Forward(); } sc.Forward(); if (got_sym && sc.ch == '>') { sc.SetState(SCE_RAKU_OPERATOR); // '>' sc.Forward(); break; } else if (type == RAKUTYPE_REGEX && sc.Match(":sym<")) { sc.SetState(SCE_RAKU_ADVERB); // ':sym' sc.Forward(4); sc.SetState(SCE_RAKU_OPERATOR); // '<' sc.Forward(); got_sym = true; } else { break; } } sc.SetState(state); got_ident = true; } // parse / save an adverb: RAKUTYPE_REGEX only has adverbs after delim // >= RAKUTYPE_QLANG only has adverbs before delim else if (!got_all_adverbs && sc.ch == ':' && (!(dp.opener == 0 && got_ident) && !(dp.opener > 0 && type >= RAKUTYPE_QLANG))) { sc.SetState(SCE_RAKU_ADVERB); while (IsAlphaNumeric(sc.chNext) && sc.More()) { sc.Forward(); str += sc.ch; } str += ' '; sc.Forward(); sc.SetState(state); } // find starting delimiter else if (dp.opener == 0 && (was_space || IsValidRegOrQAdjacent(sc.ch)) && IsValidDelimiter(sc.ch)) { // make sure the delimiter is legal (most are) sc.SetState((state = target_state));// start state here... dp.opener = sc.ch; // this is our delimiter, get count if (type < RAKUTYPE_QLANG) // type is Regex dp.count = 1; // has only one delimiter else dp.count = GetRepeatCharCount(sc, dp.opener, length); sc.Forward(dp.count); } // we must have all the adverbs by now... else { if (got_all_adverbs) break; // prevent infinite loop: occurs on missing open char got_all_adverbs = true; } } // set word list / find a valid closing delimiter (or bomb!) wordsAdverbs.Set(str.c_str()); dp.closer[0] = GetDelimiterCloseChar(dp.opener); dp.closer[1] = 0; // no other closer char return dp.closer[0] > 0; } /* * LexerRaku::LengthToNonWordChar * - returns the length until the next non "word" character: AlphaNum + '_' * - also sets all the parsed chars in 's' */ Sci_Position LexerRaku::LengthToNonWordChar(StyleContext &sc, Sci_Position length, char *s, const int size, Sci_Position offset) { Sci_Position len = 0; Sci_Position max_length = size < length ? size : length; while (len <= max_length) { const int ch = sc.GetRelativeCharacter(len + offset); if (!IsWordChar(ch)) { s[len] = '\0'; break; } s[len] = ch; len++; } s[len + 1] = '\0'; return len; } /* * LexerRaku::Lex * - Main lexer method */ void SCI_METHOD LexerRaku::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) { LexAccessor styler(pAccess); DelimPair dpEmbeded; // delimiter pair: embedded comments DelimPair dpString; // delimiter pair: string DelimPair dpRegQ; // delimiter pair: Regex / Q Lang std::string hereDelim; // heredoc delimiter (if in heredoc) int hereState = 0; // heredoc state to use (Q / QQ) int numState = 0; // number state / type short cntDecimal = 0; // number decimal count std::string wordLast; // last word seen std::string identLast; // last identifier seen std::string adverbLast; // last (single) adverb seen WordList lastAdverbs; // last adverbs seen Sci_Position len; // temp length value char s[100]; // temp char string int typeDetect = -1; // temp type detected (for regex and Q lang) Sci_Position lengthToEnd; // length until the end of range // Backtrack to safe start position before complex quoted elements Sci_PositionU newStartPos = startPos; if (initStyle != SCE_RAKU_DEFAULT) { // Backtrack to last SCE_RAKU_DEFAULT or 0 while (newStartPos > 0) { newStartPos--; if (styler.StyleAt(newStartPos) == SCE_RAKU_DEFAULT) break; } // Backtrack to start of line before SCE_RAKU_HEREDOC_Q? if (initStyle == SCE_RAKU_HEREDOC_Q || initStyle == SCE_RAKU_HEREDOC_QQ) { if (newStartPos > 0) { newStartPos = styler.LineStart(styler.GetLine(newStartPos)); } } } else { const Sci_Position line = styler.GetLine(newStartPos); if (line > 0) { // If the previous line is a start of a q or qq heredoc, backtrack to start of line const Sci_Position startPreviousLine = styler.LineStart(line-1); if (ContainsQTo(startPreviousLine, newStartPos, styler)) { newStartPos = startPreviousLine; } } } // Re-calculate (any) changed startPos, length and initStyle state if (newStartPos < startPos) { initStyle = SCE_RAKU_DEFAULT; length += startPos - newStartPos; startPos = newStartPos; } // init StyleContext StyleContext sc(startPos, length, initStyle, styler); // StyleContext Loop for (; sc.More(); sc.Forward()) { lengthToEnd = (length - (sc.currentPos - startPos)); // end of range /* *** Determine if the current state should terminate ************** * * Everything within the 'switch' statement processes characters up * until the end of a syntax highlight section / state. * ****************************************************************** */ switch (sc.state) { case SCE_RAKU_OPERATOR: sc.SetState(SCE_RAKU_DEFAULT); break; // FIXME: better valid operator sequences needed? case SCE_RAKU_COMMENTLINE: if (IsANewLine(sc.ch)) { sc.SetState(SCE_RAKU_DEFAULT); } break; case SCE_RAKU_COMMENTEMBED: if ((len = LengthToDelimiter(sc, dpEmbeded, lengthToEnd)) >= 0) { sc.Forward(len); // Move to end delimiter sc.SetState(SCE_RAKU_DEFAULT); } else { sc.Forward(lengthToEnd); // no end delimiter found } break; case SCE_RAKU_POD: if (sc.atLineStart && sc.Match("=end pod")) { sc.Forward(8); sc.SetState(SCE_RAKU_DEFAULT); } break; case SCE_RAKU_STRING: // Process the string for variables: move to end delimiter if ((len = LengthToDelimiter(sc, dpString, lengthToEnd)) >= 0) { if (dpString.interpol) { ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR); } else { sc.Forward(len); } sc.SetState(SCE_RAKU_DEFAULT); } else { sc.Forward(lengthToEnd); // no end delimiter found } break; case SCE_RAKU_STRING_Q: case SCE_RAKU_STRING_QQ: case SCE_RAKU_STRING_Q_LANG: // No string: previous char was the delimiter if (dpRegQ.count == 1 && dpRegQ.isCloser(sc.chPrev)) { sc.SetState(SCE_RAKU_DEFAULT); } // Process the string for variables: move to end delimiter else if ((len = LengthToDelimiter(sc, dpRegQ, lengthToEnd)) >= 0) { // set (any) heredoc delimiter string if (lastAdverbs.InList("to")) { GetRelativeString(sc, -1, len - dpRegQ.count, hereDelim); hereState = SCE_RAKU_HEREDOC_Q; // default heredoc state } // select variable identifiers if (sc.state == SCE_RAKU_STRING_QQ || lastAdverbs.InList("qq")) { ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR); hereState = SCE_RAKU_HEREDOC_QQ; // potential heredoc state } else { sc.Forward(len); } sc.SetState(SCE_RAKU_DEFAULT); } else { sc.Forward(lengthToEnd); // no end delimiter found } break; case SCE_RAKU_HEREDOC_Q: case SCE_RAKU_HEREDOC_QQ: if ((len = LengthToEndHeredoc(sc, styler, lengthToEnd, hereDelim.c_str())) >= 0) { // select variable identifiers if (sc.state == SCE_RAKU_HEREDOC_QQ) { ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR); } else { sc.Forward(len); } sc.SetState(SCE_RAKU_DEFAULT); } else { sc.Forward(lengthToEnd); // no end delimiter found } hereDelim.clear(); // clear heredoc delimiter break; case SCE_RAKU_REGEX: // account for typeDetect = RAKUTYPE_REGEX_S/TR/Y while (sc.state == SCE_RAKU_REGEX) { // No string: previous char was the delimiter if (dpRegQ.count == 1 && dpRegQ.isCloser(sc.chPrev)) { if (ProcessRegexTwinCapture(sc, lengthToEnd, typeDetect, dpRegQ)) continue; sc.SetState(SCE_RAKU_DEFAULT); break; } // Process the string for variables: move to end delimiter else if ((len = LengthToDelimiter(sc, dpRegQ, lengthToEnd)) >= 0) { ProcessStringVars(sc, len, SCE_RAKU_REGEX_VAR); if (ProcessRegexTwinCapture(sc, lengthToEnd, typeDetect, dpRegQ)) continue; sc.SetState(SCE_RAKU_DEFAULT); break; } else { sc.Forward(lengthToEnd); // no end delimiter found break; } } break; case SCE_RAKU_NUMBER: if (sc.ch == '.') { if (sc.chNext == '.') { // '..' is an operator sc.SetState(SCE_RAKU_OPERATOR); sc.Forward(); if (sc.chNext == '.') // '...' is also an operator sc.Forward(); break; } else if (numState > RAKUNUM_FLOAT_EXP && (cntDecimal < 1 || numState == RAKUNUM_VERSION)) { cntDecimal++; sc.Forward(); } else { sc.SetState(SCE_RAKU_DEFAULT); break; // too many decimal places } } switch (numState) { case RAKUNUM_BINARY: if (!IsNumberChar(sc.ch, 2)) sc.SetState(SCE_RAKU_DEFAULT); break; case RAKUNUM_OCTAL: if (!IsNumberChar(sc.ch, 8)) sc.SetState(SCE_RAKU_DEFAULT); break; case RAKUNUM_HEX: if (!IsNumberChar(sc.ch, 16)) sc.SetState(SCE_RAKU_DEFAULT); break; case RAKUNUM_DECIMAL: case RAKUNUM_VERSION: if (!IsNumberChar(sc.ch)) sc.SetState(SCE_RAKU_DEFAULT); } break; case SCE_RAKU_WORD: case SCE_RAKU_FUNCTION: case SCE_RAKU_TYPEDEF: case SCE_RAKU_ADVERB: sc.SetState(SCE_RAKU_DEFAULT); break; case SCE_RAKU_MU: case SCE_RAKU_POSITIONAL: case SCE_RAKU_ASSOCIATIVE: case SCE_RAKU_CALLABLE: case SCE_RAKU_IDENTIFIER: case SCE_RAKU_GRAMMAR: case SCE_RAKU_CLASS: sc.SetState(SCE_RAKU_DEFAULT); break; } /* *** Determine if a new state should be entered ******************* * * Everything below here identifies the beginning of a state, all or part * of the characters within this state are processed here, the rest are * completed above in the terminate state section. * ****************************************************************** */ if (sc.state == SCE_RAKU_DEFAULT) { // --- Single line comment if (sc.ch == '#') { sc.SetState(SCE_RAKU_COMMENTLINE); } // --- POD block else if (sc.atLineStart && sc.Match("=begin pod")) { sc.SetState(SCE_RAKU_POD); sc.Forward(10); } // --- String (normal) else if (sc.chPrev != '\\' && (IsValidQuoteOpener(sc.ch, dpString, RAKUDELIM_QUOTE))) { sc.SetState(SCE_RAKU_STRING); } // --- String (Q Language) ---------------------------------------- // - https://docs.raku.org/language/quoting // - Q :adverb :adverb //; // - q,qx,qw,qq,qqx,qqw,qqww :adverb :adverb //; else if (IsQLangStartAtScPos(sc, typeDetect, lengthToEnd)) { int state = SCE_RAKU_STRING_Q_LANG; Sci_Position forward = 1; // single char ident (default) if (typeDetect > RAKUTYPE_QLANG) { state = SCE_RAKU_STRING_Q; if (typeDetect == RAKUTYPE_STR_WQ) forward = 0; // no char ident } if (typeDetect > RAKUTYPE_STR_Q) { if (typeDetect == RAKUTYPE_STR_QQ) state = SCE_RAKU_STRING_QQ; forward++; // two char ident } if (typeDetect > RAKUTYPE_STR_QQ) forward++; // three char ident if (typeDetect == RAKUTYPE_STR_QQWW) forward++; // four char ident // Proceed: check for a valid character after statement if (IsValidRegOrQAdjacent(sc.GetRelative(forward)) || typeDetect == RAKUTYPE_QLANG) { sc.SetState(state); sc.Forward(forward); lastAdverbs.Clear(); // Process: adverbs / opening delimiter / adverbs after delim if (ProcessValidRegQlangStart(sc, lengthToEnd, typeDetect, lastAdverbs, dpRegQ)) sc.SetState(state); } } // --- Regex (rx/s/m/tr/y) ---------------------------------------- // - https://docs.raku.org/language/regexes else if ((IsRegexStartAtScPos(sc, typeDetect, setOperator) || regexIdent.InList(wordLast.c_str()))) { if (typeDetect == -1) { // must be a regex identifier word wordLast.clear(); typeDetect = RAKUTYPE_REGEX; } Sci_Position forward = 0; // no ident (RAKUTYPE_REGEX, RAKUTYPE_REGEX_NORM) if (typeDetect > 0 && typeDetect != RAKUTYPE_REGEX) forward++; // single char ident if (typeDetect > RAKUTYPE_REGEX) forward++; // two char ident // Proceed: check for a valid character after statement if (IsValidRegOrQAdjacent(sc.GetRelative(forward)) || typeDetect == RAKUTYPE_REGEX_NORM) { sc.SetState(SCE_RAKU_REGEX); sc.Forward(forward); lastAdverbs.Clear(); // Process: adverbs / opening delimiter / adverbs after delim if (ProcessValidRegQlangStart(sc, lengthToEnd, typeDetect, lastAdverbs, dpRegQ)) sc.SetState(SCE_RAKU_REGEX); } } // --- Numbers ---------------------------------------------------- else if (IsValidIdentPrecede(sc.chPrev) && (IsNumberChar(sc.ch) || (sc.ch == 'v' && IsNumberChar(sc.chNext) && wordLast == "use"))) { numState = RAKUNUM_DECIMAL; // default: decimal (base 10) cntDecimal = 0; sc.SetState(SCE_RAKU_NUMBER); if (sc.ch == 'v') // forward past 'v' sc.Forward(); if (wordLast == "use") { // package version number numState = RAKUNUM_VERSION; } else if (sc.ch == '0') { // other type of number switch (sc.chNext) { case 'b': // binary (base 2) numState = RAKUNUM_BINARY; break; case 'o': // octal (base 8) numState = RAKUNUM_OCTAL; break; case 'x': // hexadecimal (base 16) numState = RAKUNUM_HEX; } if (numState != RAKUNUM_DECIMAL) sc.Forward(); // forward to number type char } } // --- Keywords / functions / types / barewords ------------------- else if ((sc.currentPos == 0 || sc.atLineStart || IsValidIdentPrecede(sc.chPrev)) && IsWordStartChar(sc.ch)) { len = LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s)); if (keywords.InList(s)) { sc.SetState(SCE_RAKU_WORD); // Keywords } else if(functions.InList(s)) { sc.SetState(SCE_RAKU_FUNCTION); // Functions } else if(typesBasic.InList(s)) { sc.SetState(SCE_RAKU_TYPEDEF); // Types (basic) } else if(typesComposite.InList(s)) { sc.SetState(SCE_RAKU_TYPEDEF); // Types (composite) } else if(typesDomainSpecific.InList(s)) { sc.SetState(SCE_RAKU_TYPEDEF); // Types (domain-specific) } else if(typesExceptions.InList(s)) { sc.SetState(SCE_RAKU_TYPEDEF); // Types (exceptions) } else { if (wordLast == "class") sc.SetState(SCE_RAKU_CLASS); // a Class ident else if (wordLast == "grammar") sc.SetState(SCE_RAKU_GRAMMAR); // a Grammar ident else sc.SetState(SCE_RAKU_IDENTIFIER); // Bareword identLast = s; // save identifier } if (adverbLast == "sym") { // special adverb ":sym" sc.SetState(SCE_RAKU_IDENTIFIER); // treat as identifier identLast = s; // save identifier } if (sc.state != SCE_RAKU_IDENTIFIER) wordLast = s; // save word sc.Forward(len - 1); // ...forward past word } // --- Adverbs ---------------------------------------------------- else if (sc.ch == ':' && IsWordStartChar(sc.chNext)) { len = LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s), 1); if (adverbs.InList(s)) { sc.SetState(SCE_RAKU_ADVERB); // Adverbs (begin with ':') adverbLast = s; // save word sc.Forward(len); // ...forward past word (less offset: 1) } } // --- Identifiers: $mu / @positional / %associative / &callable -- // see: https://docs.raku.org/language/variables else if (setSigil.Contains(sc.ch) && (setTwigil.Contains(sc.chNext) || setSpecialVar.Contains(sc.chNext) || IsWordStartChar(sc.chNext))) { // State based on sigil switch (sc.ch) { case '$': sc.SetState(SCE_RAKU_MU); break; case '@': sc.SetState(SCE_RAKU_POSITIONAL); break; case '%': sc.SetState(SCE_RAKU_ASSOCIATIVE); break; case '&': sc.SetState(SCE_RAKU_CALLABLE); } const int state = sc.state; sc.Forward(); char ch_delim = 0; if (setSpecialVar.Contains(sc.ch) && !setWord.Contains(sc.chNext)) { // Process Special Var ch_delim = -1; } else if (setTwigil.Contains(sc.ch)) { // Process Twigil sc.SetState(SCE_RAKU_OPERATOR); if (sc.ch == '<' && setWord.Contains(sc.chNext)) ch_delim = '>'; sc.Forward(); sc.SetState(state); } // Process (any) identifier if (ch_delim >= 0) { sc.Forward(LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s)) - 1); if (ch_delim > 0 && sc.chNext == ch_delim) { sc.Forward(); sc.SetState(SCE_RAKU_OPERATOR); } identLast = s; // save identifier } } // --- Operators -------------------------------------------------- else if (IsOperatorChar(sc.ch)) { // FIXME: better valid operator sequences needed? sc.SetState(SCE_RAKU_OPERATOR); } // --- Heredoc: begin --------------------------------------------- else if (!hereDelim.empty() && sc.atLineEnd) { if (IsANewLine(sc.ch)) sc.Forward(); // skip a possible CRLF situation sc.SetState(hereState); } // Reset words: on operator semi-colon OR '}' (end of statement) if (sc.state == SCE_RAKU_OPERATOR && (sc.ch == ';' || sc.ch == '}')) { wordLast.clear(); identLast.clear(); adverbLast.clear(); } } /* *** Determine if an "embedded comment" is to be entered ********** * * This type of embedded comment section, or multi-line comment comes * after a normal comment has begun... e.g: #`[ ... ] * ****************************************************************** */ else if (sc.state == SCE_RAKU_COMMENTLINE && sc.chPrev == '#' && sc.ch == '`') { if (IsBracketOpenChar(sc.chNext)) { sc.Forward(); // Condition met for "embedded comment" dpEmbeded.opener = sc.ch; // Find the opposite (termination) closing bracket (if any) dpEmbeded.closer[0] = GetBracketCloseChar(dpEmbeded.opener); if (dpEmbeded.closer[0] > 0) { // Enter "embedded comment" // Find multiple opening character occurrence dpEmbeded.count = GetRepeatCharCount(sc, dpEmbeded.opener, lengthToEnd); sc.SetState(SCE_RAKU_COMMENTEMBED); sc.Forward(dpEmbeded.count - 1); // incremented in the next loop } } } } // And we're done... sc.Complete(); } /* * LexerRaku::Lex * - Main fold method * NOTE: although Raku uses and supports UNICODE characters, we're only looking * at normal chars here, using 'SafeGetCharAt' - for folding purposes * that is all we need. */ #define RAKU_HEADFOLD_SHIFT 4 #define RAKU_HEADFOLD_MASK 0xF0 void SCI_METHOD LexerRaku::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) { // init LexAccessor / return if fold option is off if (!options.fold) return; LexAccessor styler(pAccess); // init char and line positions const Sci_PositionU endPos = startPos + length; Sci_Position lineCurrent = styler.GetLine(startPos); // Backtrack to last SCE_RAKU_DEFAULT line if (startPos > 0 && lineCurrent > 0) { while (lineCurrent > 0 && styler.StyleAt(startPos) != SCE_RAKU_DEFAULT) { lineCurrent--; startPos = styler.LineStart(lineCurrent); } lineCurrent = styler.GetLine(startPos); } Sci_PositionU lineStart = startPos; Sci_PositionU lineStartNext = styler.LineStart(lineCurrent + 1); // init line folding level int levelPrev = SC_FOLDLEVELBASE; if (lineCurrent > 0) levelPrev = styler.LevelAt(lineCurrent - 1) >> 16; int levelCurrent = levelPrev; // init char and style variables char chNext = styler[startPos]; int stylePrev = styler.StyleAt(startPos - 1); int styleNext = styler.StyleAt(startPos); int styleNextStartLine = styler.StyleAt(lineStartNext); int visibleChars = 0; bool wasCommentMulti = false; // main loop for (Sci_PositionU i = startPos; i < endPos; i++) { // next char, style and flags const char ch = chNext; chNext = styler.SafeGetCharAt(i + 1); const int style = styleNext; styleNext = styler.StyleAt(i + 1); const bool atEOL = i == (lineStartNext - 1); const bool atLineStart = i == lineStart; // --- Comments / Multi-line / POD ------------------------------------ if (options.foldComment) { // Multi-line if (options.foldCommentMultiline) { if (style == SCE_RAKU_COMMENTLINE && atLineStart && ch == '#' && chNext == '`' && styleNextStartLine == SCE_RAKU_COMMENTEMBED) { levelCurrent++; wasCommentMulti = true; // don't confuse line comments } else if (style == SCE_RAKU_COMMENTEMBED && atLineStart && styleNextStartLine != SCE_RAKU_COMMENTEMBED) { levelCurrent--; } } // Line comments if (!wasCommentMulti && atEOL && stylePrev == SCE_RAKU_COMMENTLINE && IsCommentLine(lineCurrent, styler)) { if (!IsCommentLine(lineCurrent - 1, styler) && IsCommentLine(lineCurrent + 1, styler)) levelCurrent++; else if (IsCommentLine(lineCurrent - 1, styler) && !IsCommentLine(lineCurrent + 1, styler)) levelCurrent--; } // POD if (options.foldCommentPOD && atLineStart && style == SCE_RAKU_POD) { if (styler.Match(i, "=begin")) levelCurrent++; else if (styler.Match(i, "=end")) levelCurrent--; } } // --- Code block ----------------------------------------------------- if (style == SCE_RAKU_OPERATOR) { if (ch == '{') { if (levelCurrent < levelPrev) levelPrev--; levelCurrent++; } else if (ch == '}') { levelCurrent--; } } // --- at end of line / range / apply fold ---------------------------- if (atEOL) { int level = levelPrev; // set level flags level |= levelCurrent << 16; if (visibleChars == 0 && options.foldCompact) level |= SC_FOLDLEVELWHITEFLAG; if ((levelCurrent > levelPrev) && (visibleChars > 0)) level |= SC_FOLDLEVELHEADERFLAG; if (level != styler.LevelAt(lineCurrent)) { styler.SetLevel(lineCurrent, level); } lineCurrent++; lineStart = lineStartNext; lineStartNext = styler.LineStart(lineCurrent + 1); styleNextStartLine = styler.StyleAt(lineStartNext); levelPrev = levelCurrent; visibleChars = 0; wasCommentMulti = false; } // increment visibleChars / set previous char if (!isspacechar(ch)) visibleChars++; stylePrev = style; } // Done: set real level of the next line int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK; styler.SetLevel(lineCurrent, levelPrev | flagsNext); } /*----------------------------------------------------------------------------* * --- Scintilla: LexerModule --- *----------------------------------------------------------------------------*/ extern const LexerModule lmRaku(SCLEX_RAKU, LexerRaku::LexerFactoryRaku, "raku", rakuWordLists);