// Source: notepad-plus-plus/lexilla/lexers/LexRaku.cxx (1635 lines, 50 KiB)

/** @file LexRaku.cxx
** Lexer for Raku
**
** Copyright (c) 2019 Mark Reay <mark@reay.net.au>
**/
// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
/*
* Raku (Perl6) Lexer for Scintilla
* ---------------------------------
* ---------------------------------
* 06-Dec-2019: More Unicode support:
* - Added a full scope of allowed numbers and letters
* 29-Nov-2019: More highlighting / implemented basic folding:
* - Operators (blanket cover, no sequence checking)
* - Class / Grammar name highlighting
* - Folding:
* - Comments: line / multi-line
* - POD sections
* - Code blocks {}
* 26-Nov-2019: Basic syntax highlighting covering the following:
* - Comments, both line and embedded (multi-line)
* - POD, no inline highlighting as yet...
* - Heredoc block string, with variable highlighting (with qq)
* - Strings, with variable highlighting (with ")
* - Q Language, including adverbs (also basic q and qq)
* - Regex, including adverbs
* - Numbers
* - Bareword / identifiers
* - Types
* - Variables: mu, positional, associative, callable
* TODO:
* - POD inline
* - Better operator sequence coverage
*/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdarg.h>
#include <assert.h>
#include <ctype.h>
#include <string>
#include <string_view>
#include <vector>
#include <map>
#include <functional>
#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"
#include "WordList.h"
#include "LexAccessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "CharacterCategory.h"
#include "LexerModule.h"
#include "OptionSet.h"
#include "DefaultLexer.h"
using namespace Scintilla;
using namespace Lexilla;
namespace { // anonymous namespace to isolate any name clashes
/*----------------------------------------------------------------------------*
* --- DEFINITIONS: OPTIONS / CONSTANTS ---
*----------------------------------------------------------------------------*/
// Number types
// - stored in numState while a SCE_RAKU_NUMBER is being lexed
// - the numeric order matters: the lexer only accepts a '.' inside a number
//   when numState > RAKUNUM_FLOAT_EXP, and only RAKUNUM_VERSION may contain
//   more than one '.'
#define RAKUNUM_BINARY 1 // order is significant: 1-3 cannot have a dot
#define RAKUNUM_OCTAL 2
#define RAKUNUM_FLOAT_EXP 3 // exponent part only
#define RAKUNUM_HEX 4 // may be a hex float
#define RAKUNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings
#define RAKUNUM_VECTOR 6
#define RAKUNUM_V_VECTOR 7
#define RAKUNUM_VERSION 8 // can contain multiple '.'s
#define RAKUNUM_BAD 9
// Regex / Q string types
// - the numeric order matters: values below RAKUTYPE_QLANG are treated as
//   regex types (a single delimiter char, adverbs allowed after the opener);
//   values >= RAKUTYPE_QLANG are Q language / string types
// - the "N char ident" notes give the length of the introducing keyword
#define RAKUTYPE_REGEX_NORM 0 // 0 char ident
#define RAKUTYPE_REGEX_S 1 // order is significant:
#define RAKUTYPE_REGEX_M 2 // 1 char ident
#define RAKUTYPE_REGEX_Y 3 // 1 char ident
#define RAKUTYPE_REGEX 4 // > RAKUTYPE_REGEX == 2 char identifiers
#define RAKUTYPE_REGEX_RX 5 // 2 char ident
#define RAKUTYPE_REGEX_TR 6 // 2 char ident
#define RAKUTYPE_QLANG 7 // < RAKUTYPE_QLANG == RAKUTYPE_REGEX_?
#define RAKUTYPE_STR_WQ 8 // 0 char ident < word quote >
#define RAKUTYPE_STR_Q 9 // 1 char ident
#define RAKUTYPE_STR_QX 10 // 2 char ident
#define RAKUTYPE_STR_QW 11 // 2 char ident
#define RAKUTYPE_STR_QQ 12 // 2 char ident
#define RAKUTYPE_STR_QQX 13 // 3 char ident
#define RAKUTYPE_STR_QQW 14 // 3 char ident
#define RAKUTYPE_STR_QQWW 15 // 4 char ident
// Delimiter types
// - selects which opener table IsValidQuoteOpener() consults
#define RAKUDELIM_BRACKET 0 // bracket: regex, Q language
#define RAKUDELIM_QUOTE 1 // quote: normal string
// rakuWordLists: descriptions of the keyword lists as defined in config
// - nullptr terminated
// - registered with the option set through DefineWordListSets()
// - presumably entry N describes the list selected by WordListSet(N, ...)
//   (keywords, functions, types..., adverbs) -- confirm against the config
const char *const rakuWordLists[] = {
"Keywords and identifiers",
"Functions",
"Types basic",
"Types composite",
"Types domain-specific",
"Types exception",
"Adverbs",
nullptr, // end-of-list marker
};
// Lexer options and their default values
// - each member is bound to a property name by OptionSetRaku
struct OptionsRaku {
	bool fold;					// property "fold"
	bool foldCompact;			// property "fold.compact"
	bool foldComment;			// property "fold.comment"
	bool foldCommentMultiline;	// property "fold.raku.comment.multiline"
	bool foldCommentPOD;		// property "fold.raku.comment.pod"
	// everything is enabled by default except compact folding
	OptionsRaku() :
		fold(true),
		foldCompact(false),
		foldComment(true),
		foldCommentMultiline(true),
		foldCommentPOD(true) {
	}
};
// OptionSetRaku: init options and words
// - binds every OptionsRaku member to its property name so it can be
//   set / queried by name through the OptionSet<> base
// - the two "fold.raku.*" properties also carry a description string
// - finally registers the word list descriptions (rakuWordLists)
struct OptionSetRaku : public OptionSet<OptionsRaku> {
OptionSetRaku() {
// generic folding switches
DefineProperty("fold", &OptionsRaku::fold);
DefineProperty("fold.comment", &OptionsRaku::foldComment);
DefineProperty("fold.compact", &OptionsRaku::foldCompact);
// Raku specific folding switches (with user visible descriptions)
DefineProperty("fold.raku.comment.multiline", &OptionsRaku::foldCommentMultiline,
"Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
DefineProperty("fold.raku.comment.pod", &OptionsRaku::foldCommentPOD,
"Set this property to 0 to disable folding POD comments when fold.comment=1.");
// init word lists
DefineWordListSets(rakuWordLists);
}
};
// Delimiter pair: an opening delimiter together with the char(s) closing it
struct DelimPair {
	int opener;		// opener char
	int closer[2];	// closer chars (second entry is 0 when there is only one)
	bool interpol;	// can variables be interpolated?
	short count;	// delimiter char count
	// start out empty: no opener, no closers, no interpolation
	DelimPair() : opener(0), closer{0, 0}, interpol(false), count(0) {
	}
	// true when ch equals either of the stored closer chars
	bool isCloser(int ch) const {
		if (ch == closer[0])
			return true;
		return ch == closer[1];
	}
};
/*----------------------------------------------------------------------------*
* --- FUNCTIONS ---
*----------------------------------------------------------------------------*/
/*
 * IsANewLine
 *  - true for the two line-ending chars: carriage return and line feed
 */
constexpr bool IsANewLine(int ch) noexcept {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
/*
 * IsAWhitespace
 *  - true for a space / tab (IsASpaceOrTab) or a line-ending char (IsANewLine)
 */
bool IsAWhitespace(int ch) noexcept {
	if (IsANewLine(ch))
		return true;
	return IsASpaceOrTab(ch);
}
/*
 * IsAlphabet
 *  - true for the ASCII letters a-z and A-Z only
 */
constexpr bool IsAlphabet(int ch) noexcept {
	const bool isLower = ch >= 'a' && ch <= 'z';
	const bool isUpper = ch >= 'A' && ch <= 'Z';
	return isLower || isUpper;
}
/*
 * IsCommentLine
 *  - returns true if 'line' is a comment line of the requested style
 *  - type == SCE_RAKU_COMMENTEMBED:
 *      true when the last char examined on the line carries that style
 *  - any other type (default SCE_RAKU_COMMENTLINE):
 *      true when the first non space/tab char is a '#' in that style which
 *      is NOT followed by '`' (that would start an embedded comment)
 *  - the final char before the next line start is never examined
 *  modified from: LexPerl.cxx
 */
bool IsCommentLine(Sci_Position line, LexAccessor &styler, int type = SCE_RAKU_COMMENTLINE) {
	const Sci_Position lineStart = styler.LineStart(line);
	const Sci_Position lineEnd = styler.LineStart(line + 1) - 1;
	if (type == SCE_RAKU_COMMENTEMBED) {
		// only the style of the last examined position decides
		return lineStart < lineEnd && styler.StyleAt(lineEnd - 1) == type;
	}
	for (Sci_Position i = lineStart; i < lineEnd; i++) {
		const char ch = styler[i];
		if (ch == '#' && styler.StyleAt(i) == type && styler[i + 1] != '`')
			return true;
		if (!IsASpaceOrTab(ch))
			return false;	// something other than indentation precedes any '#'
	}
	return false;
}
/*
 * ContainsQTo
 *  - gathers every char in [start, end) that is styled SCE_RAKU_ADVERB and
 *    returns true if the gathered text contains ":to"
 *  - ":to" marks the start of a SCE_RAKU_HEREDOC_Q or SCE_RAKU_HEREDOC_QQ
 */
bool ContainsQTo(Sci_Position start, Sci_Position end, LexAccessor &styler) {
	std::string adverbText;
	Sci_Position pos = start;
	while (pos < end) {
		if (styler.StyleAt(pos) == SCE_RAKU_ADVERB)
			adverbText += styler[pos];
		pos++;
	}
	const std::string::size_type found = adverbText.find(":to");
	return found != std::string::npos;
}
/*
 * GetBracketCloseChar
 *  - returns the end bracket char: opposite of start (0 if ch is no bracket)
 *  - see: http://www.unicode.org/Public/5.1.0/ucd/BidiMirroring.txt (first section)
 *  - Categories are general matches for valid BiDi types:
 *      Sm (math symbol), Ps (open punctuation), Pi (initial quote)
 *  - Pairs that are not "opener + 1" are listed explicitly; every other
 *    non-ASCII char of those categories is assumed to close with opener + 1
 *  - ASCII math symbols other than '<' ('+', '=', '>', '|', '~') have no
 *    mirrored partner, so 0 is returned for them; pairing them with
 *    "ch + 1" would e.g. give the delimiter '|' the closer '}'
 */
int GetBracketCloseChar(const int ch) noexcept {
	const CharacterCategory cc = CategoriseCharacter(ch);
	switch (cc) {
	case ccSm:
		switch (ch) {
		case 0x3C: return 0x3E; // LESS-THAN SIGN
		case 0x2208: return 0x220B; // ELEMENT OF
		case 0x2209: return 0x220C; // NOT AN ELEMENT OF
		case 0x220A: return 0x220D; // SMALL ELEMENT OF
		case 0x2215: return 0x29F5; // DIVISION SLASH
		case 0x2243: return 0x22CD; // ASYMPTOTICALLY EQUAL TO
		case 0x2298: return 0x29B8; // CIRCLED DIVISION SLASH
		case 0x22A6: return 0x2ADE; // ASSERTION
		case 0x22A8: return 0x2AE4; // TRUE
		case 0x22A9: return 0x2AE3; // FORCES
		case 0x22AB: return 0x2AE5; // DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
		case 0x22F2: return 0x22FA; // ELEMENT OF WITH LONG HORIZONTAL STROKE
		case 0x22F3: return 0x22FB; // ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
		case 0x22F4: return 0x22FC; // SMALL ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
		case 0x22F6: return 0x22FD; // ELEMENT OF WITH OVERBAR
		case 0x22F7: return 0x22FE; // SMALL ELEMENT OF WITH OVERBAR
		case 0xFF1C: return 0xFF1E; // FULLWIDTH LESS-THAN SIGN
		}
		// remaining ASCII math symbols are not brackets at all
		if (ch < 0x80)
			return 0;
		break;
	case ccPs:
		switch (ch) {
		case 0x5B: return 0x5D; // LEFT SQUARE BRACKET
		case 0x7B: return 0x7D; // LEFT CURLY BRACKET
		case 0x298D: return 0x2990; // LEFT SQUARE BRACKET WITH TICK IN TOP CORNER
		case 0x298F: return 0x298E; // LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
		case 0xFF3B: return 0xFF3D; // FULLWIDTH LEFT SQUARE BRACKET
		case 0xFF5B: return 0xFF5D; // FULLWIDTH LEFT CURLY BRACKET
		}
		break;
	case ccPi:
		break;
	default:
		return 0; // not a bracket category
	}
	return ch + 1; // general case: closer directly follows the opener
}
/*
 * IsValidQuoteOpener
 *  - checks whether ch can open a quoted section and describes it in dp
 *  - dp.closer[] is always reset and dp.interpol preset to true
 *  - type == RAKUDELIM_QUOTE: ch must be one of the known quote chars,
 *    otherwise false is returned and dp.opener / dp.count stay untouched
 *  - type == RAKUDELIM_BRACKET: the closer comes from GetBracketCloseChar
 *  - on all other paths dp.opener = ch and dp.count = 1 are stored
 *  - returns true when a closer (> 0) was found
 */
bool IsValidQuoteOpener(const int ch, DelimPair &dp, int type = RAKUDELIM_BRACKET) noexcept {
	dp.interpol = true;
	dp.closer[0] = 0;
	dp.closer[1] = 0;
	if (type == RAKUDELIM_QUOTE) {
		// Opener -> Closer; the first three do not interpolate variables
		if (ch == '\'') {				// APOSTROPHE
			dp.closer[0] = '\'';
			dp.interpol = false;
		} else if (ch == 0x2018) {		// LEFT SINGLE QUOTATION MARK
			dp.closer[0] = 0x2019;
			dp.interpol = false;
		} else if (ch == 0xFF62) {		// HALFWIDTH LEFT CORNER BRACKET
			dp.closer[0] = 0xFF63;
			dp.interpol = false;
		} else if (ch == '"') {			// QUOTATION MARK
			dp.closer[0] = '"';
		} else if (ch == 0x201C) {		// LEFT DOUBLE QUOTATION MARK
			dp.closer[0] = 0x201D;
		} else if (ch == 0x201D) {		// RIGHT DOUBLE QUOTATION MARK
			dp.closer[0] = 0x201C;
		} else if (ch == 0x201E) {		// DOUBLE LOW-9 QUOTATION MARK
			dp.closer[0] = 0x201C;		// either double quote closes it
			dp.closer[1] = 0x201D;
		} else {
			return false;				// not a quote char
		}
	} else if (type == RAKUDELIM_BRACKET) {
		dp.closer[0] = GetBracketCloseChar(ch);
	}
	dp.count = 1;
	dp.opener = ch;
	return dp.closer[0] > 0;
}
/*
 * IsBracketOpenChar
 *  - true if GetBracketCloseChar knows a closing partner for ch
 */
bool IsBracketOpenChar(int ch) noexcept {
	const int closer = GetBracketCloseChar(ch);
	return closer > 0;
}
/*
 * IsValidRegOrQAdjacent
 *  - returns true if ch is a valid character to put directly after Q / q
 *  - rejected: alphanumerics, '_', '(', ')' and the apostrophe
 *  * ref: Q Language: https://docs.raku.org/language/quoting
 */
bool IsValidRegOrQAdjacent(int ch) noexcept {
	switch (ch) {
	case '_':
	case '(':
	case ')':
	case '\'':
		return false;
	default:
		return !IsAlphaNumeric(ch);
	}
}
/*
 * IsValidRegOrQPrecede
 *  - returns true if ch is a valid character to put directly before Q / q
 *  - rejected: alphanumerics and '_' (Q / q would be part of a longer word)
 *  * ref: Q Language: https://docs.raku.org/language/quoting
 */
bool IsValidRegOrQPrecede(int ch) noexcept {
	if (ch == '_')
		return false;
	return !IsAlphaNumeric(ch);
}
/*
 * MatchCharInRange
 *  - returns true if the match character is found in the relative range
 *    [1, length) counted from the current sc position
 *  - unless ignoreDelim is set (default false) a match that directly
 *    follows a backslash is treated as escaped and skipped
 *  - as before, nothing matches when the current char itself is escaped
 *    (sc.chPrev is a backslash) and ignoreDelim is false
 */
bool MatchCharInRange(StyleContext &sc, const Sci_Position length,
		const int match, bool ignoreDelim = false) {
	if (!ignoreDelim && sc.chPrev == '\\')
		return false;	// escaped start position: keep the original outcome
	// track the char preceding each candidate; the original never updated
	// this value, so escapes inside the range were not recognised
	int chPrev = sc.ch;
	for (Sci_Position len = 1; len < length; len++) {
		const int ch = sc.GetRelativeCharacter(len);
		if (ch == match && (ignoreDelim || chPrev != '\\'))
			return true;
		chPrev = ch;
	}
	return false;
}
/*
 * PrevNonWhitespaceChar
 *  - walks backwards from the char before the current sc position and
 *    returns the first char that is not whitespace (see IsAWhitespace)
 *  - the walk covers sc.currentPos steps, i.e. it includes the very first
 *    position of the document (the original loop stopped one step short)
 *  - returns 0 when only whitespace (or nothing) precedes the position
 */
int PrevNonWhitespaceChar(StyleContext &sc) {
	// convert once to a signed value instead of negating an unsigned position
	const Sci_Position maxBack = static_cast<Sci_Position>(sc.currentPos);
	for (Sci_Position back = 1; back <= maxBack; back++) {
		const int ch = sc.GetRelativeCharacter(-back);
		if (!IsAWhitespace(ch))
			return ch;
	}
	return 0; // no matching char
}
/*
 * IsQLangStartAtScPos
 *  - decides whether the current sc position starts a Q Language construct
 *    and stores the detected RAKUTYPE_* in 'type' (-1 when nothing matched)
 *  - the char before the position must pass IsValidRegOrQPrecede
 *  - 'Q' needs a valid adjacent char; 'q' is classified by the letters that
 *    follow it (qx, qw, qq, qqx, qqw, qqww, otherwise plain q)
 *  - '<' is a word quote when an unescaped '>' follows within 'length'
 *  - ref: https://docs.raku.org/language/quoting
 *  - Q :adverb :adverb //;
 *  - q,qx,qw,qq,qqx,qqw,qqww :adverb /:adverb /;
 */
bool IsQLangStartAtScPos(StyleContext &sc, int &type, const Sci_Position length) {
	const bool adjacentOk = IsValidRegOrQAdjacent(sc.chNext);
	const int chSecond = sc.GetRelativeCharacter(2);
	const int chThird = sc.GetRelativeCharacter(3);
	type = -1;
	if (!IsValidRegOrQPrecede(sc.chPrev))
		return false;
	if (sc.ch == 'Q') {
		if (adjacentOk)
			type = RAKUTYPE_QLANG;
	} else if (sc.ch == 'q') {
		if (sc.chNext == 'x') {
			type = RAKUTYPE_STR_QX;
		} else if (sc.chNext == 'w') {
			type = RAKUTYPE_STR_QW;
		} else if (sc.chNext == 'q') {
			if (chSecond == 'x')
				type = RAKUTYPE_STR_QQX;
			else if (chSecond == 'w')
				type = (chThird == 'w') ? RAKUTYPE_STR_QQWW : RAKUTYPE_STR_QQW;
			else
				type = RAKUTYPE_STR_QQ;
		} else {
			type = RAKUTYPE_STR_Q;
		}
	} else if (sc.ch == '<' && MatchCharInRange(sc, length, '>')) {
		type = RAKUTYPE_STR_WQ; // < word quote >
	}
	return type >= 0;
}
/*
 * IsRegexStartAtScPos
 *  - decides whether the current sc position starts a regex and stores the
 *    detected RAKUTYPE_REGEX_* in 'type' (-1 when nothing matched)
 *  - the char before the position must pass IsValidRegOrQPrecede
 *  - 'rx' and 'tr' / 'TR' are recognised by their second letter;
 *    'm', 's' / 'S' and 'y' need a valid adjacent char
 *  - a bare '/' counts when the previous non-whitespace char is in 'set'
 *  - ref: https://docs.raku.org/language/regexes
 *  - Regex: (rx/s/m/tr/y) :adverb /:adverb /;
 *  - regex R :adverb //;
 *  - /:adverb /;
 */
bool IsRegexStartAtScPos(StyleContext &sc, int &type, CharacterSet &set) {
	const bool adjacentOk = IsValidRegOrQAdjacent(sc.chNext);
	type = -1;
	if (!IsValidRegOrQPrecede(sc.chPrev))
		return false;
	const int ch = sc.ch;
	if (ch == 'r') {
		if (sc.chNext == 'x')
			type = RAKUTYPE_REGEX_RX;
	} else if (ch == 't' || ch == 'T') {
		if (sc.chNext == 'r' || sc.chNext == 'R')
			type = RAKUTYPE_REGEX_TR;
	} else if (ch == 'm') {
		if (adjacentOk)
			type = RAKUTYPE_REGEX_M;
	} else if (ch == 's' || ch == 'S') {
		if (adjacentOk)
			type = RAKUTYPE_REGEX_S;
	} else if (ch == 'y') {
		if (adjacentOk)
			type = RAKUTYPE_REGEX_Y;
	} else if (ch == '/') {
		if (set.Contains(PrevNonWhitespaceChar(sc)))
			type = RAKUTYPE_REGEX_NORM;
	}
	return type >= 0;
}
/*
 * IsValidIdentPrecede
 *  - returns true if ch may directly precede an identifier
 *  - rejected: alphanumerics, '_' and the chars '@', '$', '%'
 */
bool IsValidIdentPrecede(int ch) noexcept {
	switch (ch) {
	case '_':
	case '@':
	case '$':
	case '%':
		return false;
	default:
		return !IsAlphaNumeric(ch);
	}
}
/*
 * IsValidDelimiter
 *  - returns true if ch is a valid delimiter (most chars are valid)
 *  - rejected: alphanumerics and ':'
 *  * ref: Q Language: https://docs.raku.org/language/quoting
 */
bool IsValidDelimiter(int ch) noexcept {
	if (ch == ':')
		return false;
	return !IsAlphaNumeric(ch);
}
/*
* GetDelimiterCloseChar
* - returns the corresponding close char for a given delimiter (could be the same char)
*/
int GetDelimiterCloseChar(int ch) noexcept {
int ch_end = GetBracketCloseChar(ch);
if (ch_end == 0 && IsValidDelimiter(ch)) {
ch_end = ch;
}
return ch_end;
}
/*
 * GetRepeatCharCount
 *  - counts how many times chMatch repeats consecutively, starting at the
 *    current sc position and looking at no more than 'length' chars
 */
Sci_Position GetRepeatCharCount(StyleContext &sc, int chMatch, Sci_Position length) {
	Sci_Position repeats = 0;
	for (; repeats < length; repeats++) {
		if (sc.GetRelativeCharacter(repeats) != chMatch)
			break;	// run of matching chars has ended
	}
	return repeats;
}
/*
* LengthToDelimiter
* - returns the length until the end of a delimited string section
* - scans at most 'length' chars starting at the current sc position
* - the returned value is the relative offset of the char that follows the
*   last counted closer; -1 when the end condition is not seen in range
* - Ignores nested delimiters (if opener != closer)
* - a char following a backslash is skipped (never counted as delimiter)
* - dp.count closer chars are needed to end the section
* - noTrailing (default false): when true the final closer only counts if
*   the char after it is whitespace
* - NOTE(review): the end condition is tested at the top of the loop, so a
*   closer that is the very last char of the range yields -1
*/
Sci_Position LengthToDelimiter(StyleContext &sc, const DelimPair &dp,
Sci_Position length, bool noTrailing = false) {
short cnt_open = 0; // count open bracket
short cnt_close = 0; // count close bracket
bool is_escape = false; // has been escaped using '\'?
Sci_Position len = 0; // count characters
int chOpener = dp.opener; // look for nested opener / closer
if (dp.opener == dp.closer[0])
chOpener = 0; // no opening delimiter (no nesting possible)
while (len < length) {
// window of three chars around the current relative offset
const int chPrev = sc.GetRelativeCharacter(len - 1);
const int ch = sc.GetRelativeCharacter(len);
const int chNext = sc.GetRelativeCharacter(len+1);
if (cnt_open == 0 && cnt_close == dp.count) {
return len; // end condition has been met
} else if (is_escape) {
// this char was escaped by the previous one: skip it
is_escape = false;
} else if (ch == '\\') {
is_escape = true;
} else {
if (ch == chOpener) {
cnt_open++; // open nested bracket
} else if (dp.isCloser(ch)) {
if ( cnt_open > 0 ) {
cnt_open--; // close nested bracket
} else if (dp.count > 1 && cnt_close < (dp.count - 1)) {
// multi-char delimiter: a closer before the final one
if (cnt_close > 1) {
// from the third closer on, closers must be adjacent
if (dp.isCloser(chPrev)) {
cnt_close++;
} else { // reset if previous char was not close
cnt_close = 0;
}
} else {
cnt_close++;
}
} else if (!noTrailing || (IsAWhitespace(chNext))) {
cnt_close++; // found last close
if (cnt_close > 1 && !dp.isCloser(chPrev)) {
cnt_close = 0; // reset if previous char was not close
}
} else {
cnt_close = 0; // non handled close: reset
}
} else if (IsANewLine(ch)) {
// an unescaped line end drops any partial nesting / closer count
cnt_open = 0; // reset after each line
cnt_close = 0;
}
}
len++;
}
return -1; // end condition has NOT been met
}
/*
 * LengthToEndHeredoc
 *  - returns the length until the end of a heredoc section: the relative
 *    offset of the char that follows the terminating delimiter string
 *  - delimiter string MUST begin on a new line
 *  - scans at most 'length' chars starting at sc.currentPos
 *  - a delimiter that is completed by the last char of the range is
 *    reported as well (returns 'length')
 *  - returns -1 when the delimiter is not found
 */
Sci_Position LengthToEndHeredoc(const StyleContext &sc, LexAccessor &styler,
		const Sci_Position length, const char *delim) {
	bool on_new_ln = false;
	size_t i = 0; // index into delim
	// use the position type for the counter: 'length' is a Sci_Position
	for (Sci_Position n = 0; n < length; n++) {
		const char ch = styler.SafeGetCharAt(sc.currentPos + n, 0);
		if (on_new_ln) {
			if (delim[i] == '\0')
				return n; // at end of str, match found!
			if (ch != delim[i++])
				i = 0; // no char match, reset 'i'ndex
		}
		if (i == 0) // detect new line (only while not inside a match)
			on_new_ln = IsANewLine(ch);
	}
	// the whole delimiter was matched by the final chars of the range
	if (on_new_ln && i > 0 && delim[i] == '\0')
		return length;
	return -1; // no match found
}
/*
 * LengthToNextChar
 *  - returns the relative offset (>= 1) of the next char after the current
 *    position that is not a space, tab or line end
 *  - the search stops at 'length'; the value reached is returned then
 */
Sci_Position LengthToNextChar(StyleContext &sc, const Sci_Position length) {
	Sci_Position offset = 1;	// the current char itself is never tested
	for (; offset < length; offset++) {
		if (!IsAWhitespace(sc.GetRelativeCharacter(offset)))
			break;
	}
	return offset;
}
/*
 * GetRelativeString
 *  - clears str, then appends the chars found at the relative offsets
 *    offset, offset + 1, ... up to (not including) 'length'
 *  - note: 'length' is compared with the running offset, so it acts as the
 *    exclusive end offset
 */
void GetRelativeString(StyleContext &sc, Sci_Position offset, Sci_Position length,
		std::string &str) {
	str.clear();
	for (Sci_Position rel = offset; rel < length; rel++) {
		// each char value is narrowed to a single char when stored
		str.push_back(static_cast<char>(sc.GetRelativeCharacter(rel)));
	}
}
} // end anonymous namespace
/*----------------------------------------------------------------------------*
* --- class: LexerRaku ---
*----------------------------------------------------------------------------*/
// LexerRaku: the Raku lexer object
// - owns the character sets, the options and the word lists that the
//   Lex() and Fold() implementations work with
// - instances are created through LexerFactoryRaku() and destroy
//   themselves in Release()
class LexerRaku : public DefaultLexer {
	// character sets (filled in by the constructor)
	CharacterSet setWord;
	CharacterSet setSigil;
	CharacterSet setTwigil;
	CharacterSet setOperator;
	CharacterSet setSpecialVar;
	WordList regexIdent;			// identifiers that specify a regex
	OptionsRaku options;			// Options from config
	OptionSetRaku osRaku;
	WordList keywords;				// Word Lists from config
	WordList functions;
	WordList typesBasic;
	WordList typesComposite;
	WordList typesDomainSpecific;
	WordList typesExceptions;
	WordList adverbs;

public:
	// Sets up the fixed character sets and the regex identifier list
	explicit LexerRaku() :
		DefaultLexer("raku", SCLEX_RAKU),
		setWord(CharacterSet::setAlphaNum, "-_", 0x80),
		setSigil(CharacterSet::setNone, "$&%@"),
		setTwigil(CharacterSet::setNone, "!*.:<=?^~"),
		setOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;<>,?!.~"),
		setSpecialVar(CharacterSet::setNone, "_/!") {
		regexIdent.Set("regex rule token");
	}

	// Deleted so LexerRaku objects can not be copied or moved.
	LexerRaku(const LexerRaku &) = delete;
	LexerRaku(LexerRaku &&) = delete;
	void operator=(const LexerRaku &) = delete;
	void operator=(LexerRaku &&) = delete;
	virtual ~LexerRaku() = default;

	// --- lexer interface -------------------------------------------------
	void SCI_METHOD Release() noexcept override {
		delete this;
	}
	int SCI_METHOD Version() const noexcept override {
		return lvRelease5;
	}

	// property handling is forwarded to the option set
	const char *SCI_METHOD PropertyNames() override {
		return osRaku.PropertyNames();
	}
	int SCI_METHOD PropertyType(const char *name) override {
		return osRaku.PropertyType(name);
	}
	const char *SCI_METHOD DescribeProperty(const char *name) override {
		return osRaku.DescribeProperty(name);
	}
	Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
	const char *SCI_METHOD PropertyGet(const char *key) override {
		return osRaku.PropertyGet(key);
	}

	// word list handling
	const char *SCI_METHOD DescribeWordListSets() override {
		return osRaku.DescribeWordListSets();
	}
	Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;

	// highlighting and folding
	void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
	void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;

	// factory: returns a newly allocated lexer
	static ILexer5 *LexerFactoryRaku() {
		return new LexerRaku();
	}

protected:
	// character classification
	bool IsOperatorChar(const int ch);
	bool IsWordChar(const int ch, bool allowNumber = true);
	bool IsWordStartChar(const int ch);
	bool IsNumberChar(const int ch, int base = 10);
	// lexing helpers
	bool ProcessRegexTwinCapture(StyleContext &sc, const Sci_Position length,
		int &type, const DelimPair &dp);
	void ProcessStringVars(StyleContext &sc, const Sci_Position length, const int varState);
	bool ProcessValidRegQlangStart(StyleContext &sc, Sci_Position length, const int type,
		WordList &wordsAdverbs, DelimPair &dp);
	Sci_Position LengthToNonWordChar(StyleContext &sc, Sci_Position length,
		char *s, const int size, Sci_Position offset = 0);
};
/*----------------------------------------------------------------------------*
* --- METHODS: LexerRaku ---
*----------------------------------------------------------------------------*/
/*
 * LexerRaku::IsOperatorChar
 *  - Test for both ASCII and Unicode operators
 *  - chars above 0x7F are compared with the list of Unicode operators
 *    below; everything else is looked up in setOperator
 *    see: https://docs.raku.org/language/unicode_entry
 */
bool LexerRaku::IsOperatorChar(const int ch) {
	if (ch <= 0x7F)
		return setOperator.Contains(ch);
	switch (ch) {
	//   Unicode     ASCII Equiv.
	case 0x2208: // (elem)
	case 0x2209: // !(elem)
	case 0x220B: // (cont)
	case 0x220C: // !(cont)
	case 0x2216: // (-)
	case 0x2229: // (&)
	case 0x222A: // (|)
	case 0x2282: // (<)
	case 0x2283: // (>)
	case 0x2284: // !(<)
	case 0x2285: // !(>)
	case 0x2286: // (<=)
	case 0x2287: // (>=)
	case 0x2288: // !(<=)
	case 0x2289: // !(>=)
	case 0x228D: // (.)
	case 0x228E: // (+)
	case 0x2296: // (^)
		return true;
	default:
		return setOperator.Contains(ch);
	}
}
/*
* LexerRaku::IsWordChar
* - Test for both ASCII and Unicode identifier characters
* see: https://docs.raku.org/language/unicode_ascii
* also: ftp://ftp.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
* FIXME: *still* may not contain all valid characters
*/
bool LexerRaku::IsWordChar(const int ch, bool allowNumber) {
// Unicode numbers should not appear in word identifiers
if (ch > 0x7F) {
const CharacterCategory cc = CategoriseCharacter(ch);
switch (cc) {
// Letters
case ccLu:
case ccLl:
case ccLt:
case ccLm:
case ccLo:
return true;
default:
return false;
}
} else if (allowNumber && IsADigit(ch)) {
return true; // an ASCII number type
}
return setWord.Contains(ch);
}
/*
 * LexerRaku::IsWordStartChar
 *  - Test for both ASCII and Unicode identifier "start / first" characters
 *  - a '-' can never start a word; all other chars are passed to
 *    IsWordChar with allowNumber switched off
 */
bool LexerRaku::IsWordStartChar(const int ch) {
	if (ch == '-')
		return false;
	return IsWordChar(ch, false); // no numbers allowed
}
/*
* LexerRaku::IsNumberChar
* - Test for both ASCII and Unicode identifier number characters
* see: https://docs.raku.org/language/unicode_ascii
* also: ftp://ftp.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
* FILTERED by Unicode letters that are NUMBER
* and NOT PARENTHESIZED or CIRCLED
* FIXME: *still* may not contain all valid number characters
*/
bool LexerRaku::IsNumberChar(const int ch, int base) {
if (ch > 0x7F) {
const CharacterCategory cc = CategoriseCharacter(ch);
switch (cc) {
// Numbers
case ccNd:
case ccNl:
case ccNo:
return true;
default:
return false;
}
}
return IsADigit(ch, base);
}
/*
 * LexerRaku::PropertySet
 *  - hands the key / value pair to the option set, which updates 'options'
 *  - returns 0 when the option set reports true, -1 otherwise
 */
Sci_Position SCI_METHOD LexerRaku::PropertySet(const char *key, const char *val) {
	const bool accepted = osRaku.PropertySet(&options, key, val);
	return accepted ? 0 : -1;
}
/*
 * LexerRaku::WordListSet
 *  - replaces the content of word list number n (0 - 6) with wl
 *  - returns 0 when WordList::Set reports true for the selected list,
 *    -1 otherwise (also for an unknown n)
 */
Sci_Position SCI_METHOD LexerRaku::WordListSet(int n, const char *wl) {
	// index == word list number
	WordList *const wordLists[] = {
		&keywords,
		&functions,
		&typesBasic,
		&typesComposite,
		&typesDomainSpecific,
		&typesExceptions,
		&adverbs,
	};
	const int listCount = static_cast<int>(sizeof(wordLists) / sizeof(wordLists[0]));
	if (n < 0 || n >= listCount)
		return -1;	// no such list
	return wordLists[n]->Set(wl) ? 0 : -1;
}
/*
 * LexerRaku::ProcessRegexTwinCapture
 *  - processes the transition between a regex pair (two sets of delimiters)
 *  - only acts for RAKUTYPE_REGEX_S / _TR / _Y; 'type' is cleared (-1) then
 *  - steps over the closer when it is the previous char
 *  - non-bracket delimiter (opener is also a closer): nothing more to do
 *  - bracket delimiter: moves to the next opener if it is the next
 *    non-whitespace char
 *  - returns true when a second delimited section follows
 */
bool LexerRaku::ProcessRegexTwinCapture(StyleContext &sc, const Sci_Position length,
		int &type, const DelimPair &dp) {
	switch (type) {
	case RAKUTYPE_REGEX_S:
	case RAKUTYPE_REGEX_TR:
	case RAKUTYPE_REGEX_Y:
		break;			// types that consist of two sections
	default:
		return false;	// single section: 'type' is left alone
	}
	type = -1; // clear type
	// move past chRegQClose if it was the previous char
	if (dp.isCloser(sc.chPrev))
		sc.Forward();
	// no processing needed for non-bracket
	if (dp.isCloser(dp.opener))
		return true;
	// move to next opening bracket
	const Sci_Position toNext = LengthToNextChar(sc, length);
	if (sc.GetRelativeCharacter(toNext) != dp.opener)
		return false;
	sc.Forward(toNext);
	return true;
}
/*
 * LexerRaku::ProcessStringVars
 *  - moves sc forward by 'length' chars and highlights any valid variables
 *  - a variable starts at an unescaped '$' or '@' that is followed by a
 *    word start char; it is styled with varState
 *  - the variable ends at the first non-word char, where the state that
 *    was active on entry is restored
 */
void LexerRaku::ProcessStringVars(StyleContext &sc, const Sci_Position length, const int varState) {
	const int outerState = sc.state;
	for (Sci_Position remaining = length; remaining > 0; remaining--) {
		const bool endsVar = (sc.state == varState) && !IsWordChar(sc.ch);
		if (endsVar) {
			sc.SetState(outerState);
		} else {
			const bool isSigil = (sc.ch == '$') || (sc.ch == '@');
			if (sc.chPrev != '\\' && isSigil && IsWordStartChar(sc.chNext))
				sc.SetState(varState);
		}
		sc.Forward(); // Next character
	}
}
/*
* LexerRaku::ProcessValidRegQlangStart
* - processes a section of the document range from after a Regex / Q
*   keyword: skips whitespace, styles an optional identifier (only for
*   type == RAKUTYPE_REGEX) and adverbs, and locates the opening delimiter
* - the state sc has on entry is the target state: it is applied from the
*   opening delimiter onwards, everything before it is SCE_RAKU_DEFAULT
*   (identifier / adverb / operator parts get their own styles)
* - sets: wordsAdverbs (the adverb names seen, space separated),
*   dp.opener, dp.count and dp.closer[]
* - returns true on success, i.e. when a closing delimiter could be
*   determined for the opener
* ref: https://docs.raku.org/language/regexes
*/
bool LexerRaku::ProcessValidRegQlangStart(StyleContext &sc, Sci_Position length, const int type,
WordList &wordsAdverbs, DelimPair &dp) {
// remember where we started so the remaining length can be recomputed
Sci_Position startPos = sc.currentPos;
Sci_Position startLen = length;
const int target_state = sc.state;
int state = SCE_RAKU_DEFAULT;
std::string str; // collects the adverb names, each followed by a space
// find our opening delimiter (and occurrences) / save any adverbs
dp.opener = 0; // adverbs can be after the first delimiter
bool got_all_adverbs = false; // in Regex statements
bool got_ident = false; // regex can have an identifier: 'regex R'
sc.SetState(state); // set state default to avoid pre-highlights
while ((dp.opener == 0 || !got_all_adverbs) && sc.More()) {
// move to the next non-space character
const bool was_space = IsAWhitespace(sc.ch);
if (!got_all_adverbs && was_space) {
sc.Forward(LengthToNextChar(sc, length));
}
length = startLen - (sc.currentPos - startPos); // update length remaining
// parse / eat an identifier (if type == RAKUTYPE_REGEX)
if (dp.opener == 0 && !got_ident && type == RAKUTYPE_REGEX && IsAlphabet(sc.ch)) {
// eat identifier / account for special adverb :sym<name>
bool got_sym = false;
while (sc.More()) {
sc.SetState(SCE_RAKU_IDENTIFIER);
// consume the remaining identifier chars (alphanumeric, '_' or '-')
while (sc.More() && (IsAlphaNumeric(sc.chNext)
|| sc.chNext == '_' || sc.chNext == '-')) {
sc.Forward();
}
sc.Forward();
if (got_sym && sc.ch == '>') {
sc.SetState(SCE_RAKU_OPERATOR); // '>'
sc.Forward();
break;
} else if (type == RAKUTYPE_REGEX && sc.Match(":sym<")) {
sc.SetState(SCE_RAKU_ADVERB); // ':sym'
sc.Forward(4);
sc.SetState(SCE_RAKU_OPERATOR); // '<'
sc.Forward();
got_sym = true; // loop once more to eat the name inside < >
} else {
break;
}
}
sc.SetState(state);
got_ident = true;
}
// parse / save an adverb: RAKUTYPE_REGEX only has adverbs after delim
// >= RAKUTYPE_QLANG only has adverbs before delim
else if (!got_all_adverbs && sc.ch == ':' && (!(dp.opener == 0 && got_ident)
&& !(dp.opener > 0 && type >= RAKUTYPE_QLANG))) {
sc.SetState(SCE_RAKU_ADVERB);
// the adverb name is the run of alphanumeric chars after the ':'
while (IsAlphaNumeric(sc.chNext) && sc.More()) {
sc.Forward();
str += sc.ch;
}
str += ' ';
sc.Forward();
sc.SetState(state);
}
// find starting delimiter
else if (dp.opener == 0 && (was_space || IsValidRegOrQAdjacent(sc.ch))
&& IsValidDelimiter(sc.ch)) { // make sure the delimiter is legal (most are)
sc.SetState((state = target_state));// start state here...
dp.opener = sc.ch; // this is our delimiter, get count
if (type < RAKUTYPE_QLANG) // type is Regex
dp.count = 1; // has only one delimiter
else
dp.count = GetRepeatCharCount(sc, dp.opener, length);
sc.Forward(dp.count);
}
// we must have all the adverbs by now...
else {
if (got_all_adverbs)
break; // prevent infinite loop: occurs on missing open char
got_all_adverbs = true;
}
}
// set word list / find a valid closing delimiter (or bomb!)
wordsAdverbs.Set(str.c_str());
dp.closer[0] = GetDelimiterCloseChar(dp.opener);
dp.closer[1] = 0; // no other closer char
return dp.closer[0] > 0;
}
/*
 * LexerRaku::LengthToNonWordChar
 *  - returns the length until the next non "word" character (IsWordChar),
 *    starting at the relative position 'offset'
 *  - also copies the parsed chars into 's' and '\0' terminates it
 *  - never writes more than 'size' bytes to 's': at most size - 1 chars
 *    plus the terminator; the original wrote past the end of the buffer
 *    for words of 'size' or more chars
 *  - never looks at more than 'length' chars
 *  - returns 0 without touching 's' when 'size' is not positive
 */
Sci_Position LexerRaku::LengthToNonWordChar(StyleContext &sc, Sci_Position length,
		char *s, const int size, Sci_Position offset) {
	if (size <= 0)
		return 0;	// no room, not even for the terminator
	const Sci_Position capacity = static_cast<Sci_Position>(size) - 1; // keep one byte for '\0'
	const Sci_Position max_length = capacity < length ? capacity : length;
	Sci_Position len = 0;
	while (len < max_length) {
		const int ch = sc.GetRelativeCharacter(len + offset);
		if (!IsWordChar(ch))
			break;
		s[len] = static_cast<char>(ch);
		len++;
	}
	s[len] = '\0';	// len <= size - 1, so this is always inside the buffer
	return len;
}
/*
* LexerRaku::Lex
* - Main lexer method
*/
void SCI_METHOD LexerRaku::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
LexAccessor styler(pAccess);
DelimPair dpEmbeded; // delimiter pair: embedded comments
DelimPair dpString; // delimiter pair: string
DelimPair dpRegQ; // delimiter pair: Regex / Q Lang
std::string hereDelim; // heredoc delimiter (if in heredoc)
int hereState = 0; // heredoc state to use (Q / QQ)
int numState = 0; // number state / type
short cntDecimal = 0; // number decimal count
std::string wordLast; // last word seen
std::string identLast; // last identifier seen
std::string adverbLast; // last (single) adverb seen
WordList lastAdverbs; // last adverbs seen
Sci_Position len; // temp length value
char s[100]; // temp char string
int typeDetect = -1; // temp type detected (for regex and Q lang)
Sci_Position lengthToEnd; // length until the end of range
// Backtrack to safe start position before complex quoted elements
Sci_PositionU newStartPos = startPos;
if (initStyle != SCE_RAKU_DEFAULT) {
// Backtrack to last SCE_RAKU_DEFAULT or 0
while (newStartPos > 0) {
newStartPos--;
if (styler.StyleAt(newStartPos) == SCE_RAKU_DEFAULT)
break;
}
// Backtrack to start of line before SCE_RAKU_HEREDOC_Q?
if (initStyle == SCE_RAKU_HEREDOC_Q || initStyle == SCE_RAKU_HEREDOC_QQ) {
if (newStartPos > 0) {
newStartPos = styler.LineStart(styler.GetLine(newStartPos));
}
}
} else {
const Sci_Position line = styler.GetLine(newStartPos);
if (line > 0) {
// If the previous line is a start of a q or qq heredoc, backtrack to start of line
const Sci_Position startPreviousLine = styler.LineStart(line-1);
if (ContainsQTo(startPreviousLine, newStartPos, styler)) {
newStartPos = startPreviousLine;
}
}
}
// Re-calculate (any) changed startPos, length and initStyle state
if (newStartPos < startPos) {
initStyle = SCE_RAKU_DEFAULT;
length += startPos - newStartPos;
startPos = newStartPos;
}
// init StyleContext
StyleContext sc(startPos, length, initStyle, styler);
// StyleContext Loop
for (; sc.More(); sc.Forward()) {
lengthToEnd = (length - (sc.currentPos - startPos)); // end of range
/* *** Determine if the current state should terminate ************** *
* Everything within the 'switch' statement processes characters up
* until the end of a syntax highlight section / state.
* ****************************************************************** */
switch (sc.state) {
case SCE_RAKU_OPERATOR:
sc.SetState(SCE_RAKU_DEFAULT);
break; // FIXME: better valid operator sequences needed?
case SCE_RAKU_COMMENTLINE:
if (IsANewLine(sc.ch)) {
sc.SetState(SCE_RAKU_DEFAULT);
}
break;
case SCE_RAKU_COMMENTEMBED:
if ((len = LengthToDelimiter(sc, dpEmbeded, lengthToEnd)) >= 0) {
sc.Forward(len); // Move to end delimiter
sc.SetState(SCE_RAKU_DEFAULT);
} else {
sc.Forward(lengthToEnd); // no end delimiter found
}
break;
case SCE_RAKU_POD:
if (sc.atLineStart && sc.Match("=end pod")) {
sc.Forward(8);
sc.SetState(SCE_RAKU_DEFAULT);
}
break;
case SCE_RAKU_STRING:
// Process the string for variables: move to end delimiter
if ((len = LengthToDelimiter(sc, dpString, lengthToEnd)) >= 0) {
if (dpString.interpol) {
ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR);
} else {
sc.Forward(len);
}
sc.SetState(SCE_RAKU_DEFAULT);
} else {
sc.Forward(lengthToEnd); // no end delimiter found
}
break;
case SCE_RAKU_STRING_Q:
case SCE_RAKU_STRING_QQ:
case SCE_RAKU_STRING_Q_LANG:
// No string: previous char was the delimiter
if (dpRegQ.count == 1 && dpRegQ.isCloser(sc.chPrev)) {
sc.SetState(SCE_RAKU_DEFAULT);
}
// Process the string for variables: move to end delimiter
else if ((len = LengthToDelimiter(sc, dpRegQ, lengthToEnd)) >= 0) {
// set (any) heredoc delimiter string
if (lastAdverbs.InList("to")) {
GetRelativeString(sc, -1, len - dpRegQ.count, hereDelim);
hereState = SCE_RAKU_HEREDOC_Q; // default heredoc state
}
// select variable identifiers
if (sc.state == SCE_RAKU_STRING_QQ || lastAdverbs.InList("qq")) {
ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR);
hereState = SCE_RAKU_HEREDOC_QQ; // potential heredoc state
} else {
sc.Forward(len);
}
sc.SetState(SCE_RAKU_DEFAULT);
} else {
sc.Forward(lengthToEnd); // no end delimiter found
}
break;
case SCE_RAKU_HEREDOC_Q:
case SCE_RAKU_HEREDOC_QQ:
if ((len = LengthToEndHeredoc(sc, styler, lengthToEnd, hereDelim.c_str())) >= 0) {
// select variable identifiers
if (sc.state == SCE_RAKU_HEREDOC_QQ) {
ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR);
} else {
sc.Forward(len);
}
sc.SetState(SCE_RAKU_DEFAULT);
} else {
sc.Forward(lengthToEnd); // no end delimiter found
}
hereDelim.clear(); // clear heredoc delimiter
break;
case SCE_RAKU_REGEX:
// account for typeDetect = RAKUTYPE_REGEX_S/TR/Y
while (sc.state == SCE_RAKU_REGEX) {
// No string: previous char was the delimiter
if (dpRegQ.count == 1 && dpRegQ.isCloser(sc.chPrev)) {
if (ProcessRegexTwinCapture(sc, lengthToEnd, typeDetect, dpRegQ))
continue;
sc.SetState(SCE_RAKU_DEFAULT);
break;
}
// Process the string for variables: move to end delimiter
else if ((len = LengthToDelimiter(sc, dpRegQ, lengthToEnd)) >= 0) {
ProcessStringVars(sc, len, SCE_RAKU_REGEX_VAR);
if (ProcessRegexTwinCapture(sc, lengthToEnd, typeDetect, dpRegQ))
continue;
sc.SetState(SCE_RAKU_DEFAULT);
break;
} else {
sc.Forward(lengthToEnd); // no end delimiter found
break;
}
}
break;
case SCE_RAKU_NUMBER:
if (sc.ch == '.') {
if (sc.chNext == '.') { // '..' is an operator
sc.SetState(SCE_RAKU_OPERATOR);
sc.Forward();
if (sc.chNext == '.') // '...' is also an operator
sc.Forward();
break;
} else if (numState > RAKUNUM_FLOAT_EXP
&& (cntDecimal < 1 || numState == RAKUNUM_VERSION)) {
cntDecimal++;
sc.Forward();
} else {
sc.SetState(SCE_RAKU_DEFAULT);
break; // too many decimal places
}
}
switch (numState) {
case RAKUNUM_BINARY:
if (!IsNumberChar(sc.ch, 2))
sc.SetState(SCE_RAKU_DEFAULT);
break;
case RAKUNUM_OCTAL:
if (!IsNumberChar(sc.ch, 8))
sc.SetState(SCE_RAKU_DEFAULT);
break;
case RAKUNUM_HEX:
if (!IsNumberChar(sc.ch, 16))
sc.SetState(SCE_RAKU_DEFAULT);
break;
case RAKUNUM_DECIMAL:
case RAKUNUM_VERSION:
if (!IsNumberChar(sc.ch))
sc.SetState(SCE_RAKU_DEFAULT);
}
break;
case SCE_RAKU_WORD:
case SCE_RAKU_FUNCTION:
case SCE_RAKU_TYPEDEF:
case SCE_RAKU_ADVERB:
sc.SetState(SCE_RAKU_DEFAULT);
break;
case SCE_RAKU_MU:
case SCE_RAKU_POSITIONAL:
case SCE_RAKU_ASSOCIATIVE:
case SCE_RAKU_CALLABLE:
case SCE_RAKU_IDENTIFIER:
case SCE_RAKU_GRAMMAR:
case SCE_RAKU_CLASS:
sc.SetState(SCE_RAKU_DEFAULT);
break;
}
/* *** Determine if a new state should be entered ******************* *
* Everything below here identifies the beginning of a state, all or part
* of the characters within this state are processed here, the rest are
* completed above in the terminate state section.
* ****************************************************************** */
if (sc.state == SCE_RAKU_DEFAULT) {
// --- Single line comment
if (sc.ch == '#') {
sc.SetState(SCE_RAKU_COMMENTLINE);
}
// --- POD block
else if (sc.atLineStart && sc.Match("=begin pod")) {
sc.SetState(SCE_RAKU_POD);
sc.Forward(10);
}
// --- String (normal)
else if (sc.chPrev != '\\' && (IsValidQuoteOpener(sc.ch, dpString, RAKUDELIM_QUOTE))) {
sc.SetState(SCE_RAKU_STRING);
}
// --- String (Q Language) ----------------------------------------
// - https://docs.raku.org/language/quoting
// - Q :adverb :adverb //;
// - q,qx,qw,qq,qqx,qqw,qqww :adverb :adverb //;
else if (IsQLangStartAtScPos(sc, typeDetect, lengthToEnd)) {
int state = SCE_RAKU_STRING_Q_LANG;
Sci_Position forward = 1; // single char ident (default)
if (typeDetect > RAKUTYPE_QLANG) {
state = SCE_RAKU_STRING_Q;
if (typeDetect == RAKUTYPE_STR_WQ)
forward = 0; // no char ident
}
if (typeDetect > RAKUTYPE_STR_Q) {
if (typeDetect == RAKUTYPE_STR_QQ)
state = SCE_RAKU_STRING_QQ;
forward++; // two char ident
}
if (typeDetect > RAKUTYPE_STR_QQ)
forward++; // three char ident
if (typeDetect == RAKUTYPE_STR_QQWW)
forward++; // four char ident
// Proceed: check for a valid character after statement
if (IsValidRegOrQAdjacent(sc.GetRelative(forward)) || typeDetect == RAKUTYPE_QLANG) {
sc.SetState(state);
sc.Forward(forward);
lastAdverbs.Clear();
// Process: adverbs / opening delimiter / adverbs after delim
if (ProcessValidRegQlangStart(sc, lengthToEnd, typeDetect,
lastAdverbs, dpRegQ))
sc.SetState(state);
}
}
// --- Regex (rx/s/m/tr/y) ----------------------------------------
// - https://docs.raku.org/language/regexes
else if ((IsRegexStartAtScPos(sc, typeDetect, setOperator) || regexIdent.InList(wordLast.c_str()))) {
if (typeDetect == -1) { // must be a regex identifier word
wordLast.clear();
typeDetect = RAKUTYPE_REGEX;
}
Sci_Position forward = 0; // no ident (RAKUTYPE_REGEX, RAKUTYPE_REGEX_NORM)
if (typeDetect > 0 && typeDetect != RAKUTYPE_REGEX)
forward++; // single char ident
if (typeDetect > RAKUTYPE_REGEX)
forward++; // two char ident
// Proceed: check for a valid character after statement
if (IsValidRegOrQAdjacent(sc.GetRelative(forward)) || typeDetect == RAKUTYPE_REGEX_NORM) {
sc.SetState(SCE_RAKU_REGEX);
sc.Forward(forward);
lastAdverbs.Clear();
// Process: adverbs / opening delimiter / adverbs after delim
if (ProcessValidRegQlangStart(sc, lengthToEnd, typeDetect,
lastAdverbs, dpRegQ))
sc.SetState(SCE_RAKU_REGEX);
}
}
// --- Numbers ----------------------------------------------------
else if (IsValidIdentPrecede(sc.chPrev) && (IsNumberChar(sc.ch)
|| (sc.ch == 'v' && IsNumberChar(sc.chNext) && wordLast == "use"))) {
numState = RAKUNUM_DECIMAL; // default: decimal (base 10)
cntDecimal = 0;
sc.SetState(SCE_RAKU_NUMBER);
if (sc.ch == 'v') // forward past 'v'
sc.Forward();
if (wordLast == "use") { // package version number
numState = RAKUNUM_VERSION;
} else if (sc.ch == '0') { // other type of number
switch (sc.chNext) {
case 'b': // binary (base 2)
numState = RAKUNUM_BINARY;
break;
case 'o': // octal (base 8)
numState = RAKUNUM_OCTAL;
break;
case 'x': // hexadecimal (base 16)
numState = RAKUNUM_HEX;
}
if (numState != RAKUNUM_DECIMAL)
sc.Forward(); // forward to number type char
}
}
// --- Keywords / functions / types / barewords -------------------
else if ((sc.currentPos == 0 || sc.atLineStart || IsValidIdentPrecede(sc.chPrev))
&& IsWordStartChar(sc.ch)) {
len = LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s));
if (keywords.InList(s)) {
sc.SetState(SCE_RAKU_WORD); // Keywords
} else if(functions.InList(s)) {
sc.SetState(SCE_RAKU_FUNCTION); // Functions
} else if(typesBasic.InList(s)) {
sc.SetState(SCE_RAKU_TYPEDEF); // Types (basic)
} else if(typesComposite.InList(s)) {
sc.SetState(SCE_RAKU_TYPEDEF); // Types (composite)
} else if(typesDomainSpecific.InList(s)) {
sc.SetState(SCE_RAKU_TYPEDEF); // Types (domain-specific)
} else if(typesExceptions.InList(s)) {
sc.SetState(SCE_RAKU_TYPEDEF); // Types (exceptions)
} else {
if (wordLast == "class")
sc.SetState(SCE_RAKU_CLASS); // a Class ident
else if (wordLast == "grammar")
sc.SetState(SCE_RAKU_GRAMMAR); // a Grammar ident
else
sc.SetState(SCE_RAKU_IDENTIFIER); // Bareword
identLast = s; // save identifier
}
if (adverbLast == "sym") { // special adverb ":sym"
sc.SetState(SCE_RAKU_IDENTIFIER); // treat as identifier
identLast = s; // save identifier
}
if (sc.state != SCE_RAKU_IDENTIFIER)
wordLast = s; // save word
sc.Forward(len - 1); // ...forward past word
}
// --- Adverbs ----------------------------------------------------
else if (sc.ch == ':' && IsWordStartChar(sc.chNext)) {
len = LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s), 1);
if (adverbs.InList(s)) {
sc.SetState(SCE_RAKU_ADVERB); // Adverbs (begin with ':')
adverbLast = s; // save word
sc.Forward(len); // ...forward past word (less offset: 1)
}
}
// --- Identifiers: $mu / @positional / %associative / &callable --
// see: https://docs.raku.org/language/variables
else if (setSigil.Contains(sc.ch) && (setTwigil.Contains(sc.chNext)
|| setSpecialVar.Contains(sc.chNext)
|| IsWordStartChar(sc.chNext))) {
// State based on sigil
switch (sc.ch) {
case '$': sc.SetState(SCE_RAKU_MU);
break;
case '@': sc.SetState(SCE_RAKU_POSITIONAL);
break;
case '%': sc.SetState(SCE_RAKU_ASSOCIATIVE);
break;
case '&': sc.SetState(SCE_RAKU_CALLABLE);
}
const int state = sc.state;
sc.Forward();
char ch_delim = 0;
if (setSpecialVar.Contains(sc.ch)
&& !setWord.Contains(sc.chNext)) { // Process Special Var
ch_delim = -1;
} else if (setTwigil.Contains(sc.ch)) { // Process Twigil
sc.SetState(SCE_RAKU_OPERATOR);
if (sc.ch == '<' && setWord.Contains(sc.chNext))
ch_delim = '>';
sc.Forward();
sc.SetState(state);
}
// Process (any) identifier
if (ch_delim >= 0) {
sc.Forward(LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s)) - 1);
if (ch_delim > 0 && sc.chNext == ch_delim) {
sc.Forward();
sc.SetState(SCE_RAKU_OPERATOR);
}
identLast = s; // save identifier
}
}
// --- Operators --------------------------------------------------
else if (IsOperatorChar(sc.ch)) {
// FIXME: better valid operator sequences needed?
sc.SetState(SCE_RAKU_OPERATOR);
}
// --- Heredoc: begin ---------------------------------------------
else if (!hereDelim.empty() && sc.atLineEnd) {
if (IsANewLine(sc.ch))
sc.Forward(); // skip a possible CRLF situation
sc.SetState(hereState);
}
// Reset words: on operator semi-colon OR '}' (end of statement)
if (sc.state == SCE_RAKU_OPERATOR && (sc.ch == ';' || sc.ch == '}')) {
wordLast.clear();
identLast.clear();
adverbLast.clear();
}
}
/* *** Determine if an "embedded comment" is to be entered ********** *
* This type of embedded comment section, or multi-line comment comes
* after a normal comment has begun... e.g: #`[ ... ]
* ****************************************************************** */
else if (sc.state == SCE_RAKU_COMMENTLINE && sc.chPrev == '#' && sc.ch == '`') {
if (IsBracketOpenChar(sc.chNext)) {
sc.Forward(); // Condition met for "embedded comment"
dpEmbeded.opener = sc.ch;
// Find the opposite (termination) closing bracket (if any)
dpEmbeded.closer[0] = GetBracketCloseChar(dpEmbeded.opener);
if (dpEmbeded.closer[0] > 0) { // Enter "embedded comment"
// Find multiple opening character occurrence
dpEmbeded.count = GetRepeatCharCount(sc, dpEmbeded.opener, lengthToEnd);
sc.SetState(SCE_RAKU_COMMENTEMBED);
sc.Forward(dpEmbeded.count - 1); // incremented in the next loop
}
}
}
}
// And we're done...
sc.Complete();
}
/*
* LexerRaku::Fold
* - Main fold method
* NOTE: although Raku uses and supports UNICODE characters, we're only looking
* at normal chars here, using 'SafeGetCharAt' - for folding purposes
* that is all we need.
*/
// Shift (4 bits) and mask (0xF0) constants named for "head fold" data.
// NOTE(review): neither macro is referenced inside LexerRaku::Fold, which
// shifts its packed level by 16 instead - confirm whether they are still
// needed elsewhere before relying on them.
#define RAKU_HEADFOLD_SHIFT 4
#define RAKU_HEADFOLD_MASK 0xF0
/*
 * Compute the fold level of every line touched by the range
 * [startPos, startPos + length) and store it with LexAccessor::SetLevel.
 *
 *  startPos   - first position to fold; moved back to the start of an earlier
 *               line while the style at startPos is not SCE_RAKU_DEFAULT
 *  length     - number of characters to fold, counted from the original startPos
 *  initStyle  - unused
 *  pAccess    - document to read styles from and write fold levels to
 *
 * Does nothing when options.fold is off.
 *
 * Level encoding used here: the low half of a line's level word holds the
 * level at the start of the line (plus the white / header flags), the high
 * half (<< 16) holds the level reached at the end of that line. When folding
 * resumes, the high half of the previous line is read back as the start level.
 */
void SCI_METHOD LexerRaku::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) {

	// init LexAccessor / return if fold option is off
	if (!options.fold) return;
	LexAccessor styler(pAccess);

	// init char and line positions (endPos is fixed before any backtracking)
	const Sci_PositionU endPos = startPos + length;
	Sci_Position lineCurrent = styler.GetLine(startPos);

	// Backtrack to last SCE_RAKU_DEFAULT line
	if (startPos > 0 && lineCurrent > 0) {
		while (lineCurrent > 0 && styler.StyleAt(startPos) != SCE_RAKU_DEFAULT) {
			lineCurrent--;
			startPos = styler.LineStart(lineCurrent);
		}
		lineCurrent = styler.GetLine(startPos);
	}
	Sci_PositionU lineStart = startPos;
	Sci_PositionU lineStartNext = styler.LineStart(lineCurrent + 1);

	// init line folding level: resume from the end-of-line level that was
	// stored in the high half of the previous line's level word
	int levelPrev = SC_FOLDLEVELBASE;
	if (lineCurrent > 0)
		levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
	int levelCurrent = levelPrev;

	// init char and style variables
	char chNext = styler[startPos];
	// Guard the look-behind: startPos is unsigned, so at position 0 the
	// expression 'startPos - 1' would wrap around and query a position outside
	// the document. There is no previous character, so treat it as default.
	int stylePrev = (startPos > 0) ? styler.StyleAt(startPos - 1) : SCE_RAKU_DEFAULT;
	int styleNext = styler.StyleAt(startPos);
	int styleNextStartLine = styler.StyleAt(lineStartNext);
	int visibleChars = 0;
	bool wasCommentMulti = false;

	// main loop
	for (Sci_PositionU i = startPos; i < endPos; i++) {

		// next char, style and flags
		const char ch = chNext;
		chNext = styler.SafeGetCharAt(i + 1);
		const int style = styleNext;
		styleNext = styler.StyleAt(i + 1);
		const bool atEOL = i == (lineStartNext - 1);
		const bool atLineStart = i == lineStart;

		// --- Comments / Multi-line / POD ------------------------------------
		if (options.foldComment) {

			// Multi-line: opens on a line starting with "#`" whose following
			// line starts inside the embedded comment; closes on the last
			// line that still starts inside it
			if (options.foldCommentMultiline) {
				if (style == SCE_RAKU_COMMENTLINE && atLineStart && ch == '#' && chNext == '`'
						&& styleNextStartLine == SCE_RAKU_COMMENTEMBED) {
					levelCurrent++;
					wasCommentMulti = true; // don't confuse line comments
				} else if (style == SCE_RAKU_COMMENTEMBED && atLineStart
						&& styleNextStartLine != SCE_RAKU_COMMENTEMBED) {
					levelCurrent--;
				}
			}

			// Line comments: fold a run of comment lines, opening on its
			// first line and closing on its last
			if (!wasCommentMulti && atEOL && stylePrev == SCE_RAKU_COMMENTLINE
					&& IsCommentLine(lineCurrent, styler)) {
				if (!IsCommentLine(lineCurrent - 1, styler)
						&& IsCommentLine(lineCurrent + 1, styler))
					levelCurrent++;
				else if (IsCommentLine(lineCurrent - 1, styler)
						&& !IsCommentLine(lineCurrent + 1, styler))
					levelCurrent--;
			}

			// POD
			if (options.foldCommentPOD && atLineStart && style == SCE_RAKU_POD) {
				if (styler.Match(i, "=begin"))
					levelCurrent++;
				else if (styler.Match(i, "=end"))
					levelCurrent--;
			}
		}

		// --- Code block -----------------------------------------------------
		if (style == SCE_RAKU_OPERATOR) {
			if (ch == '{') {
				// A '}' earlier on this line already dropped the level
				// (e.g. "} else {"): lower the line's own level too, so the
				// new '{' still turns this line into a fold header
				if (levelCurrent < levelPrev) levelPrev--;
				levelCurrent++;
			} else if (ch == '}') {
				levelCurrent--;
			}
		}

		// --- at end of line / range / apply fold ----------------------------
		if (atEOL) {
			int level = levelPrev;

			// set level flags
			level |= levelCurrent << 16;
			if (visibleChars == 0 && options.foldCompact)
				level |= SC_FOLDLEVELWHITEFLAG;
			if ((levelCurrent > levelPrev) && (visibleChars > 0))
				level |= SC_FOLDLEVELHEADERFLAG;
			if (level != styler.LevelAt(lineCurrent)) {
				styler.SetLevel(lineCurrent, level);
			}

			// advance all per-line state to the next line
			lineCurrent++;
			lineStart = lineStartNext;
			lineStartNext = styler.LineStart(lineCurrent + 1);
			styleNextStartLine = styler.StyleAt(lineStartNext);
			levelPrev = levelCurrent;
			visibleChars = 0;
			wasCommentMulti = false;
		}

		// increment visibleChars / set previous char
		if (!isspacechar(ch))
			visibleChars++;
		stylePrev = style;
	}

	// Done: set real level of the next line (keeping the flags it already has)
	int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
	styler.SetLevel(lineCurrent, levelPrev | flagsNext);
}
/*----------------------------------------------------------------------------*
* --- Scintilla: LexerModule ---
* Module descriptor for the Raku lexer, constructed from the SCLEX_RAKU
* identifier, the LexerRaku::LexerFactoryRaku factory function, the name
* "raku" and the rakuWordLists table. Declared 'extern const' so the const
* object has external linkage.
*----------------------------------------------------------------------------*/
extern const LexerModule lmRaku(SCLEX_RAKU, LexerRaku::LexerFactoryRaku, "raku", rakuWordLists);