You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1635 lines
50 KiB
1635 lines
50 KiB
/** @file LexRaku.cxx
|
|
** Lexer for Raku
|
|
**
|
|
** Copyright (c) 2019 Mark Reay <mark@reay.net.au>
|
|
**/
|
|
// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
|
|
// The License.txt file describes the conditions under which this software may be distributed.
|
|
|
|
/*
|
|
* Raku (Perl6) Lexer for Scintilla
|
|
* ---------------------------------
|
|
* ---------------------------------
|
|
* 06-Dec-2019: More Unicode support:
|
|
* - Added a full scope of allowed numbers and letters
|
|
* 29-Nov-2019: More highlighting / implemented basic folding:
|
|
* - Operators (blanket cover, no sequence checking)
|
|
* - Class / Grammar name highlighting
|
|
* - Folding:
|
|
* - Comments: line / multi-line
|
|
* - POD sections
|
|
* - Code blocks {}
|
|
* 26-Nov-2019: Basic syntax highlighting covering the following:
|
|
* - Comments, both line and embedded (multi-line)
|
|
* - POD, no inline highlighting as yet...
|
|
* - Heredoc block string, with variable highlighting (with qq)
|
|
* - Strings, with variable highlighting (with ")
|
|
* - Q Language, including adverbs (also basic q and qq)
|
|
* - Regex, including adverbs
|
|
* - Numbers
|
|
* - Bareword / identifiers
|
|
* - Types
|
|
* - Variables: mu, positional, associative, callable
|
|
* TODO:
|
|
* - POD inline
|
|
* - Better operator sequence coverage
|
|
*/
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include <stdarg.h>
|
|
#include <assert.h>
|
|
#include <ctype.h>
|
|
|
|
#include <string>
|
|
#include <string_view>
|
|
#include <vector>
|
|
#include <map>
|
|
#include <functional>
|
|
|
|
#include "ILexer.h"
|
|
#include "Scintilla.h"
|
|
#include "SciLexer.h"
|
|
|
|
#include "WordList.h"
|
|
#include "LexAccessor.h"
|
|
#include "StyleContext.h"
|
|
#include "CharacterSet.h"
|
|
#include "CharacterCategory.h"
|
|
#include "LexerModule.h"
|
|
#include "OptionSet.h"
|
|
#include "DefaultLexer.h"
|
|
|
|
using namespace Scintilla;
|
|
using namespace Lexilla;
|
|
|
|
namespace { // anonymous namespace to isolate any name clashes
|
|
/*----------------------------------------------------------------------------*
|
|
* --- DEFINITIONS: OPTIONS / CONSTANTS ---
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
// Number types
//  - the numeric order is significant:
//      1-3 cannot have a dot
//      1-5 are numbers; 6-7 are strings
#define RAKUNUM_BINARY		1
#define RAKUNUM_OCTAL		2
#define RAKUNUM_FLOAT_EXP	3	// exponent part only
#define RAKUNUM_HEX			4	// may be a hex float
#define RAKUNUM_DECIMAL		5
#define RAKUNUM_VECTOR		6
#define RAKUNUM_V_VECTOR	7
#define RAKUNUM_VERSION		8	// can contain multiple '.'s
#define RAKUNUM_BAD			9
|
|
|
|
// Regex / Q string types
//  - the numeric order is significant:
//      values below RAKUTYPE_QLANG are the RAKUTYPE_REGEX_? kinds
//      regex values above RAKUTYPE_REGEX use a 2 char identifier
//  - the trailing note on each line is the length of the identifier
#define RAKUTYPE_REGEX_NORM	0	// 0 char ident
#define RAKUTYPE_REGEX_S	1	// 1 char ident
#define RAKUTYPE_REGEX_M	2	// 1 char ident
#define RAKUTYPE_REGEX_Y	3	// 1 char ident
#define RAKUTYPE_REGEX		4	// > RAKUTYPE_REGEX == 2 char identifiers
#define RAKUTYPE_REGEX_RX	5	// 2 char ident
#define RAKUTYPE_REGEX_TR	6	// 2 char ident
#define RAKUTYPE_QLANG		7	// < RAKUTYPE_QLANG == RAKUTYPE_REGEX_?
#define RAKUTYPE_STR_WQ		8	// 0 char ident < word quote >
#define RAKUTYPE_STR_Q		9	// 1 char ident
#define RAKUTYPE_STR_QX		10	// 2 char ident
#define RAKUTYPE_STR_QW		11	// 2 char ident
#define RAKUTYPE_STR_QQ		12	// 2 char ident
#define RAKUTYPE_STR_QQX	13	// 3 char ident
#define RAKUTYPE_STR_QQW	14	// 3 char ident
#define RAKUTYPE_STR_QQWW	15	// 4 char ident
|
|
|
|
// Delimiter types: selects how an opening delimiter is paired with its closer
#define RAKUDELIM_BRACKET	0	// bracket: regex, Q language
#define RAKUDELIM_QUOTE		1	// quote: normal string
|
|
|
|
// rakuWordLists: descriptions of the keyword sets defined in config
//  - nullptr terminated
//  - the position of an entry is the index 'n' handled by WordListSet
const char *const rakuWordLists[] = {
	"Keywords and identifiers",		// 0
	"Functions",					// 1
	"Types basic",					// 2
	"Types composite",				// 3
	"Types domain-specific",		// 4
	"Types exception",				// 5
	"Adverbs",						// 6
	nullptr,
};
|
|
|
|
// Options and defaults
//  - every folding switch starts enabled, except compact folding
struct OptionsRaku {
	bool fold;					// "fold"
	bool foldCompact;			// "fold.compact"
	bool foldComment;			// "fold.comment"
	bool foldCommentMultiline;	// "fold.raku.comment.multiline"
	bool foldCommentPOD;		// "fold.raku.comment.pod"
	OptionsRaku() :
		fold(true),
		foldCompact(false),
		foldComment(true),
		foldCommentMultiline(true),
		foldCommentPOD(true) {
	}
};
|
|
|
|
// init options and words
|
|
struct OptionSetRaku : public OptionSet<OptionsRaku> {
|
|
OptionSetRaku() {
|
|
DefineProperty("fold", &OptionsRaku::fold);
|
|
DefineProperty("fold.comment", &OptionsRaku::foldComment);
|
|
DefineProperty("fold.compact", &OptionsRaku::foldCompact);
|
|
|
|
DefineProperty("fold.raku.comment.multiline", &OptionsRaku::foldCommentMultiline,
|
|
"Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
|
|
DefineProperty("fold.raku.comment.pod", &OptionsRaku::foldCommentPOD,
|
|
"Set this property to 0 to disable folding POD comments when fold.comment=1.");
|
|
|
|
// init word lists
|
|
DefineWordListSets(rakuWordLists);
|
|
}
|
|
};
|
|
|
|
// Delimiter pair
//  - describes an opening delimiter and the character(s) that may close it
//  - a default constructed pair has every field zeroed / false
struct DelimPair {
	int opener;		// opener char
	int closer[2];	// closer chars (a second closer is optional, 0 when unused)
	bool interpol;	// can variables be interpolated?
	short count;	// delimiter char count
	DelimPair() : opener(0), closer{0, 0}, interpol(false), count(0) {
	}
	// true when ch equals either of the two closer slots
	bool isCloser(int ch) const {
		return closer[0] == ch || closer[1] == ch;
	}
};
|
|
|
|
/*----------------------------------------------------------------------------*
|
|
* --- FUNCTIONS ---
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
/*
 * IsANewLine
 *  - true for a line feed or a carriage return, false for anything else
 */
constexpr bool IsANewLine(int ch) noexcept {
	return ch == '\n' || ch == '\r';
}
|
|
|
|
/*
|
|
* IsAWhitespace
|
|
* - returns true if this is a whitespace (or newline) char
|
|
*/
|
|
bool IsAWhitespace(int ch) noexcept {
|
|
return IsASpaceOrTab(ch) || IsANewLine(ch);
|
|
}
|
|
|
|
/*
 * IsAlphabet
 *  - true for the ASCII letters a-z and A-Z only
 */
constexpr bool IsAlphabet(int ch) noexcept {
	// setting bit 0x20 folds 'A'-'Z' onto 'a'-'z'; no other value lands in that range
	return (ch | 0x20) >= 'a' && (ch | 0x20) <= 'z';
}
|
|
|
|
/*
|
|
* IsCommentLine
|
|
* - returns true if this is a comment line
|
|
* - tests: SCE_RAKU_COMMENTLINE or SCE_RAKU_COMMENTEMBED
|
|
* modified from: LexPerl.cxx
|
|
*/
|
|
bool IsCommentLine(Sci_Position line, LexAccessor &styler, int type = SCE_RAKU_COMMENTLINE) {
|
|
Sci_Position pos = styler.LineStart(line);
|
|
Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
|
|
for (Sci_Position i = pos; i < eol_pos; i++) {
|
|
char ch = styler[i];
|
|
int style = styler.StyleAt(i);
|
|
if (type == SCE_RAKU_COMMENTEMBED) {
|
|
if (i == (eol_pos - 1) && style == type)
|
|
return true;
|
|
} else { // make sure the line is NOT a SCE_RAKU_COMMENTEMBED
|
|
if (ch == '#' && style == type && styler[i+1] != '`' )
|
|
return true;
|
|
else if (!IsASpaceOrTab(ch))
|
|
return false;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* ContainsQTo
|
|
* - returns true if this range contains ":to" in style SCE_RAKU_ADVERB indicating the start
|
|
* of a SCE_RAKU_HEREDOC_Q or SCE_RAKU_HEREDOC_QQ.
|
|
*/
|
|
bool ContainsQTo(Sci_Position start, Sci_Position end, LexAccessor &styler) {
|
|
std::string adverb;
|
|
for (Sci_Position i = start; i < end; i++) {
|
|
if (styler.StyleAt(i) == SCE_RAKU_ADVERB) {
|
|
adverb.push_back(styler[i]);
|
|
}
|
|
}
|
|
return adverb.find(":to") != std::string::npos;
|
|
}
|
|
|
|
/*
|
|
* GetBracketCloseChar
|
|
* - returns the end bracket char: opposite of start
|
|
* - see: http://www.unicode.org/Public/5.1.0/ucd/BidiMirroring.txt (first section)
|
|
* - Categories are general matches for valid BiDi types
|
|
* - Most closer chars are opener + 1
|
|
*/
|
|
int GetBracketCloseChar(const int ch) noexcept {
|
|
const CharacterCategory cc = CategoriseCharacter(ch);
|
|
switch (cc) {
|
|
case ccSm:
|
|
switch (ch) {
|
|
case 0x3C: return 0x3E; // LESS-THAN SIGN
|
|
case 0x2208: return 0x220B; // ELEMENT OF
|
|
case 0x2209: return 0x220C; // NOT AN ELEMENT OF
|
|
case 0x220A: return 0x220D; // SMALL ELEMENT OF
|
|
case 0x2215: return 0x29F5; // DIVISION SLASH
|
|
case 0x2243: return 0x22CD; // ASYMPTOTICALLY EQUAL TO
|
|
case 0x2298: return 0x29B8; // CIRCLED DIVISION SLASH
|
|
case 0x22A6: return 0x2ADE; // ASSERTION
|
|
case 0x22A8: return 0x2AE4; // TRUE
|
|
case 0x22A9: return 0x2AE3; // FORCES
|
|
case 0x22AB: return 0x2AE5; // DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
|
|
case 0x22F2: return 0x22FA; // ELEMENT OF WITH LONG HORIZONTAL STROKE
|
|
case 0x22F3: return 0x22FB; // ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
|
|
case 0x22F4: return 0x22FC; // SMALL ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
|
|
case 0x22F6: return 0x22FD; // ELEMENT OF WITH OVERBAR
|
|
case 0x22F7: return 0x22FE; // SMALL ELEMENT OF WITH OVERBAR
|
|
case 0xFF1C: return 0xFF1E; // FULLWIDTH LESS-THAN SIGN
|
|
}
|
|
break;
|
|
case ccPs:
|
|
switch (ch) {
|
|
case 0x5B: return 0x5D; // LEFT SQUARE BRACKET
|
|
case 0x7B: return 0x7D; // LEFT CURLY BRACKET
|
|
case 0x298D: return 0x2990; // LEFT SQUARE BRACKET WITH TICK IN TOP CORNER
|
|
case 0x298F: return 0x298E; // LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
|
|
case 0xFF3B: return 0xFF3D; // FULLWIDTH LEFT SQUARE BRACKET
|
|
case 0xFF5B: return 0xFF5D; // FULLWIDTH LEFT CURLY BRACKET
|
|
}
|
|
break;
|
|
case ccPi:
|
|
break;
|
|
default: return 0;
|
|
}
|
|
return ch + 1;
|
|
}
|
|
|
|
/*
|
|
* IsValidQuoteOpener
|
|
* -
|
|
*/
|
|
bool IsValidQuoteOpener(const int ch, DelimPair &dp, int type = RAKUDELIM_BRACKET) noexcept {
|
|
dp.closer[0] = 0;
|
|
dp.closer[1] = 0;
|
|
dp.interpol = true;
|
|
if (type == RAKUDELIM_QUOTE) {
|
|
switch (ch) {
|
|
// Opener Closer Description
|
|
case '\'': dp.closer[0] = '\''; // APOSTROPHE
|
|
dp.interpol = false;
|
|
break;
|
|
case '"': dp.closer[0] = '"'; // QUOTATION MARK
|
|
break;
|
|
case 0x2018: dp.closer[0] = 0x2019; // LEFT SINGLE QUOTATION MARK
|
|
dp.interpol = false;
|
|
break;
|
|
case 0x201C: dp.closer[0] = 0x201D; // LEFT DOUBLE QUOTATION MARK
|
|
break;
|
|
case 0x201D: dp.closer[0] = 0x201C; // RIGHT DOUBLE QUOTATION MARK
|
|
break;
|
|
case 0x201E: dp.closer[0] = 0x201C; // DOUBLE LOW-9 QUOTATION MARK
|
|
dp.closer[1] = 0x201D;
|
|
break;
|
|
case 0xFF62: dp.closer[0] = 0xFF63; // HALFWIDTH LEFT CORNER BRACKET
|
|
dp.interpol = false;
|
|
break;
|
|
default: return false;
|
|
}
|
|
} else if (type == RAKUDELIM_BRACKET) {
|
|
dp.closer[0] = GetBracketCloseChar(ch);
|
|
}
|
|
dp.opener = ch;
|
|
dp.count = 1;
|
|
return dp.closer[0] > 0;
|
|
}
|
|
|
|
/*
|
|
* IsBracketOpenChar
|
|
* - true if this is a valid start bracket character
|
|
*/
|
|
bool IsBracketOpenChar(int ch) noexcept {
|
|
return GetBracketCloseChar(ch) > 0;
|
|
}
|
|
|
|
/*
|
|
* IsValidRegOrQAdjacent
|
|
* - returns true if ch is a valid character to put directly after Q / q
|
|
* * ref: Q Language: https://docs.raku.org/language/quoting
|
|
*/
|
|
bool IsValidRegOrQAdjacent(int ch) noexcept {
|
|
return !(IsAlphaNumeric(ch) || ch == '_' || ch == '(' || ch == ')' || ch == '\'' );
|
|
}
|
|
|
|
/*
|
|
* IsValidRegOrQPrecede
|
|
* - returns true if ch is a valid preceding character to put directly before Q / q
|
|
* * ref: Q Language: https://docs.raku.org/language/quoting
|
|
*/
|
|
bool IsValidRegOrQPrecede(int ch) noexcept {
|
|
return !(IsAlphaNumeric(ch) || ch == '_');
|
|
}
|
|
|
|
/*
|
|
* MatchCharInRange
|
|
* - returns true if the mach character is found in range (of length)
|
|
* - ignoreDelim (default false)
|
|
*/
|
|
bool MatchCharInRange(StyleContext &sc, const Sci_Position length,
|
|
const int match, bool ignoreDelim = false) {
|
|
Sci_Position len = 0;
|
|
int chPrev = sc.chPrev;
|
|
while (++len < length) {
|
|
const int ch = sc.GetRelativeCharacter(len);
|
|
if (ch == match && (ignoreDelim || chPrev != '\\'))
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* PrevNonWhitespaceChar
|
|
* - returns the last non-whitespace char
|
|
*/
|
|
int PrevNonWhitespaceChar(StyleContext &sc) {
|
|
Sci_Position rel = 0;
|
|
Sci_Position max_back = 0 - sc.currentPos;
|
|
while (--rel > max_back) {
|
|
const int ch = sc.GetRelativeCharacter(rel);
|
|
if (!IsAWhitespace(ch))
|
|
return ch;
|
|
}
|
|
return 0; // no matching char
|
|
}
|
|
|
|
/*
|
|
* IsQLangStartAtScPos
|
|
* - returns true if this is a valid Q Language sc position
|
|
* - ref: https://docs.raku.org/language/quoting
|
|
* - Q :adverb :adverb //;
|
|
* - q,qx,qw,qq,qqx,qqw,qqww :adverb /:adverb /;
|
|
*/
|
|
bool IsQLangStartAtScPos(StyleContext &sc, int &type, const Sci_Position length) {
|
|
const bool valid_adj = IsValidRegOrQAdjacent(sc.chNext);
|
|
const int chFw2 = sc.GetRelativeCharacter(2);
|
|
const int chFw3 = sc.GetRelativeCharacter(3);
|
|
type = -1;
|
|
if (IsValidRegOrQPrecede(sc.chPrev)) {
|
|
if (sc.ch == 'Q' && valid_adj) {
|
|
type = RAKUTYPE_QLANG;
|
|
} else if (sc.ch == 'q') {
|
|
switch (sc.chNext) {
|
|
case 'x':
|
|
type = RAKUTYPE_STR_QX;
|
|
break;
|
|
case 'w':
|
|
type = RAKUTYPE_STR_QW;
|
|
break;
|
|
case 'q':
|
|
if (chFw2 == 'x') {
|
|
type = RAKUTYPE_STR_QQX;
|
|
} else if (chFw2 == 'w') {
|
|
if (chFw3 == 'w') {
|
|
type = RAKUTYPE_STR_QQWW;
|
|
} else {
|
|
type = RAKUTYPE_STR_QQW;
|
|
}
|
|
} else {
|
|
type = RAKUTYPE_STR_QQ;
|
|
}
|
|
break;
|
|
default:
|
|
type = RAKUTYPE_STR_Q;
|
|
}
|
|
} else if (sc.ch == '<' && MatchCharInRange(sc, length, '>')) {
|
|
type = RAKUTYPE_STR_WQ; // < word quote >
|
|
}
|
|
}
|
|
return type >= 0;
|
|
}
|
|
|
|
/*
|
|
* IsRegexStartAtScPos
|
|
* - returns true if this is a valid Regex sc position
|
|
* - ref: https://docs.raku.org/language/regexes
|
|
* - Regex: (rx/s/m/tr/y) :adverb /:adverb /;
|
|
* - regex R :adverb //;
|
|
* - /:adverb /;
|
|
*/
|
|
bool IsRegexStartAtScPos(StyleContext &sc, int &type, CharacterSet &set) {
|
|
const bool valid_adj = IsValidRegOrQAdjacent(sc.chNext);
|
|
type = -1;
|
|
if (IsValidRegOrQPrecede(sc.chPrev)) {
|
|
switch (sc.ch) {
|
|
case 'r':
|
|
if (sc.chNext == 'x')
|
|
type = RAKUTYPE_REGEX_RX;
|
|
break;
|
|
case 't':
|
|
case 'T':
|
|
if (sc.chNext == 'r' || sc.chNext == 'R')
|
|
type = RAKUTYPE_REGEX_TR;
|
|
break;
|
|
case 'm':
|
|
if (valid_adj)
|
|
type = RAKUTYPE_REGEX_M;
|
|
break;
|
|
case 's':
|
|
case 'S':
|
|
if (valid_adj)
|
|
type = RAKUTYPE_REGEX_S;
|
|
break;
|
|
case 'y':
|
|
if (valid_adj)
|
|
type = RAKUTYPE_REGEX_Y;
|
|
break;
|
|
case '/':
|
|
if (set.Contains(PrevNonWhitespaceChar(sc)))
|
|
type = RAKUTYPE_REGEX_NORM;
|
|
}
|
|
}
|
|
return type >= 0;
|
|
}
|
|
|
|
/*
|
|
* IsValidIdentPrecede
|
|
* - returns if ch is a valid preceding char to put directly before an identifier
|
|
*/
|
|
bool IsValidIdentPrecede(int ch) noexcept {
|
|
return !(IsAlphaNumeric(ch) || ch == '_' || ch == '@' || ch == '$' || ch == '%');
|
|
}
|
|
|
|
/*
|
|
* IsValidDelimiter
|
|
* - returns if ch is a valid delimiter (most chars are valid)
|
|
* * ref: Q Language: https://docs.raku.org/language/quoting
|
|
*/
|
|
bool IsValidDelimiter(int ch) noexcept {
|
|
return !(IsAlphaNumeric(ch) || ch == ':');
|
|
}
|
|
|
|
/*
|
|
* GetDelimiterCloseChar
|
|
* - returns the corresponding close char for a given delimiter (could be the same char)
|
|
*/
|
|
int GetDelimiterCloseChar(int ch) noexcept {
|
|
int ch_end = GetBracketCloseChar(ch);
|
|
if (ch_end == 0 && IsValidDelimiter(ch)) {
|
|
ch_end = ch;
|
|
}
|
|
return ch_end;
|
|
}
|
|
|
|
/*
|
|
* GetRepeatCharCount
|
|
* - returns the occurrence count of match
|
|
*/
|
|
Sci_Position GetRepeatCharCount(StyleContext &sc, int chMatch, Sci_Position length) {
|
|
Sci_Position cnt = 0;
|
|
while (cnt < length) {
|
|
if (sc.GetRelativeCharacter(cnt) != chMatch) {
|
|
break;
|
|
}
|
|
cnt++;
|
|
}
|
|
return cnt;
|
|
}
|
|
|
|
/*
|
|
* LengthToDelimiter
|
|
* - returns the length until the end of a delimited string section
|
|
* - Ignores nested delimiters (if opener != closer)
|
|
* - no trailing char after last closer (default false)
|
|
*/
|
|
Sci_Position LengthToDelimiter(StyleContext &sc, const DelimPair &dp,
|
|
Sci_Position length, bool noTrailing = false) {
|
|
short cnt_open = 0; // count open bracket
|
|
short cnt_close = 0; // count close bracket
|
|
bool is_escape = false; // has been escaped using '\'?
|
|
Sci_Position len = 0; // count characters
|
|
int chOpener = dp.opener; // look for nested opener / closer
|
|
if (dp.opener == dp.closer[0])
|
|
chOpener = 0; // no opening delimiter (no nesting possible)
|
|
|
|
while (len < length) {
|
|
const int chPrev = sc.GetRelativeCharacter(len - 1);
|
|
const int ch = sc.GetRelativeCharacter(len);
|
|
const int chNext = sc.GetRelativeCharacter(len+1);
|
|
|
|
if (cnt_open == 0 && cnt_close == dp.count) {
|
|
return len; // end condition has been met
|
|
} else if (is_escape) {
|
|
is_escape = false;
|
|
} else if (ch == '\\') {
|
|
is_escape = true;
|
|
} else {
|
|
if (ch == chOpener) {
|
|
cnt_open++; // open nested bracket
|
|
} else if (dp.isCloser(ch)) {
|
|
if ( cnt_open > 0 ) {
|
|
cnt_open--; // close nested bracket
|
|
} else if (dp.count > 1 && cnt_close < (dp.count - 1)) {
|
|
if (cnt_close > 1) {
|
|
if (dp.isCloser(chPrev)) {
|
|
cnt_close++;
|
|
} else { // reset if previous char was not close
|
|
cnt_close = 0;
|
|
}
|
|
} else {
|
|
cnt_close++;
|
|
}
|
|
} else if (!noTrailing || (IsAWhitespace(chNext))) {
|
|
cnt_close++; // found last close
|
|
if (cnt_close > 1 && !dp.isCloser(chPrev)) {
|
|
cnt_close = 0; // reset if previous char was not close
|
|
}
|
|
} else {
|
|
cnt_close = 0; // non handled close: reset
|
|
}
|
|
} else if (IsANewLine(ch)) {
|
|
cnt_open = 0; // reset after each line
|
|
cnt_close = 0;
|
|
}
|
|
}
|
|
len++;
|
|
}
|
|
return -1; // end condition has NOT been met
|
|
}
|
|
|
|
/*
|
|
* LengthToEndHeredoc
|
|
* - returns the length until the end of a heredoc section
|
|
* - delimiter string MUST begin on a new line
|
|
*/
|
|
Sci_Position LengthToEndHeredoc(const StyleContext &sc, LexAccessor &styler,
|
|
const Sci_Position length, const char *delim) {
|
|
bool on_new_ln = false;
|
|
int i = 0; // str index
|
|
for (int n = 0; n < length; n++) {
|
|
const char ch = styler.SafeGetCharAt(sc.currentPos + n, 0);
|
|
if (on_new_ln) {
|
|
if (delim[i] == '\0')
|
|
return n; // at end of str, match found!
|
|
if (ch != delim[i++])
|
|
i = 0; // no char match, reset 'i'ndex
|
|
}
|
|
if (i == 0) // detect new line
|
|
on_new_ln = IsANewLine(ch);
|
|
}
|
|
return -1; // no match found
|
|
}
|
|
|
|
/*
|
|
* LengthToNextChar
|
|
* - returns the length until the next character
|
|
*/
|
|
Sci_Position LengthToNextChar(StyleContext &sc, const Sci_Position length) {
|
|
Sci_Position len = 0;
|
|
while (++len < length) {
|
|
const int ch = sc.GetRelativeCharacter(len);
|
|
if (!IsASpaceOrTab(ch) && !IsANewLine(ch)) {
|
|
break;
|
|
}
|
|
}
|
|
return len;
|
|
}
|
|
|
|
/*
|
|
* GetRelativeString
|
|
* - gets a relative string and sets it in &str
|
|
* - resets string before setting
|
|
*/
|
|
void GetRelativeString(StyleContext &sc, Sci_Position offset, Sci_Position length,
|
|
std::string &str) {
|
|
Sci_Position pos = offset;
|
|
str.clear();
|
|
while (pos < length) {
|
|
str += sc.GetRelativeCharacter(pos++);
|
|
}
|
|
}
|
|
|
|
} // end anonymous namespace
|
|
|
|
/*----------------------------------------------------------------------------*
|
|
* --- class: LexerRaku ---
|
|
*----------------------------------------------------------------------------*/
|
|
//class LexerRaku : public ILexerWithMetaData {
|
|
class LexerRaku : public DefaultLexer {
|
|
CharacterSet setWord;
|
|
CharacterSet setSigil;
|
|
CharacterSet setTwigil;
|
|
CharacterSet setOperator;
|
|
CharacterSet setSpecialVar;
|
|
WordList regexIdent; // identifiers that specify a regex
|
|
OptionsRaku options; // Options from config
|
|
OptionSetRaku osRaku;
|
|
WordList keywords; // Word Lists from config
|
|
WordList functions;
|
|
WordList typesBasic;
|
|
WordList typesComposite;
|
|
WordList typesDomainSpecific;
|
|
WordList typesExceptions;
|
|
WordList adverbs;
|
|
|
|
public:
|
|
// Defined as explicit, so that constructor can not be copied
|
|
explicit LexerRaku() :
|
|
DefaultLexer("raku", SCLEX_RAKU),
|
|
setWord(CharacterSet::setAlphaNum, "-_", 0x80),
|
|
setSigil(CharacterSet::setNone, "$&%@"),
|
|
setTwigil(CharacterSet::setNone, "!*.:<=?^~"),
|
|
setOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;<>,?!.~"),
|
|
setSpecialVar(CharacterSet::setNone, "_/!") {
|
|
regexIdent.Set("regex rule token");
|
|
}
|
|
// Deleted so LexerRaku objects can not be copied.
|
|
LexerRaku(const LexerRaku &) = delete;
|
|
LexerRaku(LexerRaku &&) = delete;
|
|
void operator=(const LexerRaku &) = delete;
|
|
void operator=(LexerRaku &&) = delete;
|
|
virtual ~LexerRaku() {
|
|
}
|
|
void SCI_METHOD Release() noexcept override {
|
|
delete this;
|
|
}
|
|
int SCI_METHOD Version() const noexcept override {
|
|
return lvRelease5;
|
|
}
|
|
const char *SCI_METHOD PropertyNames() override {
|
|
return osRaku.PropertyNames();
|
|
}
|
|
int SCI_METHOD PropertyType(const char *name) override {
|
|
return osRaku.PropertyType(name);
|
|
}
|
|
const char *SCI_METHOD DescribeProperty(const char *name) override {
|
|
return osRaku.DescribeProperty(name);
|
|
}
|
|
Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
|
|
const char *SCI_METHOD PropertyGet(const char *key) override {
|
|
return osRaku.PropertyGet(key);
|
|
}
|
|
const char *SCI_METHOD DescribeWordListSets() override {
|
|
return osRaku.DescribeWordListSets();
|
|
}
|
|
Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
|
|
void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
|
|
void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
|
|
|
|
static ILexer5 *LexerFactoryRaku() {
|
|
return new LexerRaku();
|
|
}
|
|
|
|
protected:
|
|
bool IsOperatorChar(const int ch);
|
|
bool IsWordChar(const int ch, bool allowNumber = true);
|
|
bool IsWordStartChar(const int ch);
|
|
bool IsNumberChar(const int ch, int base = 10);
|
|
bool ProcessRegexTwinCapture(StyleContext &sc, const Sci_Position length,
|
|
int &type, const DelimPair &dp);
|
|
void ProcessStringVars(StyleContext &sc, const Sci_Position length, const int varState);
|
|
bool ProcessValidRegQlangStart(StyleContext &sc, Sci_Position length, const int type,
|
|
WordList &wordsAdverbs, DelimPair &dp);
|
|
Sci_Position LengthToNonWordChar(StyleContext &sc, Sci_Position length,
|
|
char *s, const int size, Sci_Position offset = 0);
|
|
};
|
|
|
|
/*----------------------------------------------------------------------------*
|
|
* --- METHODS: LexerRaku ---
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
/*
|
|
* LexerRaku::IsOperatorChar
|
|
* - Test for both ASCII and Unicode operators
|
|
* see: https://docs.raku.org/language/unicode_entry
|
|
*/
|
|
bool LexerRaku::IsOperatorChar(const int ch) {
|
|
if (ch > 0x7F) {
|
|
switch (ch) {
|
|
// Unicode ASCII Equiv.
|
|
case 0x2208: // (elem)
|
|
case 0x2209: // !(elem)
|
|
case 0x220B: // (cont)
|
|
case 0x220C: // !(cont)
|
|
case 0x2216: // (-)
|
|
case 0x2229: // (&)
|
|
case 0x222A: // (|)
|
|
case 0x2282: // (<)
|
|
case 0x2283: // (>)
|
|
case 0x2284: // !(<)
|
|
case 0x2285: // !(>)
|
|
case 0x2286: // (<=)
|
|
case 0x2287: // (>=)
|
|
case 0x2288: // !(<=)
|
|
case 0x2289: // !(>=)
|
|
case 0x228D: // (.)
|
|
case 0x228E: // (+)
|
|
case 0x2296: // (^)
|
|
return true;
|
|
}
|
|
}
|
|
return setOperator.Contains(ch);
|
|
}
|
|
|
|
/*
|
|
* LexerRaku::IsWordChar
|
|
* - Test for both ASCII and Unicode identifier characters
|
|
* see: https://docs.raku.org/language/unicode_ascii
|
|
* also: ftp://ftp.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
|
|
* FIXME: *still* may not contain all valid characters
|
|
*/
|
|
bool LexerRaku::IsWordChar(const int ch, bool allowNumber) {
|
|
// Unicode numbers should not appear in word identifiers
|
|
if (ch > 0x7F) {
|
|
const CharacterCategory cc = CategoriseCharacter(ch);
|
|
switch (cc) {
|
|
// Letters
|
|
case ccLu:
|
|
case ccLl:
|
|
case ccLt:
|
|
case ccLm:
|
|
case ccLo:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
} else if (allowNumber && IsADigit(ch)) {
|
|
return true; // an ASCII number type
|
|
}
|
|
return setWord.Contains(ch);
|
|
}
|
|
|
|
/*
|
|
* LexerRaku::IsWordStartChar
|
|
* - Test for both ASCII and Unicode identifier "start / first" characters
|
|
*/
|
|
bool LexerRaku::IsWordStartChar(const int ch) {
|
|
return ch != '-' && IsWordChar(ch, false); // no numbers allowed
|
|
}
|
|
|
|
/*
|
|
* LexerRaku::IsNumberChar
|
|
* - Test for both ASCII and Unicode identifier number characters
|
|
* see: https://docs.raku.org/language/unicode_ascii
|
|
* also: ftp://ftp.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
|
|
* FILTERED by Unicode letters that are NUMBER
|
|
* and NOT PARENTHESIZED or CIRCLED
|
|
* FIXME: *still* may not contain all valid number characters
|
|
*/
|
|
bool LexerRaku::IsNumberChar(const int ch, int base) {
|
|
if (ch > 0x7F) {
|
|
const CharacterCategory cc = CategoriseCharacter(ch);
|
|
switch (cc) {
|
|
// Numbers
|
|
case ccNd:
|
|
case ccNl:
|
|
case ccNo:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
return IsADigit(ch, base);
|
|
}
|
|
|
|
/*
|
|
* LexerRaku::PropertySet
|
|
* -
|
|
*/
|
|
Sci_Position SCI_METHOD LexerRaku::PropertySet(const char *key, const char *val) {
|
|
if (osRaku.PropertySet(&options, key, val))
|
|
return 0;
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
* LexerRaku::WordListSet
|
|
* -
|
|
*/
|
|
Sci_Position SCI_METHOD LexerRaku::WordListSet(int n, const char *wl) {
|
|
WordList *wordListN = nullptr;
|
|
switch (n) {
|
|
case 0:
|
|
wordListN = &keywords;
|
|
break;
|
|
case 1:
|
|
wordListN = &functions;
|
|
break;
|
|
case 2:
|
|
wordListN = &typesBasic;
|
|
break;
|
|
case 3:
|
|
wordListN = &typesComposite;
|
|
break;
|
|
case 4:
|
|
wordListN = &typesDomainSpecific;
|
|
break;
|
|
case 5:
|
|
wordListN = &typesExceptions;
|
|
break;
|
|
case 6:
|
|
wordListN = &adverbs;
|
|
break;
|
|
}
|
|
Sci_Position firstModification = -1;
|
|
if (wordListN) {
|
|
if (wordListN->Set(wl)) {
|
|
firstModification = 0;
|
|
}
|
|
}
|
|
return firstModification;
|
|
}
|
|
|
|
/*
|
|
* LexerRaku::ProcessRegexTwinCapture
|
|
* - processes the transition between a regex pair (two sets of delimiters)
|
|
* - moves to first new delimiter, if a bracket
|
|
* - returns true when valid delimiter start found (if bracket)
|
|
*/
|
|
bool LexerRaku::ProcessRegexTwinCapture(StyleContext &sc, const Sci_Position length,
|
|
int &type, const DelimPair &dp) {
|
|
|
|
if (type == RAKUTYPE_REGEX_S || type == RAKUTYPE_REGEX_TR || type == RAKUTYPE_REGEX_Y) {
|
|
type = -1; // clear type
|
|
|
|
// move past chRegQClose if it was the previous char
|
|
if (dp.isCloser(sc.chPrev))
|
|
sc.Forward();
|
|
|
|
// no processing needed for non-bracket
|
|
if (dp.isCloser(dp.opener))
|
|
return true;
|
|
|
|
// move to next opening bracket
|
|
const Sci_Position len = LengthToNextChar(sc, length);
|
|
if (sc.GetRelativeCharacter(len) == dp.opener) {
|
|
sc.Forward(len);
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* LexerRaku::ProcessStringVars
|
|
* - processes a string and highlights any valid variables
|
|
*/
|
|
void LexerRaku::ProcessStringVars(StyleContext &sc, const Sci_Position length, const int varState) {
|
|
const int state = sc.state;
|
|
for (Sci_Position pos = 0; pos < length; pos++) {
|
|
if (sc.state == varState && !IsWordChar(sc.ch)) {
|
|
sc.SetState(state);
|
|
} else if (sc.chPrev != '\\'
|
|
&& (sc.ch == '$' || sc.ch == '@')
|
|
&& IsWordStartChar(sc.chNext)) {
|
|
sc.SetState(varState);
|
|
}
|
|
sc.Forward(); // Next character
|
|
}
|
|
}
|
|
/*
|
|
* LexerRaku::ProcessValidRegQlangStart
|
|
* - processes a section of the document range from after a Regex / Q delimiter
|
|
* - returns true on success
|
|
* - sets: adverbs, chOpen, chClose, chCount
|
|
* ref: https://docs.raku.org/language/regexes
|
|
*/
|
|
bool LexerRaku::ProcessValidRegQlangStart(StyleContext &sc, Sci_Position length, const int type,
|
|
WordList &wordsAdverbs, DelimPair &dp) {
|
|
Sci_Position startPos = sc.currentPos;
|
|
Sci_Position startLen = length;
|
|
const int target_state = sc.state;
|
|
int state = SCE_RAKU_DEFAULT;
|
|
std::string str;
|
|
|
|
// find our opening delimiter (and occurrences) / save any adverbs
|
|
dp.opener = 0; // adverbs can be after the first delimiter
|
|
bool got_all_adverbs = false; // in Regex statements
|
|
bool got_ident = false; // regex can have an identifier: 'regex R'
|
|
sc.SetState(state); // set state default to avoid pre-highlights
|
|
while ((dp.opener == 0 || !got_all_adverbs) && sc.More()) {
|
|
|
|
// move to the next non-space character
|
|
const bool was_space = IsAWhitespace(sc.ch);
|
|
if (!got_all_adverbs && was_space) {
|
|
sc.Forward(LengthToNextChar(sc, length));
|
|
}
|
|
length = startLen - (sc.currentPos - startPos); // update length remaining
|
|
|
|
// parse / eat an identifier (if type == RAKUTYPE_REGEX)
|
|
if (dp.opener == 0 && !got_ident && type == RAKUTYPE_REGEX && IsAlphabet(sc.ch)) {
|
|
|
|
// eat identifier / account for special adverb :sym<name>
|
|
bool got_sym = false;
|
|
while (sc.More()) {
|
|
sc.SetState(SCE_RAKU_IDENTIFIER);
|
|
while (sc.More() && (IsAlphaNumeric(sc.chNext)
|
|
|| sc.chNext == '_' || sc.chNext == '-')) {
|
|
sc.Forward();
|
|
}
|
|
sc.Forward();
|
|
if (got_sym && sc.ch == '>') {
|
|
sc.SetState(SCE_RAKU_OPERATOR); // '>'
|
|
sc.Forward();
|
|
break;
|
|
} else if (type == RAKUTYPE_REGEX && sc.Match(":sym<")) {
|
|
sc.SetState(SCE_RAKU_ADVERB); // ':sym'
|
|
sc.Forward(4);
|
|
sc.SetState(SCE_RAKU_OPERATOR); // '<'
|
|
sc.Forward();
|
|
got_sym = true;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
sc.SetState(state);
|
|
got_ident = true;
|
|
}
|
|
|
|
// parse / save an adverb: RAKUTYPE_REGEX only has adverbs after delim
|
|
// >= RAKUTYPE_QLANG only has adverbs before delim
|
|
else if (!got_all_adverbs && sc.ch == ':' && (!(dp.opener == 0 && got_ident)
|
|
&& !(dp.opener > 0 && type >= RAKUTYPE_QLANG))) {
|
|
sc.SetState(SCE_RAKU_ADVERB);
|
|
while (IsAlphaNumeric(sc.chNext) && sc.More()) {
|
|
sc.Forward();
|
|
str += sc.ch;
|
|
}
|
|
str += ' ';
|
|
sc.Forward();
|
|
sc.SetState(state);
|
|
}
|
|
|
|
// find starting delimiter
|
|
else if (dp.opener == 0 && (was_space || IsValidRegOrQAdjacent(sc.ch))
|
|
&& IsValidDelimiter(sc.ch)) { // make sure the delimiter is legal (most are)
|
|
sc.SetState((state = target_state));// start state here...
|
|
dp.opener = sc.ch; // this is our delimiter, get count
|
|
if (type < RAKUTYPE_QLANG) // type is Regex
|
|
dp.count = 1; // has only one delimiter
|
|
else
|
|
dp.count = GetRepeatCharCount(sc, dp.opener, length);
|
|
sc.Forward(dp.count);
|
|
}
|
|
|
|
// we must have all the adverbs by now...
|
|
else {
|
|
if (got_all_adverbs)
|
|
break; // prevent infinite loop: occurs on missing open char
|
|
got_all_adverbs = true;
|
|
}
|
|
}
|
|
|
|
// set word list / find a valid closing delimiter (or bomb!)
|
|
wordsAdverbs.Set(str.c_str());
|
|
dp.closer[0] = GetDelimiterCloseChar(dp.opener);
|
|
dp.closer[1] = 0; // no other closer char
|
|
return dp.closer[0] > 0;
|
|
}
|
|
|
|
/*
|
|
* LexerRaku::LengthToNonWordChar
|
|
* - returns the length until the next non "word" character: AlphaNum + '_'
|
|
* - also sets all the parsed chars in 's'
|
|
*/
|
|
Sci_Position LexerRaku::LengthToNonWordChar(StyleContext &sc, Sci_Position length,
|
|
char *s, const int size, Sci_Position offset) {
|
|
Sci_Position len = 0;
|
|
Sci_Position max_length = size < length ? size : length;
|
|
while (len <= max_length) {
|
|
const int ch = sc.GetRelativeCharacter(len + offset);
|
|
if (!IsWordChar(ch)) {
|
|
s[len] = '\0';
|
|
break;
|
|
}
|
|
s[len] = ch;
|
|
len++;
|
|
}
|
|
s[len + 1] = '\0';
|
|
return len;
|
|
}
|
|
|
|
/*
|
|
* LexerRaku::Lex
|
|
* - Main lexer method
|
|
*/
|
|
void SCI_METHOD LexerRaku::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
|
|
LexAccessor styler(pAccess);
|
|
DelimPair dpEmbeded; // delimiter pair: embedded comments
|
|
DelimPair dpString; // delimiter pair: string
|
|
DelimPair dpRegQ; // delimiter pair: Regex / Q Lang
|
|
std::string hereDelim; // heredoc delimiter (if in heredoc)
|
|
int hereState = 0; // heredoc state to use (Q / QQ)
|
|
int numState = 0; // number state / type
|
|
short cntDecimal = 0; // number decimal count
|
|
std::string wordLast; // last word seen
|
|
std::string identLast; // last identifier seen
|
|
std::string adverbLast; // last (single) adverb seen
|
|
WordList lastAdverbs; // last adverbs seen
|
|
Sci_Position len; // temp length value
|
|
char s[100]; // temp char string
|
|
int typeDetect = -1; // temp type detected (for regex and Q lang)
|
|
Sci_Position lengthToEnd; // length until the end of range
|
|
|
|
// Backtrack to safe start position before complex quoted elements
|
|
|
|
Sci_PositionU newStartPos = startPos;
|
|
if (initStyle != SCE_RAKU_DEFAULT) {
|
|
// Backtrack to last SCE_RAKU_DEFAULT or 0
|
|
while (newStartPos > 0) {
|
|
newStartPos--;
|
|
if (styler.StyleAt(newStartPos) == SCE_RAKU_DEFAULT)
|
|
break;
|
|
}
|
|
// Backtrack to start of line before SCE_RAKU_HEREDOC_Q?
|
|
if (initStyle == SCE_RAKU_HEREDOC_Q || initStyle == SCE_RAKU_HEREDOC_QQ) {
|
|
if (newStartPos > 0) {
|
|
newStartPos = styler.LineStart(styler.GetLine(newStartPos));
|
|
}
|
|
}
|
|
} else {
|
|
const Sci_Position line = styler.GetLine(newStartPos);
|
|
if (line > 0) {
|
|
// If the previous line is a start of a q or qq heredoc, backtrack to start of line
|
|
const Sci_Position startPreviousLine = styler.LineStart(line-1);
|
|
if (ContainsQTo(startPreviousLine, newStartPos, styler)) {
|
|
newStartPos = startPreviousLine;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// Re-calculate (any) changed startPos, length and initStyle state
|
|
if (newStartPos < startPos) {
|
|
initStyle = SCE_RAKU_DEFAULT;
|
|
length += startPos - newStartPos;
|
|
startPos = newStartPos;
|
|
}
|
|
|
|
// init StyleContext
|
|
StyleContext sc(startPos, length, initStyle, styler);
|
|
|
|
// StyleContext Loop
|
|
for (; sc.More(); sc.Forward()) {
|
|
lengthToEnd = (length - (sc.currentPos - startPos)); // end of range
|
|
|
|
/* *** Determine if the current state should terminate ************** *
|
|
* Everything within the 'switch' statement processes characters up
|
|
* until the end of a syntax highlight section / state.
|
|
* ****************************************************************** */
|
|
switch (sc.state) {
|
|
case SCE_RAKU_OPERATOR:
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
break; // FIXME: better valid operator sequences needed?
|
|
case SCE_RAKU_COMMENTLINE:
|
|
if (IsANewLine(sc.ch)) {
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
}
|
|
break;
|
|
case SCE_RAKU_COMMENTEMBED:
|
|
if ((len = LengthToDelimiter(sc, dpEmbeded, lengthToEnd)) >= 0) {
|
|
sc.Forward(len); // Move to end delimiter
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
} else {
|
|
sc.Forward(lengthToEnd); // no end delimiter found
|
|
}
|
|
break;
|
|
case SCE_RAKU_POD:
|
|
if (sc.atLineStart && sc.Match("=end pod")) {
|
|
sc.Forward(8);
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
}
|
|
break;
|
|
case SCE_RAKU_STRING:
|
|
|
|
// Process the string for variables: move to end delimiter
|
|
if ((len = LengthToDelimiter(sc, dpString, lengthToEnd)) >= 0) {
|
|
if (dpString.interpol) {
|
|
ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR);
|
|
} else {
|
|
sc.Forward(len);
|
|
}
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
} else {
|
|
sc.Forward(lengthToEnd); // no end delimiter found
|
|
}
|
|
break;
|
|
case SCE_RAKU_STRING_Q:
|
|
case SCE_RAKU_STRING_QQ:
|
|
case SCE_RAKU_STRING_Q_LANG:
|
|
|
|
// No string: previous char was the delimiter
|
|
if (dpRegQ.count == 1 && dpRegQ.isCloser(sc.chPrev)) {
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
}
|
|
|
|
// Process the string for variables: move to end delimiter
|
|
else if ((len = LengthToDelimiter(sc, dpRegQ, lengthToEnd)) >= 0) {
|
|
|
|
// set (any) heredoc delimiter string
|
|
if (lastAdverbs.InList("to")) {
|
|
GetRelativeString(sc, -1, len - dpRegQ.count, hereDelim);
|
|
hereState = SCE_RAKU_HEREDOC_Q; // default heredoc state
|
|
}
|
|
|
|
// select variable identifiers
|
|
if (sc.state == SCE_RAKU_STRING_QQ || lastAdverbs.InList("qq")) {
|
|
ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR);
|
|
hereState = SCE_RAKU_HEREDOC_QQ; // potential heredoc state
|
|
} else {
|
|
sc.Forward(len);
|
|
}
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
} else {
|
|
sc.Forward(lengthToEnd); // no end delimiter found
|
|
}
|
|
break;
|
|
case SCE_RAKU_HEREDOC_Q:
|
|
case SCE_RAKU_HEREDOC_QQ:
|
|
if ((len = LengthToEndHeredoc(sc, styler, lengthToEnd, hereDelim.c_str())) >= 0) {
|
|
// select variable identifiers
|
|
if (sc.state == SCE_RAKU_HEREDOC_QQ) {
|
|
ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR);
|
|
} else {
|
|
sc.Forward(len);
|
|
}
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
} else {
|
|
sc.Forward(lengthToEnd); // no end delimiter found
|
|
}
|
|
hereDelim.clear(); // clear heredoc delimiter
|
|
break;
|
|
case SCE_RAKU_REGEX:
|
|
// account for typeDetect = RAKUTYPE_REGEX_S/TR/Y
|
|
while (sc.state == SCE_RAKU_REGEX) {
|
|
|
|
// No string: previous char was the delimiter
|
|
if (dpRegQ.count == 1 && dpRegQ.isCloser(sc.chPrev)) {
|
|
if (ProcessRegexTwinCapture(sc, lengthToEnd, typeDetect, dpRegQ))
|
|
continue;
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
break;
|
|
}
|
|
|
|
// Process the string for variables: move to end delimiter
|
|
else if ((len = LengthToDelimiter(sc, dpRegQ, lengthToEnd)) >= 0) {
|
|
ProcessStringVars(sc, len, SCE_RAKU_REGEX_VAR);
|
|
if (ProcessRegexTwinCapture(sc, lengthToEnd, typeDetect, dpRegQ))
|
|
continue;
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
break;
|
|
} else {
|
|
sc.Forward(lengthToEnd); // no end delimiter found
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
case SCE_RAKU_NUMBER:
|
|
if (sc.ch == '.') {
|
|
if (sc.chNext == '.') { // '..' is an operator
|
|
sc.SetState(SCE_RAKU_OPERATOR);
|
|
sc.Forward();
|
|
if (sc.chNext == '.') // '...' is also an operator
|
|
sc.Forward();
|
|
break;
|
|
} else if (numState > RAKUNUM_FLOAT_EXP
|
|
&& (cntDecimal < 1 || numState == RAKUNUM_VERSION)) {
|
|
cntDecimal++;
|
|
sc.Forward();
|
|
} else {
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
break; // too many decimal places
|
|
}
|
|
}
|
|
switch (numState) {
|
|
case RAKUNUM_BINARY:
|
|
if (!IsNumberChar(sc.ch, 2))
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
break;
|
|
case RAKUNUM_OCTAL:
|
|
if (!IsNumberChar(sc.ch, 8))
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
break;
|
|
case RAKUNUM_HEX:
|
|
if (!IsNumberChar(sc.ch, 16))
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
break;
|
|
case RAKUNUM_DECIMAL:
|
|
case RAKUNUM_VERSION:
|
|
if (!IsNumberChar(sc.ch))
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
}
|
|
break;
|
|
case SCE_RAKU_WORD:
|
|
case SCE_RAKU_FUNCTION:
|
|
case SCE_RAKU_TYPEDEF:
|
|
case SCE_RAKU_ADVERB:
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
break;
|
|
case SCE_RAKU_MU:
|
|
case SCE_RAKU_POSITIONAL:
|
|
case SCE_RAKU_ASSOCIATIVE:
|
|
case SCE_RAKU_CALLABLE:
|
|
case SCE_RAKU_IDENTIFIER:
|
|
case SCE_RAKU_GRAMMAR:
|
|
case SCE_RAKU_CLASS:
|
|
sc.SetState(SCE_RAKU_DEFAULT);
|
|
break;
|
|
}
|
|
|
|
/* *** Determine if a new state should be entered ******************* *
|
|
* Everything below here identifies the beginning of a state, all or part
|
|
* of the characters within this state are processed here, the rest are
|
|
* completed above in the terminate state section.
|
|
* ****************************************************************** */
|
|
if (sc.state == SCE_RAKU_DEFAULT) {
|
|
|
|
// --- Single line comment
|
|
if (sc.ch == '#') {
|
|
sc.SetState(SCE_RAKU_COMMENTLINE);
|
|
}
|
|
|
|
// --- POD block
|
|
else if (sc.atLineStart && sc.Match("=begin pod")) {
|
|
sc.SetState(SCE_RAKU_POD);
|
|
sc.Forward(10);
|
|
}
|
|
|
|
// --- String (normal)
|
|
else if (sc.chPrev != '\\' && (IsValidQuoteOpener(sc.ch, dpString, RAKUDELIM_QUOTE))) {
|
|
sc.SetState(SCE_RAKU_STRING);
|
|
}
|
|
|
|
// --- String (Q Language) ----------------------------------------
|
|
// - https://docs.raku.org/language/quoting
|
|
// - Q :adverb :adverb //;
|
|
// - q,qx,qw,qq,qqx,qqw,qqww :adverb :adverb //;
|
|
else if (IsQLangStartAtScPos(sc, typeDetect, lengthToEnd)) {
|
|
int state = SCE_RAKU_STRING_Q_LANG;
|
|
Sci_Position forward = 1; // single char ident (default)
|
|
if (typeDetect > RAKUTYPE_QLANG) {
|
|
state = SCE_RAKU_STRING_Q;
|
|
if (typeDetect == RAKUTYPE_STR_WQ)
|
|
forward = 0; // no char ident
|
|
}
|
|
if (typeDetect > RAKUTYPE_STR_Q) {
|
|
if (typeDetect == RAKUTYPE_STR_QQ)
|
|
state = SCE_RAKU_STRING_QQ;
|
|
forward++; // two char ident
|
|
}
|
|
if (typeDetect > RAKUTYPE_STR_QQ)
|
|
forward++; // three char ident
|
|
if (typeDetect == RAKUTYPE_STR_QQWW)
|
|
forward++; // four char ident
|
|
|
|
// Proceed: check for a valid character after statement
|
|
if (IsValidRegOrQAdjacent(sc.GetRelative(forward)) || typeDetect == RAKUTYPE_QLANG) {
|
|
sc.SetState(state);
|
|
sc.Forward(forward);
|
|
lastAdverbs.Clear();
|
|
|
|
// Process: adverbs / opening delimiter / adverbs after delim
|
|
if (ProcessValidRegQlangStart(sc, lengthToEnd, typeDetect,
|
|
lastAdverbs, dpRegQ))
|
|
sc.SetState(state);
|
|
}
|
|
}
|
|
|
|
// --- Regex (rx/s/m/tr/y) ----------------------------------------
|
|
// - https://docs.raku.org/language/regexes
|
|
else if ((IsRegexStartAtScPos(sc, typeDetect, setOperator) || regexIdent.InList(wordLast.c_str()))) {
|
|
if (typeDetect == -1) { // must be a regex identifier word
|
|
wordLast.clear();
|
|
typeDetect = RAKUTYPE_REGEX;
|
|
}
|
|
Sci_Position forward = 0; // no ident (RAKUTYPE_REGEX, RAKUTYPE_REGEX_NORM)
|
|
if (typeDetect > 0 && typeDetect != RAKUTYPE_REGEX)
|
|
forward++; // single char ident
|
|
if (typeDetect > RAKUTYPE_REGEX)
|
|
forward++; // two char ident
|
|
|
|
// Proceed: check for a valid character after statement
|
|
if (IsValidRegOrQAdjacent(sc.GetRelative(forward)) || typeDetect == RAKUTYPE_REGEX_NORM) {
|
|
sc.SetState(SCE_RAKU_REGEX);
|
|
sc.Forward(forward);
|
|
lastAdverbs.Clear();
|
|
|
|
// Process: adverbs / opening delimiter / adverbs after delim
|
|
if (ProcessValidRegQlangStart(sc, lengthToEnd, typeDetect,
|
|
lastAdverbs, dpRegQ))
|
|
sc.SetState(SCE_RAKU_REGEX);
|
|
}
|
|
}
|
|
|
|
// --- Numbers ----------------------------------------------------
|
|
else if (IsValidIdentPrecede(sc.chPrev) && (IsNumberChar(sc.ch)
|
|
|| (sc.ch == 'v' && IsNumberChar(sc.chNext) && wordLast == "use"))) {
|
|
numState = RAKUNUM_DECIMAL; // default: decimal (base 10)
|
|
cntDecimal = 0;
|
|
sc.SetState(SCE_RAKU_NUMBER);
|
|
if (sc.ch == 'v') // forward past 'v'
|
|
sc.Forward();
|
|
if (wordLast == "use") { // package version number
|
|
numState = RAKUNUM_VERSION;
|
|
} else if (sc.ch == '0') { // other type of number
|
|
switch (sc.chNext) {
|
|
case 'b': // binary (base 2)
|
|
numState = RAKUNUM_BINARY;
|
|
break;
|
|
case 'o': // octal (base 8)
|
|
numState = RAKUNUM_OCTAL;
|
|
break;
|
|
case 'x': // hexadecimal (base 16)
|
|
numState = RAKUNUM_HEX;
|
|
}
|
|
if (numState != RAKUNUM_DECIMAL)
|
|
sc.Forward(); // forward to number type char
|
|
}
|
|
}
|
|
|
|
// --- Keywords / functions / types / barewords -------------------
|
|
else if ((sc.currentPos == 0 || sc.atLineStart || IsValidIdentPrecede(sc.chPrev))
|
|
&& IsWordStartChar(sc.ch)) {
|
|
len = LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s));
|
|
if (keywords.InList(s)) {
|
|
sc.SetState(SCE_RAKU_WORD); // Keywords
|
|
} else if(functions.InList(s)) {
|
|
sc.SetState(SCE_RAKU_FUNCTION); // Functions
|
|
} else if(typesBasic.InList(s)) {
|
|
sc.SetState(SCE_RAKU_TYPEDEF); // Types (basic)
|
|
} else if(typesComposite.InList(s)) {
|
|
sc.SetState(SCE_RAKU_TYPEDEF); // Types (composite)
|
|
} else if(typesDomainSpecific.InList(s)) {
|
|
sc.SetState(SCE_RAKU_TYPEDEF); // Types (domain-specific)
|
|
} else if(typesExceptions.InList(s)) {
|
|
sc.SetState(SCE_RAKU_TYPEDEF); // Types (exceptions)
|
|
} else {
|
|
if (wordLast == "class")
|
|
sc.SetState(SCE_RAKU_CLASS); // a Class ident
|
|
else if (wordLast == "grammar")
|
|
sc.SetState(SCE_RAKU_GRAMMAR); // a Grammar ident
|
|
else
|
|
sc.SetState(SCE_RAKU_IDENTIFIER); // Bareword
|
|
identLast = s; // save identifier
|
|
}
|
|
if (adverbLast == "sym") { // special adverb ":sym"
|
|
sc.SetState(SCE_RAKU_IDENTIFIER); // treat as identifier
|
|
identLast = s; // save identifier
|
|
}
|
|
if (sc.state != SCE_RAKU_IDENTIFIER)
|
|
wordLast = s; // save word
|
|
sc.Forward(len - 1); // ...forward past word
|
|
}
|
|
|
|
// --- Adverbs ----------------------------------------------------
|
|
else if (sc.ch == ':' && IsWordStartChar(sc.chNext)) {
|
|
len = LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s), 1);
|
|
if (adverbs.InList(s)) {
|
|
sc.SetState(SCE_RAKU_ADVERB); // Adverbs (begin with ':')
|
|
adverbLast = s; // save word
|
|
sc.Forward(len); // ...forward past word (less offset: 1)
|
|
}
|
|
}
|
|
|
|
// --- Identifiers: $mu / @positional / %associative / &callable --
|
|
// see: https://docs.raku.org/language/variables
|
|
else if (setSigil.Contains(sc.ch) && (setTwigil.Contains(sc.chNext)
|
|
|| setSpecialVar.Contains(sc.chNext)
|
|
|| IsWordStartChar(sc.chNext))) {
|
|
|
|
// State based on sigil
|
|
switch (sc.ch) {
|
|
case '$': sc.SetState(SCE_RAKU_MU);
|
|
break;
|
|
case '@': sc.SetState(SCE_RAKU_POSITIONAL);
|
|
break;
|
|
case '%': sc.SetState(SCE_RAKU_ASSOCIATIVE);
|
|
break;
|
|
case '&': sc.SetState(SCE_RAKU_CALLABLE);
|
|
}
|
|
const int state = sc.state;
|
|
sc.Forward();
|
|
char ch_delim = 0;
|
|
if (setSpecialVar.Contains(sc.ch)
|
|
&& !setWord.Contains(sc.chNext)) { // Process Special Var
|
|
ch_delim = -1;
|
|
} else if (setTwigil.Contains(sc.ch)) { // Process Twigil
|
|
sc.SetState(SCE_RAKU_OPERATOR);
|
|
if (sc.ch == '<' && setWord.Contains(sc.chNext))
|
|
ch_delim = '>';
|
|
sc.Forward();
|
|
sc.SetState(state);
|
|
}
|
|
|
|
// Process (any) identifier
|
|
if (ch_delim >= 0) {
|
|
sc.Forward(LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s)) - 1);
|
|
if (ch_delim > 0 && sc.chNext == ch_delim) {
|
|
sc.Forward();
|
|
sc.SetState(SCE_RAKU_OPERATOR);
|
|
}
|
|
identLast = s; // save identifier
|
|
}
|
|
}
|
|
|
|
// --- Operators --------------------------------------------------
|
|
else if (IsOperatorChar(sc.ch)) {
|
|
// FIXME: better valid operator sequences needed?
|
|
sc.SetState(SCE_RAKU_OPERATOR);
|
|
}
|
|
|
|
// --- Heredoc: begin ---------------------------------------------
|
|
else if (!hereDelim.empty() && sc.atLineEnd) {
|
|
if (IsANewLine(sc.ch))
|
|
sc.Forward(); // skip a possible CRLF situation
|
|
sc.SetState(hereState);
|
|
}
|
|
|
|
// Reset words: on operator semi-colon OR '}' (end of statement)
|
|
if (sc.state == SCE_RAKU_OPERATOR && (sc.ch == ';' || sc.ch == '}')) {
|
|
wordLast.clear();
|
|
identLast.clear();
|
|
adverbLast.clear();
|
|
}
|
|
}
|
|
|
|
/* *** Determine if an "embedded comment" is to be entered ********** *
|
|
* This type of embedded comment section, or multi-line comment comes
|
|
* after a normal comment has begun... e.g: #`[ ... ]
|
|
* ****************************************************************** */
|
|
else if (sc.state == SCE_RAKU_COMMENTLINE && sc.chPrev == '#' && sc.ch == '`') {
|
|
if (IsBracketOpenChar(sc.chNext)) {
|
|
sc.Forward(); // Condition met for "embedded comment"
|
|
dpEmbeded.opener = sc.ch;
|
|
|
|
// Find the opposite (termination) closing bracket (if any)
|
|
dpEmbeded.closer[0] = GetBracketCloseChar(dpEmbeded.opener);
|
|
if (dpEmbeded.closer[0] > 0) { // Enter "embedded comment"
|
|
|
|
// Find multiple opening character occurrence
|
|
dpEmbeded.count = GetRepeatCharCount(sc, dpEmbeded.opener, lengthToEnd);
|
|
sc.SetState(SCE_RAKU_COMMENTEMBED);
|
|
sc.Forward(dpEmbeded.count - 1); // incremented in the next loop
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// And we're done...
|
|
sc.Complete();
|
|
}
|
|
|
|
/*
|
|
* LexerRaku::Lex
|
|
* - Main fold method
|
|
* NOTE: although Raku uses and supports UNICODE characters, we're only looking
|
|
* at normal chars here, using 'SafeGetCharAt' - for folding purposes
|
|
* that is all we need.
|
|
*/
|
|
#define RAKU_HEADFOLD_SHIFT 4
|
|
#define RAKU_HEADFOLD_MASK 0xF0
|
|
void SCI_METHOD LexerRaku::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) {
|
|
|
|
// init LexAccessor / return if fold option is off
|
|
if (!options.fold) return;
|
|
LexAccessor styler(pAccess);
|
|
|
|
// init char and line positions
|
|
const Sci_PositionU endPos = startPos + length;
|
|
Sci_Position lineCurrent = styler.GetLine(startPos);
|
|
|
|
// Backtrack to last SCE_RAKU_DEFAULT line
|
|
if (startPos > 0 && lineCurrent > 0) {
|
|
while (lineCurrent > 0 && styler.StyleAt(startPos) != SCE_RAKU_DEFAULT) {
|
|
lineCurrent--;
|
|
startPos = styler.LineStart(lineCurrent);
|
|
}
|
|
lineCurrent = styler.GetLine(startPos);
|
|
}
|
|
Sci_PositionU lineStart = startPos;
|
|
Sci_PositionU lineStartNext = styler.LineStart(lineCurrent + 1);
|
|
|
|
// init line folding level
|
|
int levelPrev = SC_FOLDLEVELBASE;
|
|
if (lineCurrent > 0)
|
|
levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
|
|
int levelCurrent = levelPrev;
|
|
|
|
// init char and style variables
|
|
char chNext = styler[startPos];
|
|
int stylePrev = styler.StyleAt(startPos - 1);
|
|
int styleNext = styler.StyleAt(startPos);
|
|
int styleNextStartLine = styler.StyleAt(lineStartNext);
|
|
int visibleChars = 0;
|
|
bool wasCommentMulti = false;
|
|
|
|
// main loop
|
|
for (Sci_PositionU i = startPos; i < endPos; i++) {
|
|
|
|
// next char, style and flags
|
|
const char ch = chNext;
|
|
chNext = styler.SafeGetCharAt(i + 1);
|
|
const int style = styleNext;
|
|
styleNext = styler.StyleAt(i + 1);
|
|
const bool atEOL = i == (lineStartNext - 1);
|
|
const bool atLineStart = i == lineStart;
|
|
|
|
// --- Comments / Multi-line / POD ------------------------------------
|
|
if (options.foldComment) {
|
|
|
|
// Multi-line
|
|
if (options.foldCommentMultiline) {
|
|
if (style == SCE_RAKU_COMMENTLINE && atLineStart && ch == '#' && chNext == '`'
|
|
&& styleNextStartLine == SCE_RAKU_COMMENTEMBED) {
|
|
levelCurrent++;
|
|
wasCommentMulti = true; // don't confuse line comments
|
|
} else if (style == SCE_RAKU_COMMENTEMBED && atLineStart
|
|
&& styleNextStartLine != SCE_RAKU_COMMENTEMBED) {
|
|
levelCurrent--;
|
|
}
|
|
}
|
|
|
|
// Line comments
|
|
if (!wasCommentMulti && atEOL && stylePrev == SCE_RAKU_COMMENTLINE
|
|
&& IsCommentLine(lineCurrent, styler)) {
|
|
if (!IsCommentLine(lineCurrent - 1, styler)
|
|
&& IsCommentLine(lineCurrent + 1, styler))
|
|
levelCurrent++;
|
|
else if (IsCommentLine(lineCurrent - 1, styler)
|
|
&& !IsCommentLine(lineCurrent + 1, styler))
|
|
levelCurrent--;
|
|
}
|
|
|
|
// POD
|
|
if (options.foldCommentPOD && atLineStart && style == SCE_RAKU_POD) {
|
|
if (styler.Match(i, "=begin"))
|
|
levelCurrent++;
|
|
else if (styler.Match(i, "=end"))
|
|
levelCurrent--;
|
|
}
|
|
}
|
|
|
|
// --- Code block -----------------------------------------------------
|
|
if (style == SCE_RAKU_OPERATOR) {
|
|
if (ch == '{') {
|
|
if (levelCurrent < levelPrev) levelPrev--;
|
|
levelCurrent++;
|
|
} else if (ch == '}') {
|
|
levelCurrent--;
|
|
}
|
|
}
|
|
|
|
// --- at end of line / range / apply fold ----------------------------
|
|
if (atEOL) {
|
|
int level = levelPrev;
|
|
|
|
// set level flags
|
|
level |= levelCurrent << 16;
|
|
if (visibleChars == 0 && options.foldCompact)
|
|
level |= SC_FOLDLEVELWHITEFLAG;
|
|
if ((levelCurrent > levelPrev) && (visibleChars > 0))
|
|
level |= SC_FOLDLEVELHEADERFLAG;
|
|
if (level != styler.LevelAt(lineCurrent)) {
|
|
styler.SetLevel(lineCurrent, level);
|
|
}
|
|
lineCurrent++;
|
|
lineStart = lineStartNext;
|
|
lineStartNext = styler.LineStart(lineCurrent + 1);
|
|
styleNextStartLine = styler.StyleAt(lineStartNext);
|
|
levelPrev = levelCurrent;
|
|
visibleChars = 0;
|
|
wasCommentMulti = false;
|
|
}
|
|
|
|
// increment visibleChars / set previous char
|
|
if (!isspacechar(ch))
|
|
visibleChars++;
|
|
stylePrev = style;
|
|
}
|
|
|
|
// Done: set real level of the next line
|
|
int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
|
|
styler.SetLevel(lineCurrent, levelPrev | flagsNext);
|
|
}
|
|
|
|
/*----------------------------------------------------------------------------*
|
|
* --- Scintilla: LexerModule ---
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
// Lexer module object for Raku: constructed with the SCLEX_RAKU identifier,
// the LexerRaku::LexerFactoryRaku factory, the language name "raku" and the
// rakuWordLists descriptions.
extern const LexerModule lmRaku(SCLEX_RAKU, LexerRaku::LexerFactoryRaku, "raku", rakuWordLists);
|