notepad-plus-plus/lexilla/lexers/LexRaku.cxx

/** @file LexRaku.cxx
 ** Lexer for Raku
 **
 ** Copyright (c) 2019 Mark Reay <mark@reay.net.au>
 **/
// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.

/*
 * Raku (Perl6) Lexer for Scintilla
 * ---------------------------------
 * ---------------------------------
 * 06-Dec-2019: More Unicode support:
 *              - Added a full scope of allowed numbers and letters
 * 29-Nov-2019: More highlighting / implemented basic folding:
 *              - Operators (blanket cover, no sequence checking)
 *              - Class / Grammar name highlighting
 *              - Folding:
 *                - Comments: line / multi-line
 *                - POD sections
 *                - Code blocks {}
 * 26-Nov-2019: Basic syntax highlighting covering the following:
 *              - Comments, both line and embedded (multi-line)
 *              - POD, no inline highlighting as yet...
 *              - Heredoc block string, with variable highlighting (with qq)
 *              - Strings, with variable highlighting (with ")
 *              - Q Language, including adverbs (also basic q and qq)
 *              - Regex, including adverbs
 *              - Numbers
 *              - Bareword / identifiers
 *              - Types
 *              - Variables: mu, positional, associative, callable
 * TODO:
 *       - POD inline
 *       - Better operator sequence coverage
 */

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdarg.h>
#include <assert.h>
#include <ctype.h>

#include <string>
#include <string_view>
#include <vector>
#include <map>
#include <functional>

#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"

#include "WordList.h"
#include "LexAccessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "CharacterCategory.h"
#include "LexerModule.h"
#include "OptionSet.h"
#include "DefaultLexer.h"

using namespace Scintilla;
using namespace Lexilla;

namespace { // anonymous namespace to isolate any name clashes
/*----------------------------------------------------------------------------*
 * --- DEFINITIONS: OPTIONS / CONSTANTS ---
 *----------------------------------------------------------------------------*/

// Number types
// - the numeric values are compared against each other, so keep the order
#define RAKUNUM_BINARY		1	// order is significant: 1-3 cannot have a dot
#define RAKUNUM_OCTAL		2
#define RAKUNUM_FLOAT_EXP	3	// exponent part only
#define RAKUNUM_HEX			4	// may be a hex float
#define RAKUNUM_DECIMAL		5	// 1-5 are numbers; 6-7 are strings
#define RAKUNUM_VECTOR		6
#define RAKUNUM_V_VECTOR	7
#define RAKUNUM_VERSION		8	// can contain multiple '.'s
#define RAKUNUM_BAD			9

// Regex / Q string types
// - values below RAKUTYPE_QLANG are regex types, values from RAKUTYPE_QLANG
//   upwards are Q language / string types: ProcessValidRegQlangStart tests
//   'type < RAKUTYPE_QLANG' and 'type >= RAKUTYPE_QLANG', so keep the order
// - "N char ident" is the length of the keyword that introduces the construct
#define RAKUTYPE_REGEX_NORM		0	// 0 char ident
#define RAKUTYPE_REGEX_S		1	// order is significant:
#define RAKUTYPE_REGEX_M		2	// 1 char ident
#define RAKUTYPE_REGEX_Y		3	// 1 char ident
#define RAKUTYPE_REGEX			4	// > RAKUTYPE_REGEX == 2 char identifiers
#define RAKUTYPE_REGEX_RX		5	// 2 char ident
#define RAKUTYPE_REGEX_TR		6	// 2 char ident
#define RAKUTYPE_QLANG			7	// < RAKUTYPE_QLANG == RAKUTYPE_REGEX_?
#define RAKUTYPE_STR_WQ			8	// 0 char ident < word quote >
#define RAKUTYPE_STR_Q			9	// 1 char ident
#define RAKUTYPE_STR_QX			10	// 2 char ident
#define RAKUTYPE_STR_QW			11	// 2 char ident
#define RAKUTYPE_STR_QQ			12	// 2 char ident
#define RAKUTYPE_STR_QQX		13	// 3 char ident
#define RAKUTYPE_STR_QQW		14	// 3 char ident
#define RAKUTYPE_STR_QQWW		15	// 4 char ident

// Delimiter types
// - selects how IsValidQuoteOpener looks up the closing character
#define RAKUDELIM_BRACKET		0	// bracket: regex, Q language
#define RAKUDELIM_QUOTE			1	// quote: normal string

// rakuWordLists: descriptions of the keyword lists as defined in config
// - nullptr terminated
// - the position of each entry is the list index 'n' handled by
//   LexerRaku::WordListSet (0 = keywords ... 6 = adverbs)
const char *const rakuWordLists[] = {
	"Keywords and identifiers",
	"Functions",
	"Types basic",
	"Types composite",
	"Types domain-specific",
	"Types exception",
	"Adverbs",
	nullptr,
};

// Lexer options together with their default values
// - every field is bound to a property name by OptionSetRaku
struct OptionsRaku {
	bool fold;					// "fold"
	bool foldCompact;			// "fold.compact"
	bool foldComment;			// "fold.comment"
	bool foldCommentMultiline;	// "fold.raku.comment.multiline"
	bool foldCommentPOD;		// "fold.raku.comment.pod"
	OptionsRaku() :
		fold(true),
		foldCompact(false),
		foldComment(true),
		foldCommentMultiline(true),
		foldCommentPOD(true) {
	}
};

// Option set for the Raku lexer
// - binds each property name to the OptionsRaku member that stores its value
// - registers the word list descriptions from rakuWordLists
struct OptionSetRaku : public OptionSet<OptionsRaku> {
	OptionSetRaku() {
		// generic folding properties (no description text supplied)
		DefineProperty("fold",			&OptionsRaku::fold);
		DefineProperty("fold.comment",	&OptionsRaku::foldComment);
		DefineProperty("fold.compact",	&OptionsRaku::foldCompact);

		// Raku specific folding properties, each with a description
		DefineProperty("fold.raku.comment.multiline",	&OptionsRaku::foldCommentMultiline,
			"Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
		DefineProperty("fold.raku.comment.pod",			&OptionsRaku::foldCommentPOD,
			"Set this property to 0 to disable folding POD comments when fold.comment=1.");

		// init word lists
		DefineWordListSets(rakuWordLists);
	}
};

// Delimiter pair: an opening character and up to two accepted closers
struct DelimPair {
	int opener;		// opening character
	int closer[2];	// closing characters (an unused slot holds 0)
	bool interpol;	// may variables be interpolated inside?
	short count;	// how many times the delimiter character is repeated
	DelimPair() : opener(0), closer{0, 0}, interpol(false), count(0) {
	}
	// true if ch equals either closer slot (so 0 matches an unused slot)
	bool isCloser(int ch) const {
		if (ch == closer[0])
			return true;
		return ch == closer[1];
	}
};

/*----------------------------------------------------------------------------*
 * --- FUNCTIONS ---
 *----------------------------------------------------------------------------*/

/*
 * IsANewLine
 * - true for a line feed or a carriage return, false for anything else
 */
constexpr bool IsANewLine(int ch) noexcept {
	return (ch == '\n') ? true : (ch == '\r');
}

/*
 * IsAWhitespace
 * - true when ch is a new line char, or passes IsASpaceOrTab
 */
bool IsAWhitespace(int ch) noexcept {
	if (IsANewLine(ch))
		return true;
	return IsASpaceOrTab(ch);
}

/*
 * IsAlphabet
 * - true for the ASCII letters a-z and A-Z only
 */
constexpr bool IsAlphabet(int ch) noexcept {
	const bool isLower = ('a' <= ch) && (ch <= 'z');
	const bool isUpper = ('A' <= ch) && (ch <= 'Z');
	return isLower || isUpper;
}

/*
 * IsCommentLine
 * - returns true if the given line is a comment line of style 'type'
 *   - type == SCE_RAKU_COMMENTEMBED: true when the character just before
 *     the last position of the line carries that style
 *   - any other type (default SCE_RAKU_COMMENTLINE): true when the first
 *     character that is not a space / tab is a '#' in that style which is
 *     not followed by a back-tick (that would open an embedded comment)
 * - the last position of the line itself is never examined
 * modified from: LexPerl.cxx
 */
bool IsCommentLine(Sci_Position line, LexAccessor &styler, int type = SCE_RAKU_COMMENTLINE) {
	const Sci_Position lineBegin = styler.LineStart(line);
	const Sci_Position lineLast = styler.LineStart(line + 1) - 1;
	const bool wantEmbedded = (type == SCE_RAKU_COMMENTEMBED);
	Sci_Position cur = lineBegin;
	while (cur < lineLast) {
		const char chCur = styler[cur];
		const int styleCur = styler.StyleAt(cur);
		if (wantEmbedded) {
			// only the final examined position decides for embedded comments
			if (styleCur == type && cur == (lineLast - 1))
				return true;
		} else {
			// make sure the line is NOT a SCE_RAKU_COMMENTEMBED
			const bool hashInStyle = (chCur == '#') && (styleCur == type);
			if (hashInStyle && styler[cur + 1] != '`')
				return true;
			if (!IsASpaceOrTab(chCur))
				return false;	// something other than indentation came first
		}
		cur++;
	}
	return false;
}

/*
 * ContainsQTo
 * - gathers every character in [start, end) that is styled SCE_RAKU_ADVERB
 *   and returns true if the gathered text contains ":to", which indicates
 *   the start of a SCE_RAKU_HEREDOC_Q or SCE_RAKU_HEREDOC_QQ.
 */
bool ContainsQTo(Sci_Position start, Sci_Position end, LexAccessor &styler) {
	std::string adverbChars;
	Sci_Position cur = start;
	while (cur < end) {
		if (styler.StyleAt(cur) == SCE_RAKU_ADVERB)
			adverbChars += styler[cur];
		cur++;
	}
	const std::string::size_type found = adverbChars.find(":to");
	return found != std::string::npos;
}

/*
 * GetBracketCloseChar
 * - returns the end bracket char: opposite of start, or 0 if ch is not in
 *   one of the accepted categories (ccSm, ccPs, ccPi)
 *   - see: http://www.unicode.org/Public/5.1.0/ucd/BidiMirroring.txt (first section)
 * - Categories are general matches for valid BiDi types
 * - Most closer chars are opener + 1: the tables below only hold the
 *   exceptions, every other char of an accepted category yields ch + 1
 * - NOTE(review): unlisted ccSm chars also take the ch + 1 path; confirm that
 *   this is intended for symbols that are not mirrored brackets
 */
int GetBracketCloseChar(const int ch) noexcept {
	struct Mirror {
		int open;
		int close;
	};
	// exceptions looked up for category ccSm
	static constexpr Mirror mathPairs[] = {
		{0x3C, 0x3E},		// LESS-THAN SIGN
		{0x2208, 0x220B},	// ELEMENT OF
		{0x2209, 0x220C},	// NOT AN ELEMENT OF
		{0x220A, 0x220D},	// SMALL ELEMENT OF
		{0x2215, 0x29F5},	// DIVISION SLASH
		{0x2243, 0x22CD},	// ASYMPTOTICALLY EQUAL TO
		{0x2298, 0x29B8},	// CIRCLED DIVISION SLASH
		{0x22A6, 0x2ADE},	// ASSERTION
		{0x22A8, 0x2AE4},	// TRUE
		{0x22A9, 0x2AE3},	// FORCES
		{0x22AB, 0x2AE5},	// DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
		{0x22F2, 0x22FA},	// ELEMENT OF WITH LONG HORIZONTAL STROKE
		{0x22F3, 0x22FB},	// ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
		{0x22F4, 0x22FC},	// SMALL ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
		{0x22F6, 0x22FD},	// ELEMENT OF WITH OVERBAR
		{0x22F7, 0x22FE},	// SMALL ELEMENT OF WITH OVERBAR
		{0xFF1C, 0xFF1E},	// FULLWIDTH LESS-THAN SIGN
	};
	// exceptions looked up for category ccPs
	static constexpr Mirror openPairs[] = {
		{0x5B, 0x5D},		// LEFT SQUARE BRACKET
		{0x7B, 0x7D},		// LEFT CURLY BRACKET
		{0x298D, 0x2990},	// LEFT SQUARE BRACKET WITH TICK IN TOP CORNER
		{0x298F, 0x298E},	// LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
		{0xFF3B, 0xFF3D},	// FULLWIDTH LEFT SQUARE BRACKET
		{0xFF5B, 0xFF5D},	// FULLWIDTH LEFT CURLY BRACKET
	};

	const CharacterCategory cc = CategoriseCharacter(ch);
	if (cc == ccSm) {
		for (const Mirror &pair : mathPairs) {
			if (pair.open == ch)
				return pair.close;
		}
	} else if (cc == ccPs) {
		for (const Mirror &pair : openPairs) {
			if (pair.open == ch)
				return pair.close;
		}
	} else if (cc != ccPi) {
		return 0;	// not a bracket-like category
	}
	return ch + 1;	// general rule for the accepted categories
}

/*
 * IsValidQuoteOpener
 * - tests whether ch can open a delimited section of the given type and
 *   fills in dp accordingly
 *   - dp.closer[] and dp.interpol are always reset first
 *   - RAKUDELIM_QUOTE: only the quote chars listed below are accepted; for
 *     any other char the function returns false straight away, leaving
 *     dp.opener and dp.count untouched
 *   - RAKUDELIM_BRACKET (default): closer comes from GetBracketCloseChar
 *   - otherwise dp.opener = ch and dp.count = 1
 * - returns true if a closer (> 0) was found
 */
bool IsValidQuoteOpener(const int ch, DelimPair &dp, int type = RAKUDELIM_BRACKET) noexcept {
	// start from a clean slate: no closers, interpolation allowed
	dp.closer[0] = 0;
	dp.closer[1] = 0;
	dp.interpol = true;

	if (type == RAKUDELIM_BRACKET) {
		dp.closer[0] = GetBracketCloseChar(ch);
	} else if (type == RAKUDELIM_QUOTE) {
		int closerMain = 0;		// first accepted closer
		int closerAlt = 0;		// optional second closer
		bool canInterpolate = true;
		switch (ch) {
			case '\'':		// APOSTROPHE
				closerMain = '\'';
				canInterpolate = false;
				break;
			case '"':		// QUOTATION MARK
				closerMain = '"';
				break;
			case 0x2018:	// LEFT SINGLE QUOTATION MARK
				closerMain = 0x2019;
				canInterpolate = false;
				break;
			case 0x201C:	// LEFT DOUBLE QUOTATION MARK
				closerMain = 0x201D;
				break;
			case 0x201D:	// RIGHT DOUBLE QUOTATION MARK
				closerMain = 0x201C;
				break;
			case 0x201E:	// DOUBLE LOW-9 QUOTATION MARK
				closerMain = 0x201C;
				closerAlt = 0x201D;
				break;
			case 0xFF62:	// HALFWIDTH LEFT CORNER BRACKET
				closerMain = 0xFF63;
				canInterpolate = false;
				break;
			default:
				return false;	// not a quote opener
		}
		dp.closer[0] = closerMain;
		dp.closer[1] = closerAlt;
		dp.interpol = canInterpolate;
	}
	dp.opener = ch;
	dp.count = 1;
	return dp.closer[0] > 0;
}

/*
 * IsBracketOpenChar
 * - true if GetBracketCloseChar knows a closing char for ch
 */
bool IsBracketOpenChar(int ch) noexcept {
	const int chClose = GetBracketCloseChar(ch);
	return chClose > 0;
}

/*
 * IsValidRegOrQAdjacent
 * - returns true if ch is a valid character to put directly after Q / q
 *   - rejected: alphanumerics, '_', '(', ')' and the apostrophe
 *   * ref: Q Language: https://docs.raku.org/language/quoting
 */
bool IsValidRegOrQAdjacent(int ch) noexcept {
	switch (ch) {
		case '_':
		case '(':
		case ')':
		case '\'':
			return false;
		default:
			return !IsAlphaNumeric(ch);
	}
}

/*
 * IsValidRegOrQPrecede
 * - returns true if ch is a valid character to put directly before Q / q
 *   - rejected: alphanumerics and '_'
 *   * ref: Q Language: https://docs.raku.org/language/quoting
 */
bool IsValidRegOrQPrecede(int ch) noexcept {
	if (ch == '_')
		return false;
	return !IsAlphaNumeric(ch);
}

/*
 * MatchCharInRange
 * - returns true if the match character is found in range (of length)
 *   - the search covers relative positions 1 .. length - 1 (the current
 *     character itself is not tested)
 * - ignoreDelim (default false)
 *   - false: a match directly preceded by a backslash is skipped, and no
 *     match is reported at all when the char before the current position
 *     is a backslash
 *   - true: backslashes are ignored
 */
bool MatchCharInRange(StyleContext &sc, const Sci_Position length,
		const int match, bool ignoreDelim = false) {
	// an escaped start position never matched before: keep that behaviour
	if (!ignoreDelim && sc.chPrev == '\\')
		return false;

	// track the char in front of each candidate; the previous code sampled
	// sc.chPrev once and never refreshed it, so escaped matches were accepted
	int chPrev = sc.ch;
	for (Sci_Position len = 1; len < length; len++) {
		const int ch = sc.GetRelativeCharacter(len);
		if (ch == match && (ignoreDelim || chPrev != '\\'))
			return true;
		chPrev = ch;
	}
	return false;
}

/*
 * PrevNonWhitespaceChar
 * - returns the last non-whitespace char before the current position
 * - walks backwards as far as the first position of the document
 * - returns 0 if only whitespace (or nothing) precedes the current position
 */
int PrevNonWhitespaceChar(StyleContext &sc) {
	// most negative relative offset that is still inside the document;
	// the explicit cast avoids negating the position in unsigned arithmetic
	const Sci_Position max_back = -static_cast<Sci_Position>(sc.currentPos);
	// '>=' so that the very first char is inspected too (it used to be skipped)
	for (Sci_Position rel = -1; rel >= max_back; rel--) {
		const int ch = sc.GetRelativeCharacter(rel);
		if (!IsAWhitespace(ch))
			return ch;
	}
	return 0; // no matching char
}

/*
 * IsQLangStartAtScPos
 * - returns true if this is a valid Q Language sc position
 *   - ref: https://docs.raku.org/language/quoting
 *   - Q :adverb :adverb //;
 *   - q,qx,qw,qq,qqx,qqw,qqww :adverb /:adverb /;
 * - type receives the detected RAKUTYPE_QLANG / RAKUTYPE_STR_* value,
 *   or -1 when nothing was detected
 * - length is only used to bound the search for '>' of a < word quote >
 */
bool IsQLangStartAtScPos(StyleContext &sc, int &type, const Sci_Position length) {
	const bool adjacentOk = IsValidRegOrQAdjacent(sc.chNext);
	const int chAhead2 = sc.GetRelativeCharacter(2);
	const int chAhead3 = sc.GetRelativeCharacter(3);
	type = -1;

	// the char in front must not be part of a word
	if (!IsValidRegOrQPrecede(sc.chPrev))
		return false;

	if (sc.ch == 'Q') {
		if (adjacentOk)
			type = RAKUTYPE_QLANG;
	} else if (sc.ch == 'q') {
		// pick the longest known prefix: q, qx, qw, qq, qqx, qqw, qqww
		if (sc.chNext == 'x') {
			type = RAKUTYPE_STR_QX;
		} else if (sc.chNext == 'w') {
			type = RAKUTYPE_STR_QW;
		} else if (sc.chNext == 'q') {
			if (chAhead2 == 'x')
				type = RAKUTYPE_STR_QQX;
			else if (chAhead2 == 'w')
				type = (chAhead3 == 'w') ? RAKUTYPE_STR_QQWW : RAKUTYPE_STR_QQW;
			else
				type = RAKUTYPE_STR_QQ;
		} else {
			type = RAKUTYPE_STR_Q;
		}
	} else if (sc.ch == '<') {
		if (MatchCharInRange(sc, length, '>'))
			type = RAKUTYPE_STR_WQ; // < word quote >
	}
	return type >= 0;
}

/*
 * IsRegexStartAtScPos
 * - returns true if this is a valid Regex sc position
 *   - ref: https://docs.raku.org/language/regexes
 *   - Regex: (rx/s/m/tr/y) :adverb /:adverb /;
 *   -              regex R :adverb //;
 *   -                     /:adverb /;
 * - type receives the detected RAKUTYPE_REGEX_* value, or -1
 * - set holds the chars that may precede (ignoring whitespace) a bare '/'
 *   for it to count as the start of a regex
 */
bool IsRegexStartAtScPos(StyleContext &sc, int &type, CharacterSet &set) {
	const bool adjacentOk = IsValidRegOrQAdjacent(sc.chNext);
	type = -1;

	// the char in front must not be part of a word
	if (!IsValidRegOrQPrecede(sc.chPrev))
		return false;

	const int chCur = sc.ch;
	if (chCur == 'r') {
		if (sc.chNext == 'x')
			type = RAKUTYPE_REGEX_RX;
	} else if (chCur == 't' || chCur == 'T') {
		if (sc.chNext == 'r' || sc.chNext == 'R')
			type = RAKUTYPE_REGEX_TR;
	} else if (chCur == 'm') {
		if (adjacentOk)
			type = RAKUTYPE_REGEX_M;
	} else if (chCur == 's' || chCur == 'S') {
		if (adjacentOk)
			type = RAKUTYPE_REGEX_S;
	} else if (chCur == 'y') {
		if (adjacentOk)
			type = RAKUTYPE_REGEX_Y;
	} else if (chCur == '/') {
		if (set.Contains(PrevNonWhitespaceChar(sc)))
			type = RAKUTYPE_REGEX_NORM;
	}
	return type >= 0;
}

/*
 * IsValidIdentPrecede
 * - returns if ch is a valid preceding char to put directly before an identifier
 *   - rejected: alphanumerics, '_' and the chars '@', '$', '%'
 */
bool IsValidIdentPrecede(int ch) noexcept {
	switch (ch) {
		case '_':
		case '@':
		case '$':
		case '%':
			return false;
		default:
			return !IsAlphaNumeric(ch);
	}
}

/*
 * IsValidDelimiter
 * - returns if ch is a valid delimiter (most chars are valid)
 *   - rejected: alphanumerics and ':'
 *   * ref: Q Language: https://docs.raku.org/language/quoting
 */
bool IsValidDelimiter(int ch) noexcept {
	if (ch == ':')
		return false;
	return !IsAlphaNumeric(ch);
}

/*
 * GetDelimiterCloseChar
 * - returns the corresponding close char for a given delimiter
 *   - the bracket closer when GetBracketCloseChar knows one
 *   - otherwise ch itself if it is a valid delimiter
 *   - otherwise 0
 */
int GetDelimiterCloseChar(int ch) noexcept {
	const int chBracketEnd = GetBracketCloseChar(ch);
	if (chBracketEnd != 0)
		return chBracketEnd;
	return IsValidDelimiter(ch) ? ch : 0;
}

/*
 * GetRepeatCharCount
 * - returns how many consecutive chars, starting at the current position,
 *   are equal to chMatch (at most length)
 */
Sci_Position GetRepeatCharCount(StyleContext &sc, int chMatch, Sci_Position length) {
	Sci_Position run = 0;
	while (run < length && sc.GetRelativeCharacter(run) == chMatch) {
		run++;
	}
	return run;
}

/*
 * LengthToDelimiter
 * - returns the length until the end of a delimited string section,
 *   i.e. the relative position just after the last closing delimiter
 *   - returns -1 if the end was not found within length
 *   - Ignores nested delimiters (if opener != closer)
 *   - chars following a backslash are skipped (escaped)
 *   - dp.count closers are required (repeated delimiters)
 *   - noTrailing (default false): when true, the last closer only counts
 *     if it is followed by a whitespace char
 *   - all counters are reset at every new line char
 * - NOTE(review): the end condition is tested at the start of the next
 *   iteration, so a closer on the very last char of the range yields -1
 * - NOTE(review): for dp.count > 2 the adjacency test below uses
 *   'cnt_close > 1', so the second closer is counted even when it does not
 *   directly follow the first one; confirm whether '> 0' was intended
 */
Sci_Position LengthToDelimiter(StyleContext &sc, const DelimPair &dp,
		Sci_Position length, bool noTrailing = false) {
	short cnt_open = 0;			// count open bracket
	short cnt_close = 0;		// count close bracket
	bool is_escape = false;		// has been escaped using '\'?
	Sci_Position len = 0;		// count characters
	int chOpener = dp.opener;	// look for nested opener / closer
	if (dp.opener == dp.closer[0])
		chOpener = 0;			// no opening delimiter (no nesting possible)

	while (len < length) {
		const int chPrev = sc.GetRelativeCharacter(len - 1);
		const int ch = sc.GetRelativeCharacter(len);
		const int chNext = sc.GetRelativeCharacter(len+1);

		if (cnt_open == 0 && cnt_close == dp.count) {
			return len;				// end condition has been met
		} else if (is_escape) {
			is_escape = false;		// this char was escaped: ignore it
		} else if (ch == '\\') {
			is_escape = true;		// the next char is escaped
		} else {
			if (ch == chOpener) {
				cnt_open++;			// open nested bracket
			} else if (dp.isCloser(ch)) {
				if ( cnt_open > 0 ) {
					cnt_open--;		// close nested bracket
				} else if (dp.count > 1 && cnt_close < (dp.count - 1)) {
					// one of the leading closers of a repeated delimiter
					if (cnt_close > 1) {
						if (dp.isCloser(chPrev)) {
							cnt_close++;
						} else {	// reset if previous char was not close
							cnt_close = 0;
						}
					} else {
						cnt_close++;
					}
				} else if (!noTrailing || (IsAWhitespace(chNext))) {
					cnt_close++;		// found last close
					if (cnt_close > 1 && !dp.isCloser(chPrev)) {
						cnt_close = 0;	// reset if previous char was not close
					}
				} else {
					cnt_close = 0;		// non handled close: reset
				}
			} else if (IsANewLine(ch)) {
				cnt_open = 0;			// reset after each line
				cnt_close = 0;
			}
		}
		len++;
	}
	return -1; // end condition has NOT been met
}

/*
 * LengthToEndHeredoc
 * - returns the length until the end of a heredoc section, i.e. the
 *   relative position just after the delimiter string, or -1 if not found
 *   - delimiter string MUST begin on a new line (directly after a new
 *     line char, no leading whitespace is skipped)
 *   - chars are read from styler, starting at sc.currentPos
 *   - the end of the delimiter is detected on the char that follows it, so
 *     a delimiter ending exactly at the end of the range is not reported
 * - counters use Sci_Position (they were int) so that ranges longer than
 *   INT_MAX can not overflow the loop counter
 */
Sci_Position LengthToEndHeredoc(const StyleContext &sc, LexAccessor &styler,
		const Sci_Position length, const char *delim) {
	bool on_new_ln = false;
	Sci_Position i = 0; // str index
	for (Sci_Position n = 0; n < length; n++) {
		const char ch = styler.SafeGetCharAt(sc.currentPos + n, 0);
		if (on_new_ln) {
			if (delim[i] == '\0')
				return n;	// at end of str, match found!
			if (ch != delim[i++])
				i = 0;		// no char match, reset 'i'ndex
		}
		if (i == 0)			// detect new line
			on_new_ln = IsANewLine(ch);
	}
	return -1;				// no match found
}

/*
 * LengthToNextChar
 * - returns the length until the next char that is not a space, tab or
 *   new line char
 *   - the search starts one char after the current position, so the
 *     result is always at least 1
 *   - stops at length if no such char is found before
 */
Sci_Position LengthToNextChar(StyleContext &sc, const Sci_Position length) {
	Sci_Position len = 1;
	for (; len < length; len++) {
		const int ch = sc.GetRelativeCharacter(len);
		const bool isBlank = IsASpaceOrTab(ch) || IsANewLine(ch);
		if (!isBlank)
			break;
	}
	return len;
}

/*
 * GetRelativeString
 * - gets a relative string and sets it in &str
 *   - resets string before setting
 *   - copies the chars at relative positions offset .. length - 1, so
 *     'length' acts as the (exclusive) end position, not as a char count
 *   - every char is narrowed to a single 'char'
 */
void GetRelativeString(StyleContext &sc, Sci_Position offset, Sci_Position length,
		std::string &str) {
	str.clear();
	for (Sci_Position rel = offset; rel < length; rel++) {
		str.push_back(static_cast<char>(sc.GetRelativeCharacter(rel)));
	}
}

} // end anonymous namespace

/*----------------------------------------------------------------------------*
 * --- class: LexerRaku ---
 *----------------------------------------------------------------------------*/
//class LexerRaku : public ILexerWithMetaData {
class LexerRaku : public DefaultLexer {
	// Character sets: contents are defined in the constructor
	CharacterSet setWord;			// word chars: alphanumeric set plus '-' and '_'
	CharacterSet setSigil;			// "$&%@"
	CharacterSet setTwigil;			// "!*.:<=?^~"
	CharacterSet setOperator;		// ASCII operator chars (see IsOperatorChar)
	CharacterSet setSpecialVar;		// "_/!"
	WordList regexIdent;			// identifiers that specify a regex
	OptionsRaku options;			// Options from config
	OptionSetRaku osRaku;			// property / word list definitions
	WordList keywords;				// Word Lists from config
	WordList functions;
	WordList typesBasic;
	WordList typesComposite;
	WordList typesDomainSpecific;
	WordList typesExceptions;
	WordList adverbs;

public:
	// Default constructor: sets up the character sets and the list of
	// identifiers that introduce a regex ("regex", "rule", "token")
	explicit LexerRaku() :
		DefaultLexer("raku", SCLEX_RAKU),
		setWord(CharacterSet::setAlphaNum, "-_", 0x80),
		setSigil(CharacterSet::setNone, "$&%@"),
		setTwigil(CharacterSet::setNone, "!*.:<=?^~"),
		setOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;<>,?!.~"),
		setSpecialVar(CharacterSet::setNone, "_/!") {
		regexIdent.Set("regex rule token");
	}
	// Deleted so LexerRaku objects can not be copied or moved.
	LexerRaku(const LexerRaku &) = delete;
	LexerRaku(LexerRaku &&) = delete;
	void operator=(const LexerRaku &) = delete;
	void operator=(LexerRaku &&) = delete;
	virtual ~LexerRaku() {
	}
	// Destroys this lexer object (created with 'new' in LexerFactoryRaku)
	void SCI_METHOD Release() noexcept override {
		delete this;
	}
	int SCI_METHOD Version() const noexcept override {
		return lvRelease5;
	}
	// Property queries: all forwarded to the option set
	const char *SCI_METHOD PropertyNames() override {
		return osRaku.PropertyNames();
	}
	int SCI_METHOD PropertyType(const char *name) override {
		return osRaku.PropertyType(name);
	}
	const char *SCI_METHOD DescribeProperty(const char *name) override {
		return osRaku.DescribeProperty(name);
	}
	Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
	const char *SCI_METHOD PropertyGet(const char *key) override {
		return osRaku.PropertyGet(key);
	}
	const char *SCI_METHOD DescribeWordListSets() override {
		return osRaku.DescribeWordListSets();
	}
	Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
	void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
	void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;

	// Factory function: returns a newly allocated lexer
	static ILexer5 *LexerFactoryRaku() {
		return new LexerRaku();
	}

protected:
	// Character classification helpers (ASCII and Unicode)
	bool IsOperatorChar(const int ch);
	bool IsWordChar(const int ch, bool allowNumber = true);
	bool IsWordStartChar(const int ch);
	bool IsNumberChar(const int ch, int base = 10);
	// Helpers that advance / inspect the style context
	bool ProcessRegexTwinCapture(StyleContext &sc, const Sci_Position length,
		int &type, const DelimPair &dp);
	void ProcessStringVars(StyleContext &sc, const Sci_Position length, const int varState);
	bool ProcessValidRegQlangStart(StyleContext &sc, Sci_Position length, const int type,
		WordList &wordsAdverbs, DelimPair &dp);
	Sci_Position LengthToNonWordChar(StyleContext &sc, Sci_Position length,
		char *s, const int size, Sci_Position offset = 0);
};

/*----------------------------------------------------------------------------*
 * --- METHODS: LexerRaku ---
 *----------------------------------------------------------------------------*/

/*
 * LexerRaku::IsOperatorChar
 * - Test for both ASCII and Unicode operators
 *   see: https://docs.raku.org/language/unicode_entry
 * - chars above 0x7F are first compared with the list of Unicode operators,
 *   everything else (and any char not in the list) is looked up in setOperator
 */
bool LexerRaku::IsOperatorChar(const int ch) {
	//   Unicode			ASCII Equiv.
	static constexpr int unicodeOperators[] = {
		0x2208,	// (elem)
		0x2209,	// !(elem)
		0x220B,	// (cont)
		0x220C,	// !(cont)
		0x2216,	// (-)
		0x2229,	// (&)
		0x222A,	// (|)
		0x2282,	// (<)
		0x2283,	// (>)
		0x2284,	// !(<)
		0x2285,	// !(>)
		0x2286,	// (<=)
		0x2287,	// (>=)
		0x2288,	// !(<=)
		0x2289,	// !(>=)
		0x228D,	// (.)
		0x228E,	// (+)
		0x2296,	// (^)
	};
	if (ch > 0x7F) {
		for (const int chOperator : unicodeOperators) {
			if (chOperator == ch)
				return true;
		}
	}
	return setOperator.Contains(ch);
}

/*
 * LexerRaku::IsWordChar
 * - Test for both ASCII and Unicode identifier characters
 *   see: https://docs.raku.org/language/unicode_ascii
 *   also: ftp://ftp.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
 * - above 0x7F: only the letter categories (Lu, Ll, Lt, Lm, Lo) qualify,
 *   Unicode numbers should not appear in word identifiers
 * - up to 0x7F: an ASCII digit (if allowNumber) or a member of setWord
 *   FIXME: *still* may not contain all valid characters
 *   NOTE(review): setWord is built from CharacterSet::setAlphaNum, which
 *   presumably contains the ASCII digits; if so they are accepted even
 *   when allowNumber is false - confirm whether that is intended
 */
bool LexerRaku::IsWordChar(const int ch, bool allowNumber) {
	if (ch <= 0x7F) {
		if (allowNumber && IsADigit(ch))
			return true; // an ASCII number type
		return setWord.Contains(ch);
	}
	// Unicode: letters only
	const CharacterCategory cc = CategoriseCharacter(ch);
	return cc == ccLu || cc == ccLl || cc == ccLt || cc == ccLm || cc == ccLo;
}

/*
 * LexerRaku::IsWordStartChar
 * - Test for both ASCII and Unicode identifier "start / first" characters
 *   - a '-' can never start a word
 *   - otherwise the result of IsWordChar with allowNumber == false
 */
bool LexerRaku::IsWordStartChar(const int ch) {
	if (ch == '-')
		return false;
	return IsWordChar(ch, false); // no numbers allowed
}

/*
 * LexerRaku::IsNumberChar
 * - Test for both ASCII and Unicode identifier number characters
 *   see: https://docs.raku.org/language/unicode_ascii
 *   also: ftp://ftp.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
 * - above 0x7F: true for the number categories Nd, Nl and No
 *   (base is not taken into account for these)
 * - up to 0x7F: true if ch is a digit of the given base
 *   FIXME: *still* may not contain all valid number characters
 */
bool LexerRaku::IsNumberChar(const int ch, int base) {
	if (ch <= 0x7F)
		return IsADigit(ch, base);
	// Unicode: numbers only
	const CharacterCategory cc = CategoriseCharacter(ch);
	return cc == ccNd || cc == ccNl || cc == ccNo;
}

/*
 * LexerRaku::PropertySet
 * - hands the key / value pair to the option set, which stores it in options
 * - returns 0 if the option set reports true, -1 otherwise
 */
Sci_Position SCI_METHOD LexerRaku::PropertySet(const char *key, const char *val) {
	const bool accepted = osRaku.PropertySet(&options, key, val);
	return accepted ? 0 : -1;
}

/*
 * LexerRaku::WordListSet
 * - replaces the contents of word list number n with the words in wl
 *   - 0: keywords, 1: functions, 2: basic types, 3: composite types,
 *     4: domain-specific types, 5: exception types, 6: adverbs
 * - returns 0 if WordList::Set reports true for the list,
 *   -1 otherwise (including an unknown n)
 */
Sci_Position SCI_METHOD LexerRaku::WordListSet(int n, const char *wl) {
	// index in this table == word list number
	WordList *const wordLists[] = {
		&keywords,
		&functions,
		&typesBasic,
		&typesComposite,
		&typesDomainSpecific,
		&typesExceptions,
		&adverbs,
	};
	const int wordListCount = static_cast<int>(sizeof(wordLists) / sizeof(wordLists[0]));
	if (n < 0 || n >= wordListCount)
		return -1;	// no such list
	return wordLists[n]->Set(wl) ? 0 : -1;
}

/*
 * LexerRaku::ProcessRegexTwinCapture
 * - processes the transition between a regex pair (two sets of delimiters)
 *   - only applies to the types RAKUTYPE_REGEX_S, _TR and _Y; the type is
 *     cleared (-1) once handled, any other type is left alone
 * - moves to first new delimiter, if a bracket
 * - returns true when valid delimiter start found (if bracket), or when
 *   the opener is its own closer (nothing to skip)
 */
bool LexerRaku::ProcessRegexTwinCapture(StyleContext &sc, const Sci_Position length,
		int &type, const DelimPair &dp) {

	const bool hasTwin = (type == RAKUTYPE_REGEX_S)
		|| (type == RAKUTYPE_REGEX_TR)
		|| (type == RAKUTYPE_REGEX_Y);
	if (!hasTwin)
		return false;

	type = -1; // clear type

	// step over the closing delimiter if it was the previous char
	if (dp.isCloser(sc.chPrev))
		sc.Forward();

	// non-bracket delimiter: the second part follows directly
	if (dp.isCloser(dp.opener))
		return true;

	// bracket delimiter: the next non-blank char must be the opener again
	const Sci_Position gap = LengthToNextChar(sc, length);
	if (sc.GetRelativeCharacter(gap) != dp.opener)
		return false;
	sc.Forward(gap);
	return true;
}

/*
 * LexerRaku::ProcessStringVars
 * - processes a string and highlights any valid variables
 *   - steps forward 'length' times from the current position
 *   - a '$' or '@' that is not preceded by a backslash and is followed by
 *     a word start char switches to varState
 *   - the first non word char after that switches back to the state that
 *     was active on entry
 */
void LexerRaku::ProcessStringVars(StyleContext &sc, const Sci_Position length, const int varState) {
	const int stringState = sc.state;
	Sci_Position remaining = length;
	while (remaining > 0) {
		const bool insideVar = (sc.state == varState);
		if (insideVar && !IsWordChar(sc.ch)) {
			sc.SetState(stringState);	// variable name is over
		} else {
			const bool isSigil = (sc.ch == '$') || (sc.ch == '@');
			if (sc.chPrev != '\\' && isSigil && IsWordStartChar(sc.chNext)) {
				sc.SetState(varState);	// variable starts here
			}
		}
		sc.Forward(); // Next character
		remaining--;
	}
}
/*
 * LexerRaku::ProcessValidRegQlangStart
 * - processes a section of the document range from after a Regex / Q delimiter
 *   - skips whitespace, eats an optional identifier (type == RAKUTYPE_REGEX),
 *     collects adverbs and locates the opening delimiter
 *   - the state active on entry is applied again from the opening delimiter
 *     onwards; everything before it is styled as default, identifier,
 *     adverb or operator
 * - returns true on success (a closing delimiter could be determined)
 *   - sets: wordsAdverbs (adverb names separated by spaces, without ':'),
 *           dp.opener, dp.closer[], dp.count
 *  ref: https://docs.raku.org/language/regexes
 */
bool LexerRaku::ProcessValidRegQlangStart(StyleContext &sc, Sci_Position length, const int type,
		WordList &wordsAdverbs, DelimPair &dp) {
	Sci_Position startPos = sc.currentPos;
	Sci_Position startLen = length;
	const int target_state = sc.state;	// state to use once the delimiter is found
	int state = SCE_RAKU_DEFAULT;
	std::string str;					// collected adverb names

	// find our opening delimiter (and occurrences) / save any adverbs
	dp.opener = 0;					// adverbs can be after the first delimiter
	bool got_all_adverbs = false;	// in Regex statements
	bool got_ident = false;			// regex can have an identifier: 'regex R'
	sc.SetState(state);				// set state default to avoid pre-highlights
	while ((dp.opener == 0 || !got_all_adverbs) && sc.More()) {

		// move to the next non-space character
		const bool was_space = IsAWhitespace(sc.ch);
		if (!got_all_adverbs && was_space) {
			sc.Forward(LengthToNextChar(sc, length));
		}
		length = startLen - (sc.currentPos - startPos); // update length remaining

		// parse / eat an identifier (if type == RAKUTYPE_REGEX)
		if (dp.opener == 0 && !got_ident && type == RAKUTYPE_REGEX && IsAlphabet(sc.ch)) {

			// eat identifier / account for special adverb :sym<name>
			bool got_sym = false;
			while (sc.More()) {
				sc.SetState(SCE_RAKU_IDENTIFIER);
				// advance to the last char of the name (alphanumerics, '_', '-')
				while (sc.More() && (IsAlphaNumeric(sc.chNext)
						|| sc.chNext == '_' || sc.chNext == '-')) {
					sc.Forward();
				}
				sc.Forward();
				if (got_sym && sc.ch == '>') {
					sc.SetState(SCE_RAKU_OPERATOR);	// '>'
					sc.Forward();
					break;
				} else if (type == RAKUTYPE_REGEX && sc.Match(":sym<")) {
					sc.SetState(SCE_RAKU_ADVERB);	// ':sym'
					sc.Forward(4);
					sc.SetState(SCE_RAKU_OPERATOR);	// '<'
					sc.Forward();
					got_sym = true;					// loop again to eat the name inside <>
				} else {
					break;
				}
			}
			sc.SetState(state);
			got_ident = true;
		}

		// parse / save an adverb: RAKUTYPE_REGEX only has adverbs after delim
		//                      >= RAKUTYPE_QLANG only has adverbs before delim
		else if (!got_all_adverbs && sc.ch == ':' && (!(dp.opener == 0 && got_ident)
				&& !(dp.opener > 0 && type >= RAKUTYPE_QLANG))) {
			sc.SetState(SCE_RAKU_ADVERB);
			// collect the alphanumeric chars that follow the ':'
			while (IsAlphaNumeric(sc.chNext) && sc.More()) {
				sc.Forward();
				str += sc.ch;
			}
			str += ' ';		// separator between adverb names
			sc.Forward();
			sc.SetState(state);
		}

		// find starting delimiter
		else if (dp.opener == 0 && (was_space || IsValidRegOrQAdjacent(sc.ch))
				&& IsValidDelimiter(sc.ch)) {	// make sure the delimiter is legal (most are)
			sc.SetState((state = target_state));// start state here...
			dp.opener = sc.ch;					// this is our delimiter, get count
			if (type < RAKUTYPE_QLANG)			// type is Regex
				dp.count = 1;					// has only one delimiter
			else
				dp.count = GetRepeatCharCount(sc, dp.opener, length);
			sc.Forward(dp.count);
		}

		// we must have all the adverbs by now...
		else {
			if (got_all_adverbs)
				break; // prevent infinite loop: occurs on missing open char
			got_all_adverbs = true;
		}
	}

	// set word list / find a valid closing delimiter (or bomb!)
	wordsAdverbs.Set(str.c_str());
	dp.closer[0] = GetDelimiterCloseChar(dp.opener);
	dp.closer[1] = 0; // no other closer char
	return dp.closer[0] > 0;
}

/*
 * LexerRaku::LengthToNonWordChar
 * - returns the length until the next non "word" character (see IsWordChar),
 *   counted from the relative position 'offset'
 *   - also sets all the parsed chars in 's', always '\0' terminated
 *   - 's' is a buffer of 'size' chars: at most size - 1 chars are stored
 *     and the result never exceeds size - 1 nor length
 *   - each char is narrowed to a single 'char' when stored
 *   - returns 0 without touching 's' if there is no usable buffer
 */
Sci_Position LexerRaku::LengthToNonWordChar(StyleContext &sc, Sci_Position length,
		char *s, const int size, Sci_Position offset) {
	if (!s || size <= 0)
		return 0;	// no room for anything, not even the terminator

	// keep the last slot free for the terminator: the previous bound allowed
	// writes to s[size] and beyond when the word was longer than the buffer
	const Sci_Position capacity = static_cast<Sci_Position>(size) - 1;
	const Sci_Position max_length = capacity < length ? capacity : length;

	Sci_Position len = 0;
	while (len < max_length) {
		const int ch = sc.GetRelativeCharacter(len + offset);
		if (!IsWordChar(ch))
			break;
		s[len] = static_cast<char>(ch);
		len++;
	}
	s[len] = '\0';	// terminate directly after the last stored char
	return len;
}

/*
 * LexerRaku::Lex
 * - Main lexer method
 *   - startPos / length: the range requested for styling; both may be
 *     extended backwards (see the backtracking below) before lexing starts
 *   - initStyle: the style in effect at startPos; reset to SCE_RAKU_DEFAULT
 *     whenever the start position has been moved backwards
 *   - pAccess: the document, wrapped in a LexAccessor
 *   - each loop iteration works in two phases: first the current state is
 *     checked for termination (the 'switch'), then - if the state is
 *     SCE_RAKU_DEFAULT - the start of a new state is looked for
 *   - the delimiter pairs, heredoc delimiter and last word / identifier /
 *     adverb are locals of this call; presumably this is why lexing is
 *     restarted from a SCE_RAKU_DEFAULT position rather than mid-element
 */
void SCI_METHOD LexerRaku::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
	LexAccessor styler(pAccess);
	DelimPair dpEmbeded;			// delimiter pair: embedded comments
	DelimPair dpString;				// delimiter pair: string
	DelimPair dpRegQ;				// delimiter pair: Regex / Q Lang
	std::string hereDelim;			// heredoc delimiter (if in heredoc)
	int hereState = 0;				// heredoc state to use (Q / QQ)
	int numState = 0;				// number state / type
	short cntDecimal = 0;			// number decimal count
	std::string wordLast;			// last word seen
	std::string identLast;			// last identifier seen
	std::string adverbLast;			// last (single) adverb seen
	WordList lastAdverbs;			// last adverbs seen
	Sci_Position len;				// temp length value
	char s[100];					// temp char string
	int typeDetect = -1;			// temp type detected (for regex and Q lang)
	Sci_Position lengthToEnd;		// length until the end of range

	// Backtrack to safe start position before complex quoted elements

	Sci_PositionU newStartPos = startPos;
	if (initStyle != SCE_RAKU_DEFAULT) {
		// Backtrack to last SCE_RAKU_DEFAULT or 0
		while (newStartPos > 0) {
			newStartPos--;
			if (styler.StyleAt(newStartPos) == SCE_RAKU_DEFAULT)
				break;
		}
		// Backtrack to start of line before SCE_RAKU_HEREDOC_Q?
		if (initStyle == SCE_RAKU_HEREDOC_Q || initStyle == SCE_RAKU_HEREDOC_QQ) {
			if (newStartPos > 0) {
				newStartPos = styler.LineStart(styler.GetLine(newStartPos));
			}
		}
	} else {
		const Sci_Position line = styler.GetLine(newStartPos);
		if (line > 0) {
			// If the previous line is a start of a q or qq heredoc, backtrack to start of line
			const Sci_Position startPreviousLine = styler.LineStart(line-1);
			if (ContainsQTo(startPreviousLine, newStartPos, styler)) {
				newStartPos = startPreviousLine;
			}
		}
	}


	// Re-calculate (any) changed startPos, length and initStyle state
	if (newStartPos < startPos) {
		initStyle = SCE_RAKU_DEFAULT;
		length += startPos - newStartPos;	// grow the range by the distance moved back
		startPos = newStartPos;
	}

	// init StyleContext
	StyleContext sc(startPos, length, initStyle, styler);

	// StyleContext Loop
	for (; sc.More(); sc.Forward()) {
		// chars left between the current position and the end of the range
		lengthToEnd = (length - (sc.currentPos - startPos)); // end of range

		/* *** Determine if the current state should terminate ************** *
		 * Everything within the 'switch' statement processes characters up
		 * until the end of a syntax highlight section / state.
		 * ****************************************************************** */
		switch (sc.state) {
			case SCE_RAKU_OPERATOR:
				sc.SetState(SCE_RAKU_DEFAULT);
				break; // FIXME: better valid operator sequences needed?
			case SCE_RAKU_COMMENTLINE:
				if (IsANewLine(sc.ch)) {
					sc.SetState(SCE_RAKU_DEFAULT);
				}
				break;
			case SCE_RAKU_COMMENTEMBED:
				if ((len = LengthToDelimiter(sc, dpEmbeded, lengthToEnd)) >= 0) {
					sc.Forward(len);			// Move to end delimiter
					sc.SetState(SCE_RAKU_DEFAULT);
				} else {
					sc.Forward(lengthToEnd);	// no end delimiter found
				}
				break;
			case SCE_RAKU_POD:
				if (sc.atLineStart && sc.Match("=end pod")) {
					sc.Forward(8);				// length of "=end pod"
					sc.SetState(SCE_RAKU_DEFAULT);
				}
				break;
			case SCE_RAKU_STRING:

				// Process the string for variables: move to end delimiter
				if ((len = LengthToDelimiter(sc, dpString, lengthToEnd)) >= 0) {
					if (dpString.interpol) {
						ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR);
					} else {
						sc.Forward(len);
					}
					sc.SetState(SCE_RAKU_DEFAULT);
				} else {
					sc.Forward(lengthToEnd);	// no end delimiter found
				}
				break;
			case SCE_RAKU_STRING_Q:
			case SCE_RAKU_STRING_QQ:
			case SCE_RAKU_STRING_Q_LANG:

				// No string: previous char was the delimiter
				if (dpRegQ.count == 1 && dpRegQ.isCloser(sc.chPrev)) {
					sc.SetState(SCE_RAKU_DEFAULT);
				}

				// Process the string for variables: move to end delimiter
				else if ((len = LengthToDelimiter(sc, dpRegQ, lengthToEnd)) >= 0) {

					// set (any) heredoc delimiter string
					if (lastAdverbs.InList("to")) {
						GetRelativeString(sc, -1, len - dpRegQ.count, hereDelim);
						hereState = SCE_RAKU_HEREDOC_Q; // default heredoc state
					}

					// select variable identifiers
					if (sc.state == SCE_RAKU_STRING_QQ || lastAdverbs.InList("qq")) {
						ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR);
						hereState = SCE_RAKU_HEREDOC_QQ; // potential heredoc state
					} else {
						sc.Forward(len);
					}
					sc.SetState(SCE_RAKU_DEFAULT);
				} else {
					sc.Forward(lengthToEnd);	// no end delimiter found
				}
				break;
			case SCE_RAKU_HEREDOC_Q:
			case SCE_RAKU_HEREDOC_QQ:
				if ((len = LengthToEndHeredoc(sc, styler, lengthToEnd, hereDelim.c_str())) >= 0) {
					// select variable identifiers
					if (sc.state == SCE_RAKU_HEREDOC_QQ) {
						ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR);
					} else {
						sc.Forward(len);
					}
					sc.SetState(SCE_RAKU_DEFAULT);
				} else {
					sc.Forward(lengthToEnd);	// no end delimiter found
				}
				// cleared on both paths, i.e. also when no end was found
				hereDelim.clear();				// clear heredoc delimiter
				break;
			case SCE_RAKU_REGEX:
				// account for typeDetect = RAKUTYPE_REGEX_S/TR/Y
				// (the loop repeats for as long as ProcessRegexTwinCapture
				//  returns true and the state is still SCE_RAKU_REGEX)
				while (sc.state == SCE_RAKU_REGEX) {

					// No string: previous char was the delimiter
					if (dpRegQ.count == 1 && dpRegQ.isCloser(sc.chPrev)) {
						if (ProcessRegexTwinCapture(sc, lengthToEnd, typeDetect, dpRegQ))
							continue;
						sc.SetState(SCE_RAKU_DEFAULT);
						break;
					}

					// Process the string for variables: move to end delimiter
					else if ((len = LengthToDelimiter(sc, dpRegQ, lengthToEnd)) >= 0) {
						ProcessStringVars(sc, len, SCE_RAKU_REGEX_VAR);
						if (ProcessRegexTwinCapture(sc, lengthToEnd, typeDetect, dpRegQ))
							continue;
						sc.SetState(SCE_RAKU_DEFAULT);
						break;
					} else {
						sc.Forward(lengthToEnd); // no end delimiter found
						break;
					}
				}
				break;
			case SCE_RAKU_NUMBER:
				if (sc.ch == '.') {
					if (sc.chNext == '.') {		// '..' is an operator
						sc.SetState(SCE_RAKU_OPERATOR);
						sc.Forward();
						if (sc.chNext == '.')	// '...' is also an operator
							sc.Forward();
						break;
					} else if (numState > RAKUNUM_FLOAT_EXP
							&& (cntDecimal < 1 || numState == RAKUNUM_VERSION)) {
						// one '.' is accepted; RAKUNUM_VERSION accepts any number of them
						cntDecimal++;
						sc.Forward();
					} else {
						sc.SetState(SCE_RAKU_DEFAULT);
						break; // too many decimal places
					}
				}
				// end the number at the first char that is not valid for its base
				switch (numState) {
					case RAKUNUM_BINARY:
						if (!IsNumberChar(sc.ch, 2))
							sc.SetState(SCE_RAKU_DEFAULT);
						break;
					case RAKUNUM_OCTAL:
						if (!IsNumberChar(sc.ch, 8))
							sc.SetState(SCE_RAKU_DEFAULT);
						break;
					case RAKUNUM_HEX:
						if (!IsNumberChar(sc.ch, 16))
							sc.SetState(SCE_RAKU_DEFAULT);
						break;
					case RAKUNUM_DECIMAL:
					case RAKUNUM_VERSION:
						if (!IsNumberChar(sc.ch))
							sc.SetState(SCE_RAKU_DEFAULT);
				}
				break;
			// the states below were fully consumed when they were entered,
			// so they always end on the next char
			case SCE_RAKU_WORD:
			case SCE_RAKU_FUNCTION:
			case SCE_RAKU_TYPEDEF:
			case SCE_RAKU_ADVERB:
				sc.SetState(SCE_RAKU_DEFAULT);
				break;
			case SCE_RAKU_MU:
			case SCE_RAKU_POSITIONAL:
			case SCE_RAKU_ASSOCIATIVE:
			case SCE_RAKU_CALLABLE:
			case SCE_RAKU_IDENTIFIER:
			case SCE_RAKU_GRAMMAR:
			case SCE_RAKU_CLASS:
				sc.SetState(SCE_RAKU_DEFAULT);
				break;
		}

		/* *** Determine if a new state should be entered ******************* *
		 * Everything below here identifies the beginning of a state, all or part
		 * of the characters within this state are processed here, the rest are
		 * completed above in the terminate state section.
		 * ****************************************************************** */
		if (sc.state == SCE_RAKU_DEFAULT) {

			// --- Single line comment
			if (sc.ch == '#') {
				sc.SetState(SCE_RAKU_COMMENTLINE);
			}

			// --- POD block
			else if (sc.atLineStart && sc.Match("=begin pod")) {
				sc.SetState(SCE_RAKU_POD);
				sc.Forward(10);					// length of "=begin pod"
			}

			// --- String (normal)
			else if (sc.chPrev != '\\' && (IsValidQuoteOpener(sc.ch, dpString, RAKUDELIM_QUOTE))) {
				sc.SetState(SCE_RAKU_STRING);
			}

			// --- String (Q Language) ----------------------------------------
			//   - https://docs.raku.org/language/quoting
			//   - Q :adverb :adverb //;
			//   - q,qx,qw,qq,qqx,qqw,qqww :adverb :adverb //;
			else if (IsQLangStartAtScPos(sc, typeDetect, lengthToEnd)) {
				// 'forward' is the length of the quoting identifier, derived
				// from the detected type by the comparisons below
				int state = SCE_RAKU_STRING_Q_LANG;
				Sci_Position forward = 1;	// single char ident (default)
				if (typeDetect > RAKUTYPE_QLANG) {
					state = SCE_RAKU_STRING_Q;
					if (typeDetect == RAKUTYPE_STR_WQ)
						forward = 0;		// no char ident
				}
				if (typeDetect > RAKUTYPE_STR_Q) {
					if (typeDetect == RAKUTYPE_STR_QQ)
						state = SCE_RAKU_STRING_QQ;
					forward++;				// two char ident
				}
				if (typeDetect > RAKUTYPE_STR_QQ)
					forward++;				// three char ident
				if (typeDetect == RAKUTYPE_STR_QQWW)
					forward++;				// four char ident

				// Proceed: check for a valid character after statement
				if (IsValidRegOrQAdjacent(sc.GetRelative(forward)) || typeDetect == RAKUTYPE_QLANG) {
					sc.SetState(state);
					sc.Forward(forward);
					lastAdverbs.Clear();

					// Process: adverbs / opening delimiter / adverbs after delim
					if (ProcessValidRegQlangStart(sc, lengthToEnd, typeDetect,
							lastAdverbs, dpRegQ))
						sc.SetState(state);
				}
			}

			// --- Regex (rx/s/m/tr/y) ----------------------------------------
			//   - https://docs.raku.org/language/regexes
			else if ((IsRegexStartAtScPos(sc, typeDetect, setOperator) || regexIdent.InList(wordLast.c_str()))) {
				if (typeDetect == -1) { // must be a regex identifier word
					wordLast.clear();
					typeDetect = RAKUTYPE_REGEX;
				}
				Sci_Position forward = 0;	// no ident (RAKUTYPE_REGEX, RAKUTYPE_REGEX_NORM)
				if (typeDetect > 0 && typeDetect != RAKUTYPE_REGEX)
					forward++;				// single char ident
				if (typeDetect > RAKUTYPE_REGEX)
					forward++;				// two char ident

				// Proceed: check for a valid character after statement
				if (IsValidRegOrQAdjacent(sc.GetRelative(forward)) || typeDetect == RAKUTYPE_REGEX_NORM) {
					sc.SetState(SCE_RAKU_REGEX);
					sc.Forward(forward);
					lastAdverbs.Clear();

					// Process: adverbs / opening delimiter / adverbs after delim
					if (ProcessValidRegQlangStart(sc, lengthToEnd, typeDetect,
							lastAdverbs, dpRegQ))
						sc.SetState(SCE_RAKU_REGEX);
				}
			}

			// --- Numbers ----------------------------------------------------
			//   - a leading 'v' is only accepted directly after the word "use"
			else if (IsValidIdentPrecede(sc.chPrev) && (IsNumberChar(sc.ch)
					|| (sc.ch == 'v' && IsNumberChar(sc.chNext) && wordLast == "use"))) {
				numState = RAKUNUM_DECIMAL;	// default: decimal (base 10)
				cntDecimal = 0;
				sc.SetState(SCE_RAKU_NUMBER);
				if (sc.ch == 'v')			// forward past 'v'
					sc.Forward();
				if (wordLast == "use") {	// package version number
					numState = RAKUNUM_VERSION;
				} else if (sc.ch == '0') {	// other type of number
					switch (sc.chNext) {
						case 'b':	// binary (base 2)
							numState = RAKUNUM_BINARY;
							break;
						case 'o':	// octal (base 8)
							numState = RAKUNUM_OCTAL;
							break;
						case 'x':	// hexadecimal (base 16)
							numState = RAKUNUM_HEX;
					}
					if (numState != RAKUNUM_DECIMAL)
						sc.Forward();		// forward to number type char
				}
			}

			// --- Keywords / functions / types / barewords -------------------
			else if ((sc.currentPos == 0 || sc.atLineStart || IsValidIdentPrecede(sc.chPrev))
					&& IsWordStartChar(sc.ch)) {
				// copy the word into 's' and classify it against the word lists
				len = LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s));
				if (keywords.InList(s)) {
					sc.SetState(SCE_RAKU_WORD);		// Keywords
				} else if(functions.InList(s)) {
					sc.SetState(SCE_RAKU_FUNCTION);	// Functions
				} else if(typesBasic.InList(s)) {
					sc.SetState(SCE_RAKU_TYPEDEF);	// Types (basic)
				} else if(typesComposite.InList(s)) {
					sc.SetState(SCE_RAKU_TYPEDEF);	// Types (composite)
				} else if(typesDomainSpecific.InList(s)) {
					sc.SetState(SCE_RAKU_TYPEDEF);	// Types (domain-specific)
				} else if(typesExceptions.InList(s)) {
					sc.SetState(SCE_RAKU_TYPEDEF);	// Types (exceptions)
				} else {
					if (wordLast == "class")
						sc.SetState(SCE_RAKU_CLASS);	// a Class ident
					else if (wordLast == "grammar")
						sc.SetState(SCE_RAKU_GRAMMAR);	// a Grammar ident
					else
						sc.SetState(SCE_RAKU_IDENTIFIER);	// Bareword
					identLast = s;						// save identifier
				}
				if (adverbLast == "sym") {				// special adverb ":sym"
					sc.SetState(SCE_RAKU_IDENTIFIER);	// treat as identifier
					identLast = s;						// save identifier
				}
				if (sc.state != SCE_RAKU_IDENTIFIER)
					wordLast = s;					// save word
				// len - 1: the loop's own sc.Forward() steps over the last char
				sc.Forward(len - 1);				// ...forward past word
			}

			// --- Adverbs ----------------------------------------------------
			else if (sc.ch == ':' && IsWordStartChar(sc.chNext)) {
				// offset 1: the word starts after the ':'
				len = LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s), 1);
				if (adverbs.InList(s)) {
					sc.SetState(SCE_RAKU_ADVERB);	// Adverbs (begin with ':')
					adverbLast = s;					// save word
					sc.Forward(len); // ...forward past word (less offset: 1)
				}
			}

			// --- Identifiers: $mu / @positional / %associative / &callable --
			//     see: https://docs.raku.org/language/variables
			else if (setSigil.Contains(sc.ch) && (setTwigil.Contains(sc.chNext)
					|| setSpecialVar.Contains(sc.chNext)
					|| IsWordStartChar(sc.chNext))) {

				// State based on sigil
				switch (sc.ch) {
					case '$': sc.SetState(SCE_RAKU_MU);
						break;
					case '@': sc.SetState(SCE_RAKU_POSITIONAL);
						break;
					case '%': sc.SetState(SCE_RAKU_ASSOCIATIVE);
						break;
					case '&': sc.SetState(SCE_RAKU_CALLABLE);
				}
				const int state = sc.state;	// remembered: restored after a twigil
				sc.Forward();
				// ch_delim: -1 = special var (no identifier follows),
				//            0 = plain identifier,
				//           >0 = closing char expected after the identifier
				char ch_delim = 0;
				if (setSpecialVar.Contains(sc.ch)
						&& !setWord.Contains(sc.chNext)) {	// Process Special Var
					ch_delim = -1;
				} else if (setTwigil.Contains(sc.ch)) {		// Process Twigil
					sc.SetState(SCE_RAKU_OPERATOR);			// the twigil is styled as an operator
					if (sc.ch == '<' && setWord.Contains(sc.chNext))
						ch_delim = '>';
					sc.Forward();
					sc.SetState(state);
				}

				// Process (any) identifier
				if (ch_delim >= 0) {
					sc.Forward(LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s)) - 1);
					if (ch_delim > 0 && sc.chNext == ch_delim) {
						sc.Forward();
						sc.SetState(SCE_RAKU_OPERATOR);	// the closing char is styled as an operator
					}
					identLast = s;	// save identifier
				}
			}

			// --- Operators --------------------------------------------------
			else if (IsOperatorChar(sc.ch)) {
				// FIXME: better valid operator sequences needed?
				sc.SetState(SCE_RAKU_OPERATOR);
			}

			// --- Heredoc: begin ---------------------------------------------
			//   - entered at the end of a line once a heredoc delimiter has
			//     been saved by the Q string handling above
			else if (!hereDelim.empty() && sc.atLineEnd) {
				if (IsANewLine(sc.ch))
					sc.Forward(); // skip a possible CRLF situation
				sc.SetState(hereState);
			}

			// Reset words: on operator semi-colon OR '}' (end of statement)
			if (sc.state == SCE_RAKU_OPERATOR && (sc.ch == ';' || sc.ch == '}')) {
				wordLast.clear();
				identLast.clear();
				adverbLast.clear();
			}
		}

		/* *** Determine if an "embedded comment" is to be entered ********** *
		 * This type of embedded comment section, or multi-line comment comes
		 * after a normal comment has begun... e.g: #`[ ... ]
		 * ****************************************************************** */
		else if (sc.state == SCE_RAKU_COMMENTLINE && sc.chPrev == '#' && sc.ch == '`') {
			if (IsBracketOpenChar(sc.chNext)) {
				sc.Forward(); // Condition met for "embedded comment"
				dpEmbeded.opener = sc.ch;

				// Find the opposite (termination) closing bracket (if any)
				dpEmbeded.closer[0] = GetBracketCloseChar(dpEmbeded.opener);
				if (dpEmbeded.closer[0] > 0) { // Enter "embedded comment"

					// Find multiple opening character occurrence
					dpEmbeded.count = GetRepeatCharCount(sc, dpEmbeded.opener, lengthToEnd);
					sc.SetState(SCE_RAKU_COMMENTEMBED);
					sc.Forward(dpEmbeded.count - 1); // incremented in the next loop
				}
			}
		}
	}

	// And we're done...
	sc.Complete();
}

/*
 * LexerRaku::Fold
 * - Main fold method
 *   - startPos / length: the range to compute fold levels for; startPos may
 *     be moved back to the start of an earlier line (see the backtracking)
 *   - pAccess: the document, wrapped in a LexAccessor
 *   - does nothing when the 'fold' option is off
 *   - folds: "{ }" code blocks (operator style only) and - when comment
 *     folding is on - embedded (multi-line) comments, runs of line comments
 *     and POD "=begin" / "=end" sections
 *   NOTE: although Raku uses and supports UNICODE characters, we're only looking
 *         at normal chars here, using 'SafeGetCharAt' - for folding purposes
 *         that is all we need.
 */
#define RAKU_HEADFOLD_SHIFT	4
#define RAKU_HEADFOLD_MASK	0xF0
void SCI_METHOD LexerRaku::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) {

	// init LexAccessor / return if fold option is off
	if (!options.fold) return;
	LexAccessor styler(pAccess);

	// init char and line positions
	// (endPos is fixed here, before startPos is possibly moved back below)
	const Sci_PositionU endPos = startPos + length;
	Sci_Position lineCurrent = styler.GetLine(startPos);

	// Backtrack to last SCE_RAKU_DEFAULT line
	if (startPos > 0 && lineCurrent > 0) {
		while (lineCurrent > 0 && styler.StyleAt(startPos) != SCE_RAKU_DEFAULT) {
			lineCurrent--;
			startPos = styler.LineStart(lineCurrent);
		}
		lineCurrent = styler.GetLine(startPos);
	}
	Sci_PositionU lineStart = startPos;
	Sci_PositionU lineStartNext = styler.LineStart(lineCurrent + 1);

	// init line folding level
	// (the loop below stores each line's closing level in the upper 16 bits)
	int levelPrev = SC_FOLDLEVELBASE;
	if (lineCurrent > 0)
		levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
	int levelCurrent = levelPrev;

	// init char and style variables
	char chNext = styler[startPos];
	// startPos is unsigned: 'startPos - 1' would wrap around at the very start
	// of the document, so there is no previous style to read in that case
	int stylePrev = startPos > 0 ? styler.StyleAt(startPos - 1) : SCE_RAKU_DEFAULT;
	int styleNext = styler.StyleAt(startPos);
	int styleNextStartLine = styler.StyleAt(lineStartNext);
	int visibleChars = 0;
	bool wasCommentMulti = false;

	// main loop
	for (Sci_PositionU i = startPos; i < endPos; i++) {

		// next char, style and flags
		const char ch = chNext;
		chNext = styler.SafeGetCharAt(i + 1);
		const int style = styleNext;
		styleNext = styler.StyleAt(i + 1);
		const bool atEOL = i == (lineStartNext - 1);
		const bool atLineStart = i == lineStart;

		// --- Comments / Multi-line / POD ------------------------------------
		if (options.foldComment) {

			// Multi-line
			//   - opens on a line starting with "#`" when the next line starts
			//     in the embedded comment style
			//   - closes on a line starting in the embedded comment style when
			//     the next line does not
			if (options.foldCommentMultiline) {
				if (style == SCE_RAKU_COMMENTLINE && atLineStart && ch == '#' && chNext == '`'
						&& styleNextStartLine == SCE_RAKU_COMMENTEMBED) {
					levelCurrent++;
					wasCommentMulti = true; // don't confuse line comments
				} else if (style == SCE_RAKU_COMMENTEMBED && atLineStart
						&& styleNextStartLine != SCE_RAKU_COMMENTEMBED) {
					levelCurrent--;
				}
			}

			// Line comments
			//   - the first line of a run of comment lines opens a fold,
			//     the last line of the run closes it
			if (!wasCommentMulti && atEOL && stylePrev == SCE_RAKU_COMMENTLINE
					&& IsCommentLine(lineCurrent, styler)) {
				if (!IsCommentLine(lineCurrent - 1, styler)
						&& IsCommentLine(lineCurrent + 1, styler))
					levelCurrent++;
				else if (IsCommentLine(lineCurrent - 1, styler)
						&& !IsCommentLine(lineCurrent + 1, styler))
					levelCurrent--;
			}

			// POD
			if (options.foldCommentPOD && atLineStart && style == SCE_RAKU_POD) {
				if (styler.Match(i, "=begin"))
					levelCurrent++;
				else if (styler.Match(i, "=end"))
					levelCurrent--;
			}
		}

		// --- Code block -----------------------------------------------------
		//   - only braces in the operator style count
		if (style == SCE_RAKU_OPERATOR) {
			if (ch == '{') {
				if (levelCurrent < levelPrev) levelPrev--;
				levelCurrent++;
			} else if (ch == '}') {
				levelCurrent--;
			}
		}

		// --- at end of line / range / apply fold ----------------------------
		if (atEOL) {
			int level = levelPrev;

			// set level flags
			level |= levelCurrent << 16;
			if (visibleChars == 0 && options.foldCompact)
				level |= SC_FOLDLEVELWHITEFLAG;
			if ((levelCurrent > levelPrev) && (visibleChars > 0))
				level |= SC_FOLDLEVELHEADERFLAG;
			if (level != styler.LevelAt(lineCurrent)) {
				styler.SetLevel(lineCurrent, level);
			}

			// move on to the next line and reset the per-line state
			lineCurrent++;
			lineStart = lineStartNext;
			lineStartNext = styler.LineStart(lineCurrent + 1);
			styleNextStartLine = styler.StyleAt(lineStartNext);
			levelPrev = levelCurrent;
			visibleChars = 0;
			wasCommentMulti = false;
		}

		// increment visibleChars / set previous char
		if (!isspacechar(ch))
			visibleChars++;
		stylePrev = style;
	}

	// Done: set real level of the next line
	// (keeps the flag bits already stored for that line)
	int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
	styler.SetLevel(lineCurrent, levelPrev | flagsNext);
}

/*----------------------------------------------------------------------------*
 * --- Scintilla: LexerModule ---
 * Defines the module object 'lmRaku' for this lexer. It is constructed from
 * the SCLEX_RAKU id, the LexerRaku::LexerFactoryRaku factory function, the
 * name "raku" and rakuWordLists.
 * NOTE(review): rakuWordLists is presumably the list of word list
 *               descriptions declared earlier in this file - confirm.
 *----------------------------------------------------------------------------*/

extern const LexerModule lmRaku(SCLEX_RAKU, LexerRaku::LexerFactoryRaku, "raku", rakuWordLists);