// Source path: notepad-plus-plus/lexilla/lexers/LexJulia.cxx (1263 lines, 41 KiB)

// Scintilla source code edit control
// Encoding: UTF-8
/** @file LexJulia.cxx
** Lexer for Julia.
** Reusing code from LexMatlab, LexPython and LexRust
**
** Written by Bertrand Lacoste
**
**/
// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
#include <cstdlib>
#include <cassert>
#include <cstring>
#include <string>
#include <string_view>
#include <vector>
#include <map>
#include <algorithm>
#include <functional>
#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"
#include "StringCopy.h"
#include "PropSetSimple.h"
#include "WordList.h"
#include "LexAccessor.h"
#include "Accessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "CharacterCategory.h"
#include "LexerModule.h"
#include "OptionSet.h"
#include "DefaultLexer.h"
using namespace Scintilla;
using namespace Lexilla;
// Maximum number of identifier characters (not counting the terminating NUL)
// copied out of the document when an identifier is compared with the word lists.
static constexpr int MAX_JULIA_IDENT_CHARS = 1023;
// Option values consulted by LexerJulia when lexing and folding.
// Every member is exposed as a named property by OptionSetJulia.
struct OptionsJulia {
    bool fold;                     // master switch for folding
    bool foldComment;              // fold block comments
    bool foldCompact;              // flag blank lines as white space in fold levels
    bool foldDocstring;            // fold triple-doublequote strings
    bool foldSyntaxBased;          // fold on keywords and brackets
    bool highlightTypeannotation;  // style the identifier following `::`
    bool highlightLexerror;        // style malformed characters/numbers as errors

    // Establish the default value of every option.
    OptionsJulia() :
        fold(true),
        foldComment(true),
        foldCompact(false),
        foldDocstring(true),
        foldSyntaxBased(true),
        highlightTypeannotation(false),
        highlightLexerror(false) {
    }
};
// Descriptions of the word lists accepted by the lexer, in word-list index
// order. The array is terminated by a null pointer.
const char * const juliaWordLists[] = {
    "Primary keywords and identifiers",
    "Built in types",
    "Other keywords",
    "Built in functions",
    nullptr,
};
// Binds each property name understood by the Julia lexer to the matching
// member of OptionsJulia, and registers the word list descriptions.
struct OptionSetJulia : public OptionSet<OptionsJulia> {
    OptionSetJulia() {
        // Generic folding switches.
        DefineProperty("fold", &OptionsJulia::fold);

        DefineProperty("fold.compact", &OptionsJulia::foldCompact);

        DefineProperty("fold.comment", &OptionsJulia::foldComment);

        // Julia specific folding switches.
        DefineProperty("fold.julia.docstring", &OptionsJulia::foldDocstring,
            "Fold multiline triple-doublequote strings, usually used to document a function or type above the definition.");

        DefineProperty("fold.julia.syntax.based", &OptionsJulia::foldSyntaxBased,
            "Set this property to 0 to disable syntax based folding.");

        // Highlighting switches.
        DefineProperty("lexer.julia.highlight.typeannotation", &OptionsJulia::highlightTypeannotation,
            "This option enables highlighting of the type identifier after `::`.");

        // Description typo fixed: "error int character" -> "error in character".
        DefineProperty("lexer.julia.highlight.lexerror", &OptionsJulia::highlightLexerror,
            "This option enables highlighting of syntax error in character or number definition.");

        DefineWordListSets(juliaWordLists);
    }
};
// Table describing every style emitted by the lexer.
// Each row is: style number, SCE_JULIA_* name, tags, human readable description.
LexicalClass juliaLexicalClasses[] = {
    // Lexer Julia SCLEX_JULIA SCE_JULIA_:
    { 0, "SCE_JULIA_DEFAULT", "default", "White space" },
    { 1, "SCE_JULIA_COMMENT", "comment", "Comment" },
    { 2, "SCE_JULIA_NUMBER", "literal numeric", "Number" },
    { 3, "SCE_JULIA_KEYWORD1", "keyword", "Reserved keywords" },
    { 4, "SCE_JULIA_KEYWORD2", "identifier", "Builtin type names" },
    { 5, "SCE_JULIA_KEYWORD3", "identifier", "Constants" },
    { 6, "SCE_JULIA_CHAR", "literal string character", "Single quoted string" },
    { 7, "SCE_JULIA_OPERATOR", "operator", "Operator" },
    { 8, "SCE_JULIA_BRACKET", "bracket operator", "Bracket operator" },
    { 9, "SCE_JULIA_IDENTIFIER", "identifier", "Identifier" },
    { 10, "SCE_JULIA_STRING", "literal string", "Double quoted String" },
    { 11, "SCE_JULIA_SYMBOL", "literal string symbol", "Symbol" },
    { 12, "SCE_JULIA_MACRO", "macro preprocessor", "Macro" },
    { 13, "SCE_JULIA_STRINGINTERP", "literal string interpolated", "String interpolation" },
    { 14, "SCE_JULIA_DOCSTRING", "literal string documentation", "Docstring" },
    { 15, "SCE_JULIA_STRINGLITERAL", "literal string", "String literal prefix" },
    { 16, "SCE_JULIA_COMMAND", "literal string command", "Command" },
    { 17, "SCE_JULIA_COMMANDLITERAL", "literal string command", "Command literal prefix" },
    { 18, "SCE_JULIA_TYPEANNOT", "identifier type", "Type annotation identifier" },
    { 19, "SCE_JULIA_LEXERROR", "lexer error", "Lexing error" },
    { 20, "SCE_JULIA_KEYWORD4", "identifier", "Builtin function names" },
    { 21, "SCE_JULIA_TYPEOPERATOR", "operator type", "Type annotation operator" },
};
// Lexer object for Julia. It owns the four word lists and the option values,
// forwards property and word-list queries to the option set, and implements
// Lex() and Fold() out of line.
class LexerJulia : public DefaultLexer {
    // Word lists, in the order used by WordListSet().
    WordList keywords;
    WordList identifiers2;
    WordList identifiers3;
    WordList identifiers4;
    // Current option values and the name -> member bindings for them.
    OptionsJulia options;
    OptionSetJulia osJulia;

public:
    explicit LexerJulia() :
        DefaultLexer("julia", SCLEX_JULIA, juliaLexicalClasses, ELEMENTS(juliaLexicalClasses)) {
    }

    virtual ~LexerJulia() = default;

    // The lexer destroys itself when released.
    void SCI_METHOD Release() override {
        delete this;
    }

    int SCI_METHOD Version() const override {
        return lvRelease5;
    }

    // --- property queries, all delegated to the option set ---

    const char * SCI_METHOD PropertyNames() override {
        return osJulia.PropertyNames();
    }

    int SCI_METHOD PropertyType(const char *name) override {
        return osJulia.PropertyType(name);
    }

    const char * SCI_METHOD DescribeProperty(const char *name) override {
        return osJulia.DescribeProperty(name);
    }

    Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;

    const char * SCI_METHOD PropertyGet(const char *key) override {
        return osJulia.PropertyGet(key);
    }

    // --- word lists ---

    const char * SCI_METHOD DescribeWordListSets() override {
        return osJulia.DescribeWordListSets();
    }

    Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;

    // --- lexing and folding ---

    void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;

    void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;

    // No private calls are supported; always returns a null pointer.
    void * SCI_METHOD PrivateCall(int, void *) override {
        return nullptr;
    }

    // Factory used when registering the lexer module.
    static ILexer5 *LexerFactoryJulia() {
        return new LexerJulia();
    }
};
// Store a property value into `options`.
// Returns 0 when the option set reports that the property was set, -1 otherwise.
Sci_Position SCI_METHOD LexerJulia::PropertySet(const char *key, const char *val) {
    const bool accepted = osJulia.PropertySet(&options, key, val);
    return accepted ? 0 : -1;
}
// Replace the contents of word list number `n` with the words in `wl`.
// Lists 0..3 map to keywords, identifiers2, identifiers3 and identifiers4;
// any other index is ignored.
// Returns 0 when the selected list reports a change, -1 otherwise.
Sci_Position SCI_METHOD LexerJulia::WordListSet(int n, const char *wl) {
    WordList *wordListN = nullptr;
    switch (n) {
    case 0:
        wordListN = &keywords;
        break;
    case 1:
        wordListN = &identifiers2;
        break;
    case 2:
        wordListN = &identifiers3;
        break;
    case 3:
        wordListN = &identifiers4;
        break;
    }
    Sci_Position firstModification = -1;
    if (wordListN) {
        // Stray non-code text that had been pasted here was removed.
        if (wordListN->Set(wl)) {
            firstModification = 0;
        }
    }
    return firstModification;
}
// True when `ch` is one of the ASCII operator characters
// % ^ & * - + = | < > / ~ and backslash.
static inline bool IsJuliaOperator(int ch) {
    switch (ch) {
    case '%': case '^': case '&': case '*':
    case '-': case '+': case '=': case '|':
    case '<': case '>': case '/': case '~':
    case '\\':
        return true;
    default:
        return false;
    }
}
// True when `ch` is one of the non-ASCII unary operators recognised here.
static inline bool IsJuliaUnaryOperator (int ch) {
    switch (ch) {
    case 0x00ac: // ¬
    case 0x221a: // √
    case 0x221b: // ∛
    case 0x221c: // ∜
    case 0x22c6: // ⋆
    case 0x00b1: // ±
    case 0x2213: // ∓
        return true;
    default:
        return false;
    }
}
// True when `ch` is a round, curly or square bracket (opening or closing).
static inline bool IsJuliaParen (int ch) {
    switch (ch) {
    case '(': case ')':
    case '{': case '}':
    case '[': case ']':
        return true;
    default:
        return false;
    }
}
// Unicode parsing from Julia source code:
// https://github.com/JuliaLang/julia/blob/master/src/flisp/julia_extensions.c
// keep the same function name to be easy to find again
//
// Returns non-zero when the code point `wc` is accepted as the first character
// of an identifier. The decision combines the Unicode general category of `wc`
// (letters, letter numbers, currency symbols, most "other symbols") with
// explicit whitelists of code points and code point ranges.
static int is_wc_cat_id_start(uint32_t wc) {
const CharacterCategory cat = CategoriseCharacter((int) wc);
return (cat == ccLu || cat == ccLl ||
cat == ccLt || cat == ccLm ||
cat == ccLo || cat == ccNl ||
cat == ccSc || // allow currency symbols
// other symbols, but not arrows or replacement characters
(cat == ccSo && !(wc >= 0x2190 && wc <= 0x21FF) &&
wc != 0xfffc && wc != 0xfffd &&
wc != 0x233f && // notslash
wc != 0x00a6) || // broken bar
// math symbol (category Sm) whitelist
// (the outer range test is a quick reject for everything outside 0x2140..0x2a1c)
(wc >= 0x2140 && wc <= 0x2a1c &&
((wc >= 0x2140 && wc <= 0x2144) || // ⅀, ⅁, ⅂, ⅃, ⅄
wc == 0x223f || wc == 0x22be || wc == 0x22bf || // ∿, ⊾, ⊿
wc == 0x22a4 || wc == 0x22a5 || // ⊤, ⊥
(wc >= 0x2200 && wc <= 0x2233 &&
(wc == 0x2202 || wc == 0x2205 || wc == 0x2206 || // ∂, ∅, ∆
wc == 0x2207 || wc == 0x220e || wc == 0x220f || // ∇, ∎, ∏
wc == 0x2200 || wc == 0x2203 || wc == 0x2204 || // ∀, ∃, ∄
wc == 0x2210 || wc == 0x2211 || // ∐, ∑
wc == 0x221e || wc == 0x221f || // ∞, ∟
wc >= 0x222b)) || // ∫, ∬, ∭, ∮, ∯, ∰, ∱, ∲, ∳
(wc >= 0x22c0 && wc <= 0x22c3) || // N-ary big ops: ⋀, ⋁, ⋂, ⋃
(wc >= 0x25F8 && wc <= 0x25ff) || // ◸, ◹, ◺, ◻, ◼, ◽, ◾, ◿
(wc >= 0x266f &&
(wc == 0x266f || wc == 0x27d8 || wc == 0x27d9 || // ♯, ⟘, ⟙
(wc >= 0x27c0 && wc <= 0x27c1) || // ⟀, ⟁
(wc >= 0x29b0 && wc <= 0x29b4) || // ⦰, ⦱, ⦲, ⦳, ⦴
(wc >= 0x2a00 && wc <= 0x2a06) || // ⨀, ⨁, ⨂, ⨃, ⨄, ⨅, ⨆
(wc >= 0x2a09 && wc <= 0x2a16) || // ⨉, ⨊, ⨋, ⨌, ⨍, ⨎, ⨏, ⨐, ⨑, ⨒, ⨓, ⨔, ⨕, ⨖
wc == 0x2a1b || wc == 0x2a1c)))) || // ⨛, ⨜
(wc >= 0x1d6c1 && // variants of \nabla and \partial
(wc == 0x1d6c1 || wc == 0x1d6db ||
wc == 0x1d6fb || wc == 0x1d715 ||
wc == 0x1d735 || wc == 0x1d74f ||
wc == 0x1d76f || wc == 0x1d789 ||
wc == 0x1d7a9 || wc == 0x1d7c3)) ||
// super- and subscript +-=()
(wc >= 0x207a && wc <= 0x207e) ||
(wc >= 0x208a && wc <= 0x208e) ||
// angle symbols
(wc >= 0x2220 && wc <= 0x2222) || // ∠, ∡, ∢
(wc >= 0x299b && wc <= 0x29af) || // ⦛, ⦜, ⦝, ⦞, ⦟, ⦠, ⦡, ⦢, ⦣, ⦤, ⦥, ⦦, ⦧, ⦨, ⦩, ⦪, ⦫, ⦬, ⦭, ⦮, ⦯
// Other_ID_Start
wc == 0x2118 || wc == 0x212E || // ℘, ℮
(wc >= 0x309B && wc <= 0x309C) || // katakana-hiragana sound marks
// bold-digits and double-struck digits
(wc >= 0x1D7CE && wc <= 0x1D7E1)); // 𝟎 through 𝟗 (inclusive), 𝟘 through 𝟡 (inclusive)
}
// True when `ch` may start an identifier.
// ASCII: a letter or underscore. Otherwise the code point must lie in
// 0xA1..0x10ffff and be accepted by is_wc_cat_id_start().
static inline bool IsIdentifierFirstCharacter (int ch) {
    if (IsASCII(ch)) {
        return isalpha(ch) != 0 || ch == '_';
    }
    const bool inUnicodeRange = (ch >= 0xA1) && (ch <= 0x10ffff);
    return inUnicodeRange && is_wc_cat_id_start(static_cast<uint32_t>(ch)) != 0;
}
static inline bool IsIdentifierCharacter (int ch) {
if (IsASCII(ch)) {
return (bool) (isalnum(ch) || ch == '_' || ch == '!');
}
if (ch < 0xA1 || ch > 0x10ffff) {
return false;
}
if (is_wc_cat_id_start((uint32_t) ch)) {
return true;
}
const CharacterCategory cat = CategoriseCharacter(ch);
if (cat == ccMn || cat == ccMc ||
cat == ccNd || cat == ccPc ||
cat == ccSk || cat == ccMe ||
cat == ccNo ||
// primes (single, double, triple, their reverses, and quadruple)
(ch >= 0x2032 && ch <= 0x2037) || (ch == 0x2057)) {
return true;
}
return false;
}
// keep the same name as in the Julia sources to be easy to find again
// Code points accepted as operator suffix characters (superscripts,
// subscripts, primes and modifier letters), listed in ascending order.
static const uint32_t opsuffs[] = {
    0x00b2, // ²
    0x00b3, // ³
    0x00b9, // ¹
    0x02b0, // ʰ
    0x02b2, // ʲ
    0x02b3, // ʳ
    0x02b7, // ʷ
    0x02b8, // ʸ
    0x02e1, // ˡ
    0x02e2, // ˢ
    0x02e3, // ˣ
    0x1d2c, // ᴬ
    0x1d2e, // ᴮ
    0x1d30, // ᴰ
    0x1d31, // ᴱ
    0x1d33, // ᴳ
    0x1d34, // ᴴ
    0x1d35, // ᴵ
    0x1d36, // ᴶ
    0x1d37, // ᴷ
    0x1d38, // ᴸ
    0x1d39, // ᴹ
    0x1d3a, // ᴺ
    0x1d3c, // ᴼ
    0x1d3e, // ᴾ
    0x1d3f, // ᴿ
    0x1d40, // ᵀ
    0x1d41, // ᵁ
    0x1d42, // ᵂ
    0x1d43, // ᵃ
    0x1d47, // ᵇ
    0x1d48, // ᵈ
    0x1d49, // ᵉ
    0x1d4d, // ᵍ
    0x1d4f, // ᵏ
    0x1d50, // ᵐ
    0x1d52, // ᵒ
    0x1d56, // ᵖ
    0x1d57, // ᵗ
    0x1d58, // ᵘ
    0x1d5b, // ᵛ
    0x1d5d, // ᵝ
    0x1d5e, // ᵞ
    0x1d5f, // ᵟ
    0x1d60, // ᵠ
    0x1d61, // ᵡ
    0x1d62, // ᵢ
    0x1d63, // ᵣ
    0x1d64, // ᵤ
    0x1d65, // ᵥ
    0x1d66, // ᵦ
    0x1d67, // ᵧ
    0x1d68, // ᵨ
    0x1d69, // ᵩ
    0x1d6a, // ᵪ
    0x1d9c, // ᶜ
    0x1da0, // ᶠ
    0x1da5, // ᶥ
    0x1da6, // ᶦ
    0x1dab, // ᶫ
    0x1db0, // ᶰ
    0x1db8, // ᶸ
    0x1dbb, // ᶻ
    0x1dbf, // ᶿ
    0x2032, // ′
    0x2033, // ″
    0x2034, // ‴
    0x2035, // ‵
    0x2036, // ‶
    0x2037, // ‷
    0x2057, // ⁗
    0x2070, // ⁰
    0x2071, // ⁱ
    0x2074, // ⁴
    0x2075, // ⁵
    0x2076, // ⁶
    0x2077, // ⁷
    0x2078, // ⁸
    0x2079, // ⁹
    0x207a, // ⁺
    0x207b, // ⁻
    0x207c, // ⁼
    0x207d, // ⁽
    0x207e, // ⁾
    0x207f, // ⁿ
    0x2080, // ₀
    0x2081, // ₁
    0x2082, // ₂
    0x2083, // ₃
    0x2084, // ₄
    0x2085, // ₅
    0x2086, // ₆
    0x2087, // ₇
    0x2088, // ₈
    0x2089, // ₉
    0x208a, // ₊
    0x208b, // ₋
    0x208c, // ₌
    0x208d, // ₍
    0x208e, // ₎
    0x2090, // ₐ
    0x2091, // ₑ
    0x2092, // ₒ
    0x2093, // ₓ
    0x2095, // ₕ
    0x2096, // ₖ
    0x2097, // ₗ
    0x2098, // ₘ
    0x2099, // ₙ
    0x209a, // ₚ
    0x209b, // ₛ
    0x209c, // ₜ
    0x2c7c, // ⱼ
    0x2c7d, // ⱽ
    0xa71b, // ꜛ
    0xa71c, // ꜜ
    0xa71d  // ꜝ
};
// Number of entries in opsuffs.
static const size_t opsuffs_len = sizeof(opsuffs) / sizeof(opsuffs[0]);
// keep the same function name to be easy to find again
static bool jl_op_suffix_char(uint32_t wc) {
if (wc < 0xA1 || wc > 0x10ffff) {
return false;
}
const CharacterCategory cat = CategoriseCharacter((int) wc);
if (cat == ccMn || cat == ccMc ||
cat == ccMe) {
return true;
}
for (size_t i = 0; i < opsuffs_len; ++i) {
if (wc == opsuffs[i]) {
return true;
}
}
return false;
}
// keep the same function name to be easy to find again
// True when `wc` can never be part of an identifier or operator: spaces and
// control characters, ASCII/Latin-1 non-connector punctuation, the backtick,
// and several families of non-ASCII brackets.
static bool never_id_char(uint32_t wc) {
    const CharacterCategory cat = CategoriseCharacter(static_cast<int>(wc));

    // spaces and control characters (categories ccZs through ccCs)
    if (cat >= ccZs && cat <= ccCs) {
        return true;
    }
    // ASCII and Latin1 non-connector punctuation (categories ccPd through ccPo)
    if (wc < 0xff && cat >= ccPd && cat <= ccPo) {
        return true;
    }
    if (wc == '`') {
        return true;
    }
    // mathematical brackets
    if (wc >= 0x27e6 && wc <= 0x27ef) {
        return true;
    }
    // angle, corner, and lenticular brackets
    if (wc >= 0x3008 && wc <= 0x3011) {
        return true;
    }
    // tortoise shell, square, and more lenticular brackets
    if (wc >= 0x3014 && wc <= 0x301b) {
        return true;
    }
    // fullwidth parens
    if (wc == 0xff08 || wc == 0xff09) {
        return true;
    }
    // fullwidth square brackets
    return wc == 0xff3b || wc == 0xff3d;
}
// True when `ch` may start an operator.
// ASCII: the IsJuliaOperator() set plus ! ? : ; , and '.'.
// Non-ASCII: never an identifier start character; otherwise accepted when it
// is a known unary operator or is not rejected by never_id_char().
static bool IsOperatorFirstCharacter (int ch) {
    if (IsASCII(ch)) {
        switch (ch) {
        case '!': case '?':
        case ':': case ';':
        case ',': case '.':
            return true;
        default:
            return IsJuliaOperator(ch);
        }
    }
    const uint32_t wc = static_cast<uint32_t>(ch);
    if (is_wc_cat_id_start(wc)) {
        return false;
    }
    return IsJuliaUnaryOperator(ch) || !never_id_char(wc);
}
// True when `ch` may continue an operator: either it could start one, or it
// is a non-ASCII operator suffix character.
static bool IsOperatorCharacter (int ch) {
    if (IsOperatorFirstCharacter(ch)) {
        return true;
    }
    return !IsASCII(ch) && jl_op_suffix_char(static_cast<uint32_t>(ch));
}
// True when `str` is exactly "begin" or "end", the two words that act as
// index values rather than block keywords inside an indexing expression.
// `str` must be a NUL-terminated string; it is only read.
static bool CheckBoundsIndexing(const char *str) {
    return strcmp("begin", str) == 0 || strcmp("end", str) == 0;
}
// Fold level change caused by the keyword `str`:
//   +1 for a keyword that opens a block,
//   -1 for "end",
//    0 for anything else.
// `str` must be a NUL-terminated string; it is only read.
static int CheckKeywordFoldPoint(const char *str) {
    static const char *const blockOpeners[] = {
        "if", "for", "while", "try", "do", "begin", "let",
        "baremodule", "quote", "module", "struct", "type",
        "macro", "function",
    };
    for (const char *opener : blockOpeners) {
        if (strcmp(opener, str) == 0) {
            return 1;
        }
    }
    return (strcmp("end", str) == 0) ? -1 : 0;
}
// True when `ch` introduces an exponent for a number written in `base`:
// 'e', 'E' or 'f' for base 10, 'p' or 'P' for base 16. No other base has one.
static bool IsNumberExpon(int ch, int base) {
    switch (base) {
    case 10:
        return ch == 'e' || ch == 'E' || ch == 'f';
    case 16:
        return ch == 'p' || ch == 'P';
    default:
        return false;
    }
}
/* Advances over the run of digits of the given base that follows the current
 * character ('_' also counts when allow_sep is set).
 * Returns true if at least one such character was consumed. */
static bool ScanDigits(StyleContext& sc, int base, bool allow_sep) {
    bool sawAny = false;
    while (IsADigit(sc.chNext, base) || (allow_sep && sc.chNext == '_')) {
        sc.Forward();
        sawAny = true;
    }
    return sawAny;
}
// Steps past the current character, then consumes up to `max` hexadecimal
// digits. Returns true (an error) when the first character after the step is
// not a hexadecimal digit, false otherwise.
static inline bool ScanNHexas(StyleContext &sc, int max) {
    sc.Forward();
    if (!IsADigit(sc.ch, 16)) {
        return true;
    }
    for (int consumed = 0; consumed < max && IsADigit(sc.ch, 16); ++consumed) {
        sc.Forward();
    }
    return false;
}
// Continue lexing a character literal (state SCE_JULIA_CHAR).
//
// sc       - style context positioned on the character after the opening quote,
//            or on a later character of the literal.
// lexerror - when true, malformed literals are restyled as SCE_JULIA_LEXERROR;
//            when false, only escapes and the closing quote are handled.
//
// The error-recovery loops stop at the end of the lexed range as well as at a
// quote or line end. StyleContext::Forward() stops advancing once the range is
// exhausted, so without that check an unterminated literal with no following
// line end would never leave the loop.
static void resumeCharacter(StyleContext &sc, bool lexerror) {
    bool error = false;

    // ''' case
    if (sc.chPrev == '\'' && sc.ch == '\'' && sc.chNext == '\'') {
        sc.Forward();
        sc.ForwardSetState(SCE_JULIA_DEFAULT);
        return;
    } else if (lexerror && sc.chPrev == '\'' && sc.ch == '\'') {
        // Empty literal ''
        sc.ChangeState(SCE_JULIA_LEXERROR);
        sc.ForwardSetState(SCE_JULIA_DEFAULT);

    // Escape characters
    } else if (sc.ch == '\\') {
        sc.Forward();
        if (sc.ch == '\'' || sc.ch == '\\' ) {
            sc.Forward();
        } else if (sc.ch == 'n' || sc.ch == 't' || sc.ch == 'a' ||
                   sc.ch == 'b' || sc.ch == 'e' || sc.ch == 'f' ||
                   sc.ch == 'r' || sc.ch == 'v' ) {
            sc.Forward();
        } else if (sc.ch == 'x') {
            // ScanNHexas steps past the letter itself before reading digits
            error |= ScanNHexas(sc, 2);
        } else if (sc.ch == 'u') {
            error |= ScanNHexas(sc, 4);
        } else if (sc.ch == 'U') {
            error |= ScanNHexas(sc, 8);
        } else if (IsADigit(sc.ch, 8)) {
            // Octal escape: at most three digits in total
            int n = 1;
            int max = 3;
            sc.Forward();
            while (IsADigit(sc.ch, 8) && n < max) {
                sc.Forward();
                n++;
            }
        }

        if (lexerror) {
            if (sc.ch != '\'') {
                // Something other than the closing quote follows the escape:
                // skip to the next quote, the line end or the end of the range.
                error = true;
                while (sc.More() &&
                       sc.ch != '\'' &&
                       sc.ch != '\r' &&
                       sc.ch != '\n') {
                    sc.Forward();
                }
            }

            if (error) {
                sc.ChangeState(SCE_JULIA_LEXERROR);
                sc.ForwardSetState(SCE_JULIA_DEFAULT);
            }
        }
    } else if (lexerror) {
        if (sc.ch < 0x20 || sc.ch > 0x10ffff) {
            error = true;
        } else {
            // single character
            sc.Forward();

            if (sc.ch != '\'') {
                // More than one character before the closing quote:
                // skip to the next quote, the line end or the end of the range.
                error = true;
                while (sc.More() &&
                       sc.ch != '\'' &&
                       sc.ch != '\r' &&
                       sc.ch != '\n') {
                    sc.Forward();
                }
            }
        }

        if (error) {
            sc.ChangeState(SCE_JULIA_LEXERROR);
            sc.ForwardSetState(SCE_JULIA_DEFAULT);
        }
    }

    // closing quote
    if (sc.ch == '\'') {
        if (sc.chNext == '\'') {
            // Keep the state: the next call handles the following quote
            sc.Forward();
        } else {
            sc.ForwardSetState(SCE_JULIA_DEFAULT);
        }
    }
}
// True when the current character sits between two single quotes,
// i.e. the characters before and after it are both '\''.
static inline bool IsACharacter(StyleContext &sc) {
    const bool quoteBefore = (sc.chPrev == '\'');
    const bool quoteAfter = (sc.chNext == '\'');
    return quoteBefore && quoteAfter;
}
// Advances through a parenthesised interpolation until the parenthesis that
// balances the first opening one, leaving the context on that closing ')'.
// If the range ends first, the context is left at the end of the range.
static void ScanParenInterpolation(StyleContext &sc) {
    // TODO: no syntax highlighting inside a string interpolation

    // Current depth of nested parentheses
    int depth = 0;
    // While inside a double-quoted string, parentheses are not counted
    bool insideString = false;

    while (sc.More()) {
        // TODO: check corner cases for nested string interpolation
        // TODO: check corner cases with Command inside interpolation
        if (sc.ch == '\"' && sc.chPrev != '\\') {
            // An unescaped double quote toggles the string environment
            insideString = !insideString;
        } else if (!insideString && !IsACharacter(sc)) {
            // Parentheses written as character literals ('(' or ')') are skipped
            if (sc.ch == '(') {
                depth++;
            } else if (sc.ch == ')' && depth > 0) {
                depth--;
                if (depth == 0) {
                    // Exit interpolation
                    return;
                }
            }
        }
        sc.Forward();
    }
}
/*
 * Begin a number token: switch to SCE_JULIA_NUMBER and work out the base.
 * `base` receives 16, 8 or 2 after a 0x, 0o or 0b prefix and 10 otherwise;
 * `with_dot` is set when the number starts with '.' or with "0x.".
 */
static void initNumber (StyleContext &sc, int &base, bool &with_dot) {
    base = 10;
    with_dot = false;
    sc.SetState(SCE_JULIA_NUMBER);

    if (sc.ch == '.') {
        with_dot = true;
        return;
    }
    if (sc.ch != '0') {
        return;
    }

    // A leading zero may introduce a base prefix
    switch (sc.chNext) {
    case 'x':
        sc.Forward();
        base = 16;
        if (sc.chNext == '.') {
            sc.Forward();
            with_dot = true;
        }
        break;
    case 'o':
        sc.Forward();
        base = 8;
        break;
    case 'b':
        sc.Forward();
        base = 2;
        break;
    default:
        break;
    }
}
/*
 * Resume parsing a String or Command, bounded by the `quote` character (\" or \`)
 * The `triple` argument specifies if it is a triple-quote String or Command.
 * Interpolation is detected (with `$`), and parsed if `allow_interp` is true.
 * When `full_highlight` is true the `$` itself is included in the
 * SCE_JULIA_STRINGINTERP style and `$identifier` is styled as well; when it is
 * false only the parenthesised part of `$(...)` gets that style.
 * On the closing quote(s) the state is set to SCE_JULIA_DEFAULT.
 */
static void resumeStringLike(StyleContext &sc, int quote, bool triple, bool allow_interp, bool full_highlight) {
// Style to restore after an interpolation
int stylePrev = sc.state;
// Set when an interpolation was consumed and the new current character
// must be examined by a recursive call
bool checkcurrent = false;
// Escape characters
if (sc.ch == '\\') {
// Skip an escaped quote, backslash or dollar so it is not interpreted
if (sc.chNext == quote || sc.chNext == '\\' || sc.chNext == '$') {
sc.Forward();
}
} else if (allow_interp && sc.ch == '$') {
// If the interpolation is only of a variable, do not change state
if (sc.chNext == '(') {
if (full_highlight) {
// The interpolation style starts at the `$`
sc.SetState(SCE_JULIA_STRINGINTERP);
} else {
// The interpolation style starts at the `(`
sc.ForwardSetState(SCE_JULIA_STRINGINTERP);
}
ScanParenInterpolation(sc);
// Step past the character the scan stopped on and restore the string style
sc.ForwardSetState(stylePrev);
checkcurrent = true;
} else if (full_highlight && IsIdentifierFirstCharacter(sc.chNext)) {
sc.SetState(SCE_JULIA_STRINGINTERP);
// Skip the `$` and the first identifier character
sc.Forward();
sc.Forward();
// Consume the rest of the identifier
for (; sc.More(); sc.Forward()) {
if (! IsIdentifierCharacter(sc.ch)) {
break;
}
}
sc.SetState(stylePrev);
checkcurrent = true;
}
if (checkcurrent) {
// Check that the current character is not a special char,
// otherwise it will be skipped
resumeStringLike(sc, quote, triple, allow_interp, full_highlight);
}
} else if (sc.ch == quote) {
if (triple) {
// A triple-quoted literal only ends on three consecutive quotes
if (sc.chNext == quote && sc.GetRelativeCharacter(2) == quote) {
// Move to the end of the triple quotes
Sci_PositionU nextIndex = sc.currentPos + 2;
while (nextIndex > sc.currentPos && sc.More()) {
sc.Forward();
}
sc.ForwardSetState(SCE_JULIA_DEFAULT);
}
} else {
sc.ForwardSetState(SCE_JULIA_DEFAULT);
}
}
}
// Continue a command literal: resumeStringLike() with a backtick as the
// delimiter and full interpolation highlighting.
static void resumeCommand(StyleContext &sc, bool triple, bool allow_interp) {
    const int delimiter = '`';
    resumeStringLike(sc, delimiter, triple, allow_interp, true);
}
// Continue a string literal: resumeStringLike() with a double quote as the
// delimiter and full interpolation highlighting.
static void resumeString(StyleContext &sc, bool triple, bool allow_interp) {
    const int delimiter = '"';
    resumeStringLike(sc, delimiter, triple, allow_interp, true);
}
// Continue lexing a number (state SCE_JULIA_NUMBER) at the current character.
//
// base     - base chosen when the number started.
// with_dot - whether a '.' has already been seen; updated here.
// lexerror - when true, a decimal digit that is invalid for `base` restyles
//            the number as SCE_JULIA_LEXERROR.
static void resumeNumber (StyleContext &sc, int base, bool &with_dot, bool lexerror) {
    // Exponent marker: consume sign/digits if present, then the number ends
    if (IsNumberExpon(sc.ch, base)) {
        const bool exponentFollows =
            IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-';
        if (exponentFollows) {
            sc.Forward();
            // Capture all digits
            ScanDigits(sc, 10, false);
            sc.Forward();
        }
        sc.SetState(SCE_JULIA_DEFAULT);
        return;
    }

    // Interval operator `..`
    if (sc.ch == '.' && sc.chNext == '.') {
        sc.SetState(SCE_JULIA_OPERATOR);
        sc.Forward();
        sc.ForwardSetState(SCE_JULIA_DEFAULT);
        return;
    }

    // First dot of the number: fractional digits follow
    if (sc.ch == '.' && !with_dot) {
        with_dot = true;
        ScanDigits(sc, base, true);
        return;
    }

    // Ordinary digit or separator
    if (IsADigit(sc.ch, base) || sc.ch == '_') {
        ScanDigits(sc, base, true);
        return;
    }

    // Decimal digit that is not valid in this base
    if (IsADigit(sc.ch) && !IsADigit(sc.ch, base)) {
        if (lexerror) {
            sc.ChangeState(SCE_JULIA_LEXERROR);
        }
        ScanDigits(sc, 10, false);
        sc.ForwardSetState(SCE_JULIA_DEFAULT);
        return;
    }

    // Anything else ends the number
    sc.SetState(SCE_JULIA_DEFAULT);
}
// Continue lexing an operator (state SCE_JULIA_OPERATOR): decide whether the
// current character still belongs to the operator or ends it.
static void resumeOperator (StyleContext &sc) {
// Current character is `:`, `<` or `>` and is followed by `:`
// (for `>` only when it is not preceded by `-` or `=`):
// both characters are consumed and the state returns to default.
if (sc.chNext == ':' && (sc.ch == ':' || sc.ch == '<' ||
(sc.ch == '>' && (sc.chPrev != '-' && sc.chPrev != '=')))) {
// Case `:a=>:b`
sc.Forward();
sc.ForwardSetState(SCE_JULIA_DEFAULT);
} else if (sc.ch == ':') {
// Case `foo(:baz,:baz)` or `:one+:two`
// Let the default case switch decide if it is a symbol
sc.SetState(SCE_JULIA_DEFAULT);
} else if (sc.ch == '\'') {
// A quote always ends the operator; the caller decides what it starts
sc.SetState(SCE_JULIA_DEFAULT);
// The operator ends when the current character is:
//  - a '.' that does not follow another '.', or
//  - a character that can start an identifier, or
//  - not an operator character, unless the previous character is '.'
//    and the current one can start an operator.
} else if ((sc.ch == '.' && sc.chPrev != '.') || IsIdentifierFirstCharacter(sc.ch) ||
(! (sc.chPrev == '.' && IsOperatorFirstCharacter(sc.ch)) &&
! IsOperatorCharacter(sc.ch)) ) {
sc.SetState(SCE_JULIA_DEFAULT);
}
}
// Assign styles to the document range starting at `startPos` and spanning
// `length` positions.
//
// startPos  - first position to style.
// length    - number of positions to style.
// initStyle - style in effect at startPos.
// pAccess   - document being lexed.
//
// State that must survive from one line to the next is packed into the line
// state of each line (written at every line end, read back from the line
// before startPos):
//   bit  0      transpose           a following ' means transpose
//   bit  1      istripledocstring   inside a triple doublequote string
//   bit  2      triple_backtick     inside a triple backtick command
//   bit  3      israwstring         inside a prefixed (non interpolating) literal
//   bits 4-7    indexing_level      nesting depth of [ ]
//   bits 8-11   list_comprehension  nesting depth of ( ) and [ ]
//   bits 12-15  commentDepth        nesting depth of #= =# comments
// Each counter is stored in 4 bits, so only its low 4 bits are kept.
void SCI_METHOD LexerJulia::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
    PropSetSimple props;
    Accessor styler(pAccess, &props);

    Sci_Position pos = startPos;
    styler.StartAt(pos);
    styler.StartSegment(pos);

    // use the line state of each line to store block/multiline states
    Sci_Position curLine = styler.GetLine(startPos);
    // Default is false for everything and 0 counters.
    int lineState = (curLine > 0) ? styler.GetLineState(curLine-1) : 0;

    bool transpose = (lineState >> 0) & 0x01;                // 1 bit to know if ' is allowed to mean transpose
    bool istripledocstring = (lineState >> 1) & 0x01;        // 1 bit to know if we are in a triple doublequotes string
    bool triple_backtick = (lineState >> 2) & 0x01;          // 1 bit to know if we are in a triple backtick command
    bool israwstring = (lineState >> 3) & 0x01;              // 1 bit to know if we are in a raw string
    int indexing_level = (int)((lineState >> 4) & 0x0F);     // 4 bits of bracket nesting counter
    int list_comprehension = (int)((lineState >> 8) & 0x0F); // 4 bits of parenthesis nesting counter
    int commentDepth = (int)((lineState >> 12) & 0x0F);      // 4 bits of nested comment counter

    // base for parsing number
    int base = 10;
    // number has a float dot ?
    bool with_dot = false;

    StyleContext sc(startPos, length, initStyle, styler);

    for (; sc.More(); sc.Forward()) {

        //// check for end of states
        switch (sc.state) {
            case SCE_JULIA_BRACKET:
                // Brackets are always a single character
                sc.SetState(SCE_JULIA_DEFAULT);
                break;
            case SCE_JULIA_OPERATOR:
                resumeOperator(sc);
                break;
            case SCE_JULIA_TYPEOPERATOR:
                sc.SetState(SCE_JULIA_DEFAULT);
                break;
            case SCE_JULIA_TYPEANNOT:
                if (! IsIdentifierCharacter(sc.ch)) {
                    sc.SetState(SCE_JULIA_DEFAULT);
                }
                break;
            case SCE_JULIA_IDENTIFIER:
                // String literal
                if (sc.ch == '\"') {
                    // If the string literal has a prefix, interpolation is disabled
                    israwstring = true;
                    sc.ChangeState(SCE_JULIA_STRINGLITERAL);
                    sc.SetState(SCE_JULIA_DEFAULT);

                } else if (sc.ch == '`') {
                    // If the string literal has a prefix, interpolation is disabled
                    israwstring = true;
                    sc.ChangeState(SCE_JULIA_COMMANDLITERAL);
                    sc.SetState(SCE_JULIA_DEFAULT);

                // Continue if the character is an identifier character
                } else if (! IsIdentifierCharacter(sc.ch)) {
                    char s[MAX_JULIA_IDENT_CHARS + 1];
                    sc.GetCurrent(s, sizeof(s));

                    // Treat the keywords differently if we are indexing or not
                    if ( indexing_level > 0 && CheckBoundsIndexing(s)) {
                        // Inside [], (), `begin` and `end` are numbers not block keywords
                        sc.ChangeState(SCE_JULIA_NUMBER);
                        transpose = false;

                    } else {
                        if (keywords.InList(s)) {
                            sc.ChangeState(SCE_JULIA_KEYWORD1);
                            transpose = false;
                        } else if (identifiers2.InList(s)) {
                            sc.ChangeState(SCE_JULIA_KEYWORD2);
                            transpose = false;
                        } else if (identifiers3.InList(s)) {
                            sc.ChangeState(SCE_JULIA_KEYWORD3);
                            transpose = false;
                        } else if (identifiers4.InList(s)) {
                            sc.ChangeState(SCE_JULIA_KEYWORD4);
                            // These identifiers can be used for variable names also,
                            // so transpose is not forbidden.
                            //transpose = false;
                        }
                    }
                    sc.SetState(SCE_JULIA_DEFAULT);

                    // TODO: recognize begin-end blocks inside list comprehension
                    // b = [(begin n%2; n*2 end) for n in 1:10]
                    // TODO: recognize better comprehension for-if to avoid problem with code-folding
                    // c = [(if isempty(a); missing else first(b) end) for (a, b) in zip(l1, l2)]
                }
                break;
            case SCE_JULIA_NUMBER:
                resumeNumber(sc, base, with_dot, options.highlightLexerror);
                break;
            case SCE_JULIA_CHAR:
                resumeCharacter(sc, options.highlightLexerror);
                break;
            case SCE_JULIA_DOCSTRING:
                resumeString(sc, true, !israwstring);
                if (sc.state == SCE_JULIA_DEFAULT && israwstring) {
                    israwstring = false;
                }
                break;
            case SCE_JULIA_STRING:
                resumeString(sc, false, !israwstring);
                if (sc.state == SCE_JULIA_DEFAULT && israwstring) {
                    israwstring = false;
                }
                break;
            case SCE_JULIA_COMMAND:
                resumeCommand(sc, triple_backtick, !israwstring);
                // Clear the prefix flag once the command has ended, as the
                // string cases do. Otherwise the flag set by a prefixed
                // command would disable interpolation in the next ordinary
                // string or command.
                if (sc.state == SCE_JULIA_DEFAULT && israwstring) {
                    israwstring = false;
                }
                break;
            case SCE_JULIA_MACRO:
                if (IsASpace(sc.ch) || ! IsIdentifierCharacter(sc.ch)) {
                    sc.SetState(SCE_JULIA_DEFAULT);
                }
                break;
            case SCE_JULIA_SYMBOL:
                if (! IsIdentifierCharacter(sc.ch)) {
                    sc.SetState(SCE_JULIA_DEFAULT);
                }
                break;
            case SCE_JULIA_COMMENT:
                if( commentDepth > 0 ) {
                    // end or start of a nested block comment
                    if ( sc.ch == '=' && sc.chNext == '#') {
                        commentDepth --;
                        sc.Forward();

                        if (commentDepth == 0) {
                            sc.ForwardSetState(SCE_JULIA_DEFAULT);
                        }
                    } else if( sc.ch == '#' && sc.chNext == '=') {
                        commentDepth ++;
                        sc.Forward();
                    }
                } else {
                    // single line comment
                    if (sc.atLineEnd || sc.ch == '\r' || sc.ch == '\n') {
                        sc.SetState(SCE_JULIA_DEFAULT);
                        transpose = false;
                    }
                }
                break;
        }

        // check start of a new state
        if (sc.state == SCE_JULIA_DEFAULT) {
            if (sc.ch == '#') {
                sc.SetState(SCE_JULIA_COMMENT);
                // increment depth if we are a block comment
                if(sc.chNext == '=') {
                    commentDepth ++;
                    sc.Forward();
                }
            } else if (sc.ch == '!') {
                sc.SetState(SCE_JULIA_OPERATOR);
                transpose = false;
            } else if (sc.ch == '\'') {
                // After a value a quote is the transpose operator,
                // otherwise it opens a character literal
                if (transpose) {
                    sc.SetState(SCE_JULIA_OPERATOR);
                } else {
                    sc.SetState(SCE_JULIA_CHAR);
                }
            } else if (sc.ch == '\"') {
                istripledocstring = (sc.chNext == '\"' && sc.GetRelativeCharacter(2) == '\"');
                if (istripledocstring) {
                    sc.SetState(SCE_JULIA_DOCSTRING);
                    // Move to the end of the triple quotes
                    Sci_PositionU nextIndex = sc.currentPos + 2;
                    while (nextIndex > sc.currentPos && sc.More()) {
                        sc.Forward();
                    }
                } else {
                    sc.SetState(SCE_JULIA_STRING);
                }
            } else if (sc.ch == '`') {
                triple_backtick = (sc.chNext == '`' && sc.GetRelativeCharacter(2) == '`');
                sc.SetState(SCE_JULIA_COMMAND);
                if (triple_backtick) {
                    // Move to the end of the triple backticks
                    Sci_PositionU nextIndex = sc.currentPos + 2;
                    while (nextIndex > sc.currentPos && sc.More()) {
                        sc.Forward();
                    }
                }
            } else if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
                initNumber(sc, base, with_dot);
            } else if (IsIdentifierFirstCharacter(sc.ch)) {
                sc.SetState(SCE_JULIA_IDENTIFIER);
                transpose = true;
            } else if (sc.ch == '@') {
                sc.SetState(SCE_JULIA_MACRO);
                transpose = false;

            // Several parsing of operators, should keep the order of `if` blocks
            } else if ((sc.ch == ':' || sc.ch == '<' || sc.ch == '>') && sc.chNext == ':') {
                sc.SetState(SCE_JULIA_TYPEOPERATOR);
                sc.Forward();
                // Highlight the next identifier, if option is set
                if (options.highlightTypeannotation &&
                    IsIdentifierFirstCharacter(sc.chNext)) {
                    sc.ForwardSetState(SCE_JULIA_TYPEANNOT);
                }
            } else if (sc.ch == ':') {
                // TODO: improve detection of range
                // should be solved with begin-end parsing
                // `push!(arr, s1 :s2)` and `a[begin :end]
                if (IsIdentifierFirstCharacter(sc.chNext) &&
                    ! IsIdentifierCharacter(sc.chPrev) &&
                    sc.chPrev != ')' && sc.chPrev != ']' ) {
                    sc.SetState(SCE_JULIA_SYMBOL);
                } else {
                    sc.SetState(SCE_JULIA_OPERATOR);
                }
            } else if (IsJuliaParen(sc.ch)) {
                if (sc.ch == '[') {
                    list_comprehension ++;
                    indexing_level ++;
                } else if (sc.ch == ']' && (indexing_level > 0)) {
                    // Unbalanced input such as `[ ) ]` must not push the
                    // counter below zero; a negative value would be stored
                    // as 15 in the 4-bit line state field.
                    if (list_comprehension > 0) {
                        list_comprehension --;
                    }
                    indexing_level --;
                } else if (sc.ch == '(') {
                    list_comprehension ++;
                } else if (sc.ch == ')' && (list_comprehension > 0)) {
                    list_comprehension --;
                }

                // A quote after a closing bracket means transpose
                if (sc.ch == ')' || sc.ch == ']' || sc.ch == '}') {
                    transpose = true;
                } else {
                    transpose = false;
                }
                sc.SetState(SCE_JULIA_BRACKET);
            } else if (IsOperatorFirstCharacter(sc.ch)) {
                transpose = false;
                sc.SetState(SCE_JULIA_OPERATOR);
            } else {
                transpose = false;
            }
        }

        // update the line information (used for line-by-line lexing and folding)
        if (sc.atLineEnd) {
            // set the line state to the current state
            curLine = styler.GetLine(sc.currentPos);

            lineState = ((transpose ? 1 : 0) << 0) |
                        ((istripledocstring ? 1 : 0) << 1) |
                        ((triple_backtick ? 1 : 0) << 2) |
                        ((israwstring ? 1 : 0) << 3) |
                        ((indexing_level & 0x0F) << 4) |
                        ((list_comprehension & 0x0F) << 8) |
                        ((commentDepth & 0x0F) << 12);
            styler.SetLineState(curLine, lineState);
        }
    }
    sc.Complete();
}
// Compute fold levels for the lines covered by the range starting at
// `startPos` and spanning `length` positions, using the styles already
// assigned to the document. Does nothing when the "fold" option is off.
//
// Each line's level value holds the level at the start of the line in its
// low bits and the level for the next line shifted left by 16, combined with
// the white-space and header flags.
void SCI_METHOD LexerJulia::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
if (!options.fold)
return;
LexAccessor styler(pAccess);
Sci_PositionU endPos = startPos + length;
// Count of non-space characters seen on the current line
int visibleChars = 0;
Sci_Position lineCurrent = styler.GetLine(startPos);
int levelCurrent = SC_FOLDLEVELBASE;
int lineState = 0;
if (lineCurrent > 0) {
// Continue from the "next level" half stored for the previous line
levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
lineState = styler.GetLineState(lineCurrent-1);
}
// level of nested brackets
int indexing_level = (int)((lineState >> 4) & 0x0F); // 4 bits of bracket nesting counter
// level of nested parenthesis or brackets
int list_comprehension = (int)((lineState >> 8) & 0x0F); // 4 bits of parenthesis nesting counter
//int commentDepth = (int)((lineState >> 12) & 0x0F); // 4 bits of nested comment counter
Sci_PositionU lineStartNext = styler.LineStart(lineCurrent+1);
int levelNext = levelCurrent;
char chNext = styler[startPos];
// NOTE(review): when startPos is 0, `startPos - 1` wraps around for the
// unsigned position type; confirm StyleAt tolerates that position.
int stylePrev = styler.StyleAt(startPos - 1);
int styleNext = styler.StyleAt(startPos);
int style = initStyle;
// Buffer collecting the characters of the keyword currently being read
char word[100];
int wordlen = 0;
for (Sci_PositionU i = startPos; i < endPos; i++) {
char ch = chNext;
chNext = styler.SafeGetCharAt(i + 1);
style = styleNext;
styleNext = styler.StyleAt(i + 1);
// True on the last position of the current line
bool atEOL = i == (lineStartNext-1);
// a start/end of comment block
// NOTE(review): the test below looks only at the style and the two
// characters, not at the comment nesting depth, so a `#=` or `=#` inside a
// single-line comment also changes the level - confirm whether intended.
if (options.foldComment && style == SCE_JULIA_COMMENT) {
// start of block comment
if (ch == '#' && chNext == '=') {
levelNext ++;
}
// end of block comment
if (ch == '=' && chNext == '#' && levelNext > 0) {
levelNext --;
}
}
// Syntax based folding, accounts for list comprehension
if (options.foldSyntaxBased) {
// list comprehension allow `for`, `if` and `begin` without `end`
if (style == SCE_JULIA_BRACKET) {
// Square and round brackets open/close a fold level;
// curly brackets do not affect folding.
if (ch == '[') {
list_comprehension ++;
indexing_level ++;
levelNext ++;
} else if (ch == ']') {
list_comprehension --;
indexing_level --;
levelNext --;
} else if (ch == '(') {
list_comprehension ++;
levelNext ++;
} else if (ch == ')') {
list_comprehension --;
levelNext --;
}
// check non-negative
if (indexing_level < 0) {
indexing_level = 0;
}
if (list_comprehension < 0) {
list_comprehension = 0;
}
}
// keyword
if (style == SCE_JULIA_KEYWORD1) {
word[wordlen++] = static_cast<char>(ch);
if (wordlen == 100) { // prevent overflow
// Discard the over-long word: it becomes a string starting with NUL
word[0] = '\0';
wordlen = 1;
}
if (styleNext != SCE_JULIA_KEYWORD1) {
// End of the keyword: terminate it and apply its fold effect
word[wordlen] = '\0';
wordlen = 0;
// Keywords inside brackets or parentheses do not change the level
if (list_comprehension <= 0 && indexing_level <= 0) {
levelNext += CheckKeywordFoldPoint(word);
}
}
}
}
// Docstring
if (options.foldDocstring) {
// Level goes up on the first docstring character and down on the last one
if (stylePrev != SCE_JULIA_DOCSTRING && style == SCE_JULIA_DOCSTRING) {
levelNext ++;
} else if (style == SCE_JULIA_DOCSTRING && styleNext != SCE_JULIA_DOCSTRING) {
levelNext --;
}
}
// check non-negative level
if (levelNext < 0) {
levelNext = 0;
}
if (!IsASpace(ch)) {
visibleChars++;
}
stylePrev = style;
// At the end of each line (or of the range) store the level for the line
if (atEOL || (i == endPos-1)) {
int levelUse = levelCurrent;
int lev = levelUse | levelNext << 16;
if (visibleChars == 0 && options.foldCompact) {
lev |= SC_FOLDLEVELWHITEFLAG;
}
// A line whose following level is deeper is a fold header
if (levelUse < levelNext) {
lev |= SC_FOLDLEVELHEADERFLAG;
}
// Only write when the value changed
if (lev != styler.LevelAt(lineCurrent)) {
styler.SetLevel(lineCurrent, lev);
}
lineCurrent++;
lineStartNext = styler.LineStart(lineCurrent+1);
levelCurrent = levelNext;
if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length() - 1))) {
// There is an empty line at end of file so give it same level and empty
styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
}
visibleChars = 0;
}
}
}
// Lexer module object for Julia: ties the SCLEX_JULIA identifier and the name
// "julia" to the LexerJulia factory function and the word list descriptions.
extern const LexerModule lmJulia(SCLEX_JULIA, LexerJulia::LexerFactoryJulia, "julia", juliaWordLists);