You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1901 lines
61 KiB
1901 lines
61 KiB
// Scintilla source code edit control |
|
/** @file LexPerl.cxx |
|
** Lexer for Perl. |
|
** Converted to lexer object by "Udo Lechner" <dlchnr(at)gmx(dot)net> |
|
**/ |
|
// Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org> |
|
// Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my> |
|
// The License.txt file describes the conditions under which this software may be distributed. |
|
|
|
#include <stdlib.h> |
|
#include <string.h> |
|
#include <stdio.h> |
|
#include <stdarg.h> |
|
#include <assert.h> |
|
#include <ctype.h> |
|
|
|
#include <string> |
|
#include <string_view> |
|
#include <map> |
|
#include <functional> |
|
|
|
#include "ILexer.h" |
|
#include "Scintilla.h" |
|
#include "SciLexer.h" |
|
|
|
#include "WordList.h" |
|
#include "LexAccessor.h" |
|
#include "StyleContext.h" |
|
#include "CharacterSet.h" |
|
#include "LexerModule.h" |
|
#include "OptionSet.h" |
|
#include "DefaultLexer.h" |
|
|
|
using namespace Scintilla; |
|
using namespace Lexilla; |
|
|
|
namespace { |
|
|
|
// Info for HERE document handling from perldata.pod (reformatted): |
|
// ---------------------------------------------------------------- |
|
// A line-oriented form of quoting is based on the shell ``here-doc'' syntax. |
|
// Following a << you specify a string to terminate the quoted material, and |
|
// all lines following the current line down to the terminating string are |
|
// the value of the item. |
|
// Prefixing the terminating string with a "~" specifies that you want to |
|
// use "Indented Here-docs" (see below). |
|
// * The terminating string may be either an identifier (a word), or some |
|
// quoted text. |
|
// * If quoted, the type of quotes you use determines the treatment of the |
|
// text, just as in regular quoting. |
|
// * An unquoted identifier works like double quotes. |
|
// * There must be no space between the << and the identifier. |
|
// (If you put a space it will be treated as a null identifier, |
|
// which is valid, and matches the first empty line.) |
|
// (This is deprecated, -w warns of this syntax) |
|
// * The terminating string must appear by itself (unquoted and |
|
// with no surrounding whitespace) on the terminating line. |
|
// |
|
// Indented Here-docs |
|
// ------------------ |
|
// The here-doc modifier "~" allows you to indent your here-docs to |
|
// make the code more readable. |
|
// The delimiter is used to determine the exact whitespace to remove |
|
// from the beginning of each line. All lines must have at least the |
|
// same starting whitespace (except lines only containing a newline) |
|
// or perl will croak. Tabs and spaces can be mixed, but are matched |
|
// exactly. One tab will not be equal to 8 spaces! |
|
// Additional beginning whitespace (beyond what preceded the |
|
// delimiter) will be preserved. |
|
|
|
// Maximum length of a HERE document delimiter.
#define HERE_DELIM_MAX 256

// Number sub-states tracked while lexing a numeric literal.
// The numeric order matters: states 1-3 can never contain a dot,
// states 1-5 are real numbers, and states 6-7 are vector strings.
#define PERLNUM_BINARY 1
#define PERLNUM_OCTAL 2
#define PERLNUM_FLOAT_EXP 3	// exponent part only
#define PERLNUM_HEX 4		// may turn out to be a hex float
#define PERLNUM_DECIMAL 5
#define PERLNUM_VECTOR 6
#define PERLNUM_V_VECTOR 7
#define PERLNUM_BAD 8

// Look-back state used for bareword disambiguation. Whitespace and comments
// are insignificant; only a preceding operator or keyword is remembered.
#define BACK_NONE 0
#define BACK_OPERATOR 1
#define BACK_KEYWORD 2

// States of the backwards scan that recognises a subroutine prototype.
#define SUB_BEGIN 0		// nothing matched yet
#define SUB_HAS_PROTO 1		// only the 'prototype' attribute allows prototypes
#define SUB_HAS_ATTRIB 2	// other attributes can exist leftward
#define SUB_HAS_MODULE 3	// sub name can have a ::identifier part
#define SUB_HAS_SUB 4		// 'sub' keyword

// Every interpolated style differs from its parent style by the same constant.
// SCE_PL_STRING_VAR is also assumed to be the interpolated style with the
// smallest value, so "state >= SCE_PL_STRING_VAR" identifies interpolated styles.
#define INTERPOLATE_SHIFT (SCE_PL_STRING_VAR - SCE_PL_STRING)
|
|
|
// Tests whether the document text in [start, end) is a word in 'keywords'.
// The text is copied out by hand (old-style matcher) because GetCurrent()
// needs the current segment to be committed, and callers may abandon early.
// Only the first 30 characters of the range take part in the comparison.
bool isPerlKeyword(Sci_PositionU start, Sci_PositionU end, WordList &keywords, LexAccessor &styler) {
	constexpr Sci_PositionU maxCompared = 30;
	char word[100];
	const Sci_PositionU span = end - start;
	const Sci_PositionU count = (span > maxCompared) ? maxCompared : span;
	for (Sci_PositionU n = 0; n < count; n++) {
		word[n] = styler[start + n];
	}
	word[count] = '\0';
	return keywords.InList(word);
}
|
|
|
// Perl treats identifiers as barewords under some conditions. This helper
// looks backward (at the last significant lexed item, ending at backPos) and
// forward (from fw, up to endPos) to classify the word.
// Result bits:
//   1 - preceded by '&' (but not '&&'), '->', '::' or 'sub'
//   2 - '{bareword}' variable spec, or '[{(,' bareword '=>' hash literal
// Returns 0 immediately when backFlag is BACK_NONE.
int disambiguateBareword(LexAccessor &styler, Sci_PositionU bk, Sci_PositionU fw,
	int backFlag, Sci_PositionU backPos, Sci_PositionU endPos) {
	// neither operator nor keyword seen before the word: nothing to test
	if (backFlag == BACK_NONE)
		return 0;

	// Some patterns must sit on a single line: note whether the last
	// significant item lies at or before the start of the word's line,
	// i.e. a newline/comment was passed on the way back.
	const Sci_PositionU wordLineStart =
		static_cast<Sci_PositionU>(styler.LineStart(styler.GetLine(bk)));
	const bool crossedLine = backPos <= wordLineStart;

	// inspect the last character of the previous significant item
	bk = backPos - 1;
	const int prevCh = static_cast<unsigned char>(styler.SafeGetCharAt(bk));

	int result = 0;
	bool afterBrace = false;	// true for a same-line '{' right before the word
	if (prevCh == '{' && !crossedLine) {
		// {bareword: possible variable spec
		afterBrace = true;
	} else if ((prevCh == '&' && styler.SafeGetCharAt(bk - 1) != '&')	// &bareword: subroutine call
		|| styler.Match(bk - 1, "->")	// ->bareword: part of variable spec
		|| styler.Match(bk - 1, "::")	// ::bareword: part of module spec
		|| styler.Match(bk - 2, "sub")) {	// sub bareword: subroutine declaration
		// (implied BACK_KEYWORD, no keywords end in 'sub'!)
		result |= 1;
	}

	// The forward test only applies after one of '[{(,'
	const bool afterOpener =
		prevCh == '{' || prevCh == '(' || prevCh == '[' || prevCh == ',';
	if (afterOpener && fw < endPos) {
		// skip tabs/spaces only, staying inside the lexed range
		int nextCh = static_cast<unsigned char>(styler.SafeGetCharAt(fw));
		while (IsASpaceOrTab(nextCh) && fw < endPos) {
			fw++;
			nextCh = static_cast<unsigned char>(styler.SafeGetCharAt(fw));
		}
		if ((nextCh == '}' && afterBrace)	// {bareword}: variable spec
			|| styler.Match(fw, "=>")) {	// [{(, bareword=>: hash literal
			result |= 2;
		}
	}
	return result;
}
|
|
|
// Moves p backwards while the style at p is SCE_PL_DEFAULT or
// SCE_PL_COMMENTLINE, stopping at the first other style or at position 0.
// Used when backtracking, where whitespace and comments must be ignored.
void skipWhitespaceComment(LexAccessor &styler, Sci_PositionU &p) {
	for (; p > 0; p--) {
		const int styleHere = styler.StyleAt(p);
		const bool skippable =
			styleHere == SCE_PL_DEFAULT || styleHere == SCE_PL_COMMENTLINE;
		if (!skippable)
			return;
	}
}
|
|
|
// Scans backward from bk past whitespace and comments to find a lexeme.
// On success bk is moved to the first position of the lexeme, 'style' receives
// its style and the lexeme length is returned. Returns 0 (leaving 'style'
// untouched) when the scan reaches position 0.
int findPrevLexeme(LexAccessor &styler, Sci_PositionU &bk, int &style) {
	skipWhitespaceComment(styler, bk);
	if (bk == 0)
		return 0;

	style = styler.StyleAt(bk);
	// widen leftwards over the run of characters sharing that style
	int length = 1;
	while (bk > 0 && styler.StyleAt(bk - 1) == style) {
		bk--;
		length++;
	}
	return length;
}
|
|
|
// Backtracks from a '}' at bk to the balanced opening '{' and returns the
// style of the significant item before that '{' (used for '/' disambiguation).
// Returns SCE_PL_DEFAULT when a ';' operator is met first or when no balanced
// '{' is found before position 0.
int styleBeforeBracePair(LexAccessor &styler, Sci_PositionU bk) {
	if (bk == 0)
		return SCE_PL_DEFAULT;

	int depth = 1;	// the '}' we start from is already open
	for (--bk; bk > 0; --bk) {
		if (styler.StyleAt(bk) != SCE_PL_OPERATOR)
			continue;
		const int opCh = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
		if (opCh == ';') {
			// early out: statement boundary reached with braces unbalanced
			return SCE_PL_DEFAULT;
		}
		if (opCh == '}') {
			depth++;
		} else if (opCh == '{') {
			if (--depth == 0) {
				// balanced { found, bk > 0, skip more whitespace/comments
				bk--;
				skipWhitespaceComment(styler, bk);
				return styler.StyleAt(bk);
			}
		}
	}
	return SCE_PL_DEFAULT;
}
|
|
|
// Backtracks to classify the identifier ending at bk, for '/' disambiguation.
// Returns:
//   1 - the character at bk is '>' (inputsymbol, like <foo>)
//   2 - the identifier is preceded by "->" or "::"
//   3 - bare identifier (preceded by something else significant)
//   0 - position 0 was reached without a decision
int styleCheckIdentifier(LexAccessor &styler, Sci_PositionU bk) {
	if (styler.SafeGetCharAt(bk) == '>')
		return 1;

	// step back over the identifier itself
	while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
		bk--;
	}

	// then look at what precedes it, ignoring whitespace and comments
	for (; bk > 0; bk--) {
		const int prevStyle = styler.StyleAt(bk);
		if (prevStyle == SCE_PL_DEFAULT || prevStyle == SCE_PL_COMMENTLINE)
			continue;
		if (prevStyle != SCE_PL_OPERATOR)
			return 3;	// bare identifier
		// operator: only "->" and "::" are decisive, others are skipped
		if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::"))
			return 2;
	}
	return 0;
}
|
|
|
// Scans forward over the current line to classify it for POD styling.
// pos is advanced to the line's last end-of-line character (or to endPos).
// Returns:
//   SCE_PL_DEFAULT  - empty or whitespace-only line
//   SCE_PL_POD_VERB - line begins with whitespace followed by text (verbatim)
//   SCE_PL_POD      - line begins with a non-whitespace character
int podLineScan(LexAccessor &styler, Sci_PositionU &pos, Sci_PositionU endPos) {
	constexpr int undecided = -1;
	int lineKind = undecided;
	for (; pos < endPos; pos++) {
		const int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos));
		if (ch == '\n' || ch == '\r') {
			// leave pos on the final EOL character, treating CRLF as one unit
			if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n')
				pos++;
			break;
		}
		if (IsASpaceOrTab(ch)) {
			// leading whitespace, take note
			if (lineKind == undecided)
				lineKind = SCE_PL_DEFAULT;
		} else if (lineKind == SCE_PL_DEFAULT) {
			// text after leading whitespace: verbatim POD line
			lineKind = SCE_PL_POD_VERB;
		} else if (lineKind != SCE_PL_POD_VERB) {
			// regular POD line
			lineKind = SCE_PL_POD;
		}
	}
	return (lineKind == undecided) ? SCE_PL_DEFAULT : lineKind;
}
|
|
|
// Backtracks from bk to decide whether a subroutine prototype starts here.
// Whitespace and comments are ignored; the accepted format looks like:
//     sub abc::pqr :const :prototype(...)
// Lexemes are examined in pairs going leftwards ('::'+'pqr', ':'+'const', ...)
// and a small state machine accepts only legal subroutine syntax.
// Returns true once the pair 'sub' <identifier> is reached, false otherwise.
bool styleCheckSubPrototype(LexAccessor &styler, Sci_PositionU bk) {
	styler.Flush();
	int state = SUB_BEGIN;
	for (;;) {
		// find two adjacent lexemes; 'later' follows 'earlier' in the document
		int laterStyle = SCE_PL_DEFAULT;
		Sci_PositionU laterPos = bk;
		const int laterLen = findPrevLexeme(styler, laterPos, laterStyle);

		int earlierStyle = SCE_PL_DEFAULT;
		Sci_PositionU earlierPos = laterPos;
		if (earlierPos > 0)
			earlierPos--;
		const int earlierLen = findPrevLexeme(styler, earlierPos, earlierStyle);

		// lexeme pair must exist
		if (earlierLen == 0 || laterLen == 0)
			return false;

		// match parts of syntax; anything unexpected rejects the prototype
		if (earlierStyle == SCE_PL_OPERATOR && earlierLen == 1 &&
			styler.SafeGetCharAt(earlierPos) == ':') {
			// ':' must introduce an attribute name
			if (laterStyle != SCE_PL_IDENTIFIER && laterStyle != SCE_PL_WORD)
				return false;
			if (laterLen == 9 && styler.Match(laterPos, "prototype")) {
				// ':' 'prototype' is only valid as the rightmost attribute
				if (state != SUB_BEGIN)
					return false;
				state = SUB_HAS_PROTO;
			} else {
				// ':' <attribute> is only valid to the left of other attributes
				if (state != SUB_HAS_PROTO && state != SUB_HAS_ATTRIB)
					return false;
				state = SUB_HAS_ATTRIB;
			}
		} else if (earlierStyle == SCE_PL_OPERATOR && earlierLen == 2 &&
			styler.Match(earlierPos, "::")) {
			// '::' <identifier>
			if (laterStyle != SCE_PL_IDENTIFIER)
				return false;
			state = SUB_HAS_MODULE;
		} else if (earlierStyle == SCE_PL_WORD && earlierLen == 3 &&
			styler.Match(earlierPos, "sub")) {
			// 'sub' <identifier> completes the match (state SUB_HAS_SUB)
			return laterStyle == SCE_PL_IDENTIFIER;
		} else {
			return false;
		}

		// set position for finding the next lexeme pair
		bk = earlierPos;
		if (bk > 0)
			bk--;
	}
}
|
|
|
int actualNumStyle(int numberStyle) { |
|
if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) { |
|
return SCE_PL_STRING; |
|
} else if (numberStyle == PERLNUM_BAD) { |
|
return SCE_PL_ERROR; |
|
} |
|
return SCE_PL_NUMBER; |
|
} |
|
|
|
// Returns the closing delimiter that pairs with an opening bracket:
//   '(' -> ')',  '[' -> ']',  '{' -> '}',  '<' -> '>'
// Any other character acts as its own closing delimiter and is returned
// unchanged. The function is pure, so it is declared constexpr and noexcept
// and can be evaluated at compile time.
constexpr int opposite(int ch) noexcept {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
|
|
|
// Returns true when the first character on the line that is not a space or
// tab is a '#' styled as SCE_PL_COMMENTLINE. The last character before the
// start of the next line is not examined.
bool IsCommentLine(Sci_Position line, LexAccessor &styler) {
	const Sci_Position lineBegin = styler.LineStart(line);
	const Sci_Position lineLast = styler.LineStart(line + 1) - 1;
	for (Sci_Position at = lineBegin; at < lineLast; at++) {
		const char ch = styler[at];
		const int styleHere = styler.StyleAt(at);
		if (ch == '#' && styleHere == SCE_PL_COMMENTLINE)
			return true;
		if (!IsASpaceOrTab(ch))
			return false;
	}
	return false;
}
|
|
|
// Returns true when the line starts (at its very first column) with the
// keyword-styled text "package".
bool IsPackageLine(Sci_Position line, LexAccessor &styler) {
	const Sci_Position lineBegin = styler.LineStart(line);
	const bool startsWithKeyword = styler.StyleAt(lineBegin) == SCE_PL_WORD;
	return startsWithKeyword && styler.Match(lineBegin, "package");
}
|
|
|
// Reads the character five positions after pos and, when it is a digit
// '1'..'4', returns it as a heading level 1..4; otherwise returns 0.
// NOTE(review): presumably pos is the '=' of a "=headN" POD command, which
// would put the digit at pos + 5 -- confirm against the caller.
int PodHeadingLevel(Sci_Position pos, LexAccessor &styler) {
	const int digit = static_cast<unsigned char>(styler.SafeGetCharAt(pos + 5));
	const bool isLevel = digit >= '1' && digit <= '4';
	return isLevel ? digit - '0' : 0;
}
|
|
|
// An individual named option for use in an OptionSet |
|
|
|
// Options used for LexerPerl.
// Each member carries its default as an in-class initializer.
struct OptionsPerl {
	bool fold = false;
	bool foldComment = false;
	bool foldCompact = true;

	// Custom folding of POD and packages
	bool foldPOD = true;		// fold.perl.pod: fold Pod blocks
	bool foldPackage = true;	// fold.perl.package: fold packages

	bool foldCommentExplicit = true;	// fold.perl.comment.explicit

	bool foldAtElse = false;	// fold.perl.at.else
};
|
|
|
// Descriptions of the word lists understood by the Perl lexer; the list is
// terminated by a null pointer.
const char *const perlWordListDesc[] = {
	"Keywords",
	nullptr
};
|
|
|
// Binds the lexer's property names to the members of OptionsPerl and
// registers the word list descriptions.
struct OptionSetPerl : public OptionSet<OptionsPerl> {
	OptionSetPerl() {
		// generic folding switches (no description text)
		DefineProperty("fold", &OptionsPerl::fold);
		DefineProperty("fold.comment", &OptionsPerl::foldComment);
		DefineProperty("fold.compact", &OptionsPerl::foldCompact);

		// Perl-specific folding switches
		DefineProperty("fold.perl.pod", &OptionsPerl::foldPOD,
			"Set to 0 to disable folding Pod blocks when using the Perl lexer.");
		DefineProperty("fold.perl.package", &OptionsPerl::foldPackage,
			"Set to 0 to disable folding packages when using the Perl lexer.");
		DefineProperty("fold.perl.comment.explicit", &OptionsPerl::foldCommentExplicit,
			"Set to 0 to disable explicit folding.");
		DefineProperty("fold.perl.at.else", &OptionsPerl::foldAtElse,
			"This option enables Perl folding on a \"} else {\" line of an if statement.");

		DefineWordListSets(perlWordListDesc);
	}
};
|
|
|
// Metadata for every style the Perl lexer can produce: style number,
// symbolic name, space-separated tags and a human readable description.
// Style numbers are not contiguous; in this table each *_VAR (interpolated
// variable) style sits 37 above its parent style (e.g. 6 -> 43, 17 -> 54).
// Case-sensitive tokens in the descriptions (__DATA__, __END__ and
// SCE_PL_VARIABLE_INDEXER) are spelled exactly as they appear in code.
const LexicalClass lexicalClasses[] = {
	// Lexer perl SCLEX_PERL SCE_PL_:
	0, "SCE_PL_DEFAULT", "default", "white space",
	1, "SCE_PL_ERROR", "error", "error",
	2, "SCE_PL_COMMENTLINE", "comment line", "comment",
	3, "SCE_PL_POD", "data", "pod: = at beginning of line",
	4, "SCE_PL_NUMBER", "literal numeric", "number",
	5, "SCE_PL_WORD", "keyword", "keyword",
	6, "SCE_PL_STRING", "literal string interpolated", "double quoted string",
	7, "SCE_PL_CHARACTER", "literal string", "single quoted string",
	8, "SCE_PL_PUNCTUATION", "operator", "symbols / punctuation. currently not used",
	9, "SCE_PL_PREPROCESSOR", "preprocessor unused", "preprocessor. currently not used",
	10, "SCE_PL_OPERATOR", "operator", "operators",
	11, "SCE_PL_IDENTIFIER", "identifier", "identifiers (functions, etc.)",
	12, "SCE_PL_SCALAR", "identifier", "scalars: $var",
	13, "SCE_PL_ARRAY", "identifier", "array: @var",
	14, "SCE_PL_HASH", "identifier", "hash: %var",
	15, "SCE_PL_SYMBOLTABLE", "identifier", "symbol table: *var",
	16, "SCE_PL_VARIABLE_INDEXER", "identifier unused", "SCE_PL_VARIABLE_INDEXER allocated but unused",
	17, "SCE_PL_REGEX", "literal regex", "regex: /re/ or m{re}",
	18, "SCE_PL_REGSUBST", "literal regex", "substitution: s/re/ore/",
	19, "SCE_PL_LONGQUOTE", "literal string", "long quote (qq, qr, qw, qx) -- obsolete: replaced by qq, qx, qr, qw",
	20, "SCE_PL_BACKTICKS", "literal string interpolated", "back ticks",
	21, "SCE_PL_DATASECTION", "data", "data section: __DATA__ or __END__ at beginning of line",
	22, "SCE_PL_HERE_DELIM", "here-doc literal string", "here-doc (delimiter)",
	23, "SCE_PL_HERE_Q", "here-doc literal string", "here-doc (single quoted, q)",
	24, "SCE_PL_HERE_QQ", "here-doc literal string interpolated", "here-doc (double quoted, qq)",
	25, "SCE_PL_HERE_QX", "here-doc literal interpolated", "here-doc (back ticks, qx)",
	26, "SCE_PL_STRING_Q", "literal string", "single quoted string, generic",
	27, "SCE_PL_STRING_QQ", "literal string interpolated", "qq = double quoted string",
	28, "SCE_PL_STRING_QX", "literal string interpolated", "qx = back ticks",
	29, "SCE_PL_STRING_QR", "literal regex", "qr = regex",
	30, "SCE_PL_STRING_QW", "literal string interpolated", "qw = array",
	31, "SCE_PL_POD_VERB", "data", "pod: verbatim paragraphs",
	40, "SCE_PL_SUB_PROTOTYPE", "identifier", "subroutine prototype",
	41, "SCE_PL_FORMAT_IDENT", "identifier", "format identifier",
	42, "SCE_PL_FORMAT", "literal string", "format body",
	43, "SCE_PL_STRING_VAR", "identifier interpolated", "double quoted string (interpolated variable)",
	44, "SCE_PL_XLAT", "literal string", "translation: tr{}{} y{}{}",
	54, "SCE_PL_REGEX_VAR", "identifier interpolated", "regex: /re/ or m{re} (interpolated variable)",
	55, "SCE_PL_REGSUBST_VAR", "identifier interpolated", "substitution: s/re/ore/ (interpolated variable)",
	57, "SCE_PL_BACKTICKS_VAR", "identifier interpolated", "back ticks (interpolated variable)",
	61, "SCE_PL_HERE_QQ_VAR", "identifier interpolated", "here-doc (double quoted, qq) (interpolated variable)",
	62, "SCE_PL_HERE_QX_VAR", "identifier interpolated", "here-doc (back ticks, qx) (interpolated variable)",
	64, "SCE_PL_STRING_QQ_VAR", "identifier interpolated", "qq = double quoted string (interpolated variable)",
	65, "SCE_PL_STRING_QX_VAR", "identifier interpolated", "qx = back ticks (interpolated variable)",
	66, "SCE_PL_STRING_QR_VAR", "identifier interpolated", "qr = regex (interpolated variable)",
};
|
|
|
class LexerPerl : public DefaultLexer { |
|
CharacterSet setWordStart; |
|
CharacterSet setWord; |
|
CharacterSet setSpecialVar; |
|
CharacterSet setControlVar; |
|
WordList keywords; |
|
OptionsPerl options; |
|
OptionSetPerl osPerl; |
|
public: |
|
LexerPerl() : |
|
DefaultLexer("perl", SCLEX_PERL, lexicalClasses, std::size(lexicalClasses)), |
|
setWordStart(CharacterSet::setAlpha, "_", 0x80, true), |
|
setWord(CharacterSet::setAlphaNum, "_", 0x80, true), |
|
setSpecialVar(CharacterSet::setNone, "\"$;<>&`'+,./\\%:=~!?@[]"), |
|
setControlVar(CharacterSet::setNone, "ACDEFHILMNOPRSTVWX") { |
|
} |
|
virtual ~LexerPerl() { |
|
} |
|
void SCI_METHOD Release() override { |
|
delete this; |
|
} |
|
int SCI_METHOD Version() const override { |
|
return lvRelease5; |
|
} |
|
const char *SCI_METHOD PropertyNames() override { |
|
return osPerl.PropertyNames(); |
|
} |
|
int SCI_METHOD PropertyType(const char *name) override { |
|
return osPerl.PropertyType(name); |
|
} |
|
const char *SCI_METHOD DescribeProperty(const char *name) override { |
|
return osPerl.DescribeProperty(name); |
|
} |
|
Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override; |
|
const char * SCI_METHOD PropertyGet(const char *key) override { |
|
return osPerl.PropertyGet(key); |
|
} |
|
const char *SCI_METHOD DescribeWordListSets() override { |
|
return osPerl.DescribeWordListSets(); |
|
} |
|
Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override; |
|
void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; |
|
void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; |
|
|
|
void *SCI_METHOD PrivateCall(int, void *) override { |
|
return 0; |
|
} |
|
|
|
static ILexer5 *LexerFactoryPerl() { |
|
return new LexerPerl(); |
|
} |
|
int InputSymbolScan(StyleContext &sc); |
|
void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false); |
|
}; |
|
|
|
// Applies a property to the lexer options.
// Returns 0 when the option set reports the value as applied, -1 otherwise.
Sci_Position SCI_METHOD LexerPerl::PropertySet(const char *key, const char *val) {
	const bool applied = osPerl.PropertySet(&options, key, val);
	return applied ? 0 : -1;
}
|
|
|
// Replaces word list n with the words in wl. Only list 0 (the keywords)
// exists. Returns 0 when that list was set and Set() reported true,
// otherwise -1.
Sci_Position SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) {
	if (n != 0) {
		// unknown word list index: nothing modified
		return -1;
	}
	if (keywords.Set(wl)) {
		return 0;
	}
	return -1;
}
|
|
|
// Forward scan, starting just after the current character, for a matching
// '>' on the same line (file handles such as <FOO>).
// Returns the offset of the '>' relative to the current position, or 0 when
// a line end or a zero character comes first, or when the text at the current
// position is the "<=>" operator.
int LexerPerl::InputSymbolScan(StyleContext &sc) {
	for (int offset = 1; ; offset++) {
		const int c = sc.GetRelativeCharacter(offset);
		if (c == 0 || c == '\r' || c == '\n')
			return 0;
		if (c == '>') {
			// '<=>' case: comparison operator, not an input symbol
			return sc.Match("<=>") ? 0 : offset;
		}
	}
}
|
|
|
// Interpolates a segment of maxSeg characters that contains no active
// backslashes or delimiters. Walking through the segment, the style is
// switched into the interpolated variant (state + INTERPOLATE_SHIFT) over
// each variable pattern found and back to the parent style elsewhere; on
// return the context is in the parent (non-interpolated) style.
// isPattern disables the special variables "$(", "$)", "$|", "@+" and "@-".
void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) {
	while (maxSeg > 0) {
		bool isVar = false;
		int sLen = 0;

		if ((maxSeg > 1) && (sc.ch == '$' || sc.ch == '@')) {
			// $#[$]*word [$@][$]*word (where word or {word} is always present)
			bool braces = false;
			sLen = 1;
			if (sc.ch == '$' && sc.chNext == '#') {
				// starts with $#
				sLen++;
			}
			// zero or more '$' dereferences within
			while ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '$')) {
				sLen++;
			}
			// optional '{' opening a {word}
			if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '{')) {
				sLen++;
				braces = true;
			}
			if (maxSeg > sLen) {
				const int first = sc.GetRelativeCharacter(sLen);
				if (setWordStart.Contains(first)) {
					// word (various): take the start char plus all word chars
					sLen++;
					isVar = true;
					while ((maxSeg > sLen) && setWord.Contains(sc.GetRelativeCharacter(sLen))) {
						sLen++;
					}
				} else if (braces && IsADigit(first) && (sLen == 2)) {
					// digit for ${digit}
					sLen++;
					isVar = true;
				}
			}
			if (braces) {
				if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '}')) {
					// '}' closing the {word}
					sLen++;
				} else {
					// unterminated brace form is not a variable
					isVar = false;
				}
			}
		}

		if (!isVar && (maxSeg > 1)) {
			// $- or @-specific variable patterns
			const int next = sc.chNext;
			if (sc.ch == '$') {
				sLen = 1;
				if (IsADigit(next)) {
					// $[0-9] and slurp trailing digits
					sLen++;
					isVar = true;
					while ((maxSeg > sLen) && IsADigit(sc.GetRelativeCharacter(sLen))) {
						sLen++;
					}
				} else if (setSpecialVar.Contains(next)) {
					// $ special variables
					sLen++;
					isVar = true;
				} else if (!isPattern && ((next == '(') || (next == ')') || (next == '|'))) {
					// $ additional specials, not valid inside patterns
					sLen++;
					isVar = true;
				} else if (next == '^') {
					// $^A control-char style
					sLen++;
					if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelativeCharacter(sLen))) {
						sLen++;
						isVar = true;
					}
				}
			} else if (sc.ch == '@') {
				sLen = 1;
				if (!isPattern && ((next == '+') || (next == '-'))) {
					// @ specials, non-pattern only
					sLen++;
					isVar = true;
				}
			}
		}

		// commit as interpolated variable or as a normal character
		if (isVar) {
			if (sc.state < SCE_PL_STRING_VAR) {
				sc.SetState(sc.state + INTERPOLATE_SHIFT);
			}
			sc.Forward(sLen);
			maxSeg -= sLen;
		} else {
			if (sc.state >= SCE_PL_STRING_VAR) {
				sc.SetState(sc.state - INTERPOLATE_SHIFT);
			}
			sc.Forward();
			maxSeg--;
		}
	}

	// always leave in the parent style
	if (sc.state >= SCE_PL_STRING_VAR) {
		sc.SetState(sc.state - INTERPOLATE_SHIFT);
	}
}
|
|
|
void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) { |
|
LexAccessor styler(pAccess); |
|
|
|
// keywords that force /PATTERN/ at all times; should track vim's behaviour
|
WordList reWords; |
|
reWords.Set("elsif if split while"); |
|
|
|
// charset classes |
|
CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC"); |
|
// lexing of "%*</" operators is non-trivial; these are missing in the set below |
|
CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~"); |
|
CharacterSet setQDelim(CharacterSet::setNone, "qrwx"); |
|
CharacterSet setModifiers(CharacterSet::setAlpha); |
|
CharacterSet setPreferRE(CharacterSet::setNone, "*/<%"); |
|
// setArray and setHash also accept chars for special vars like $_,
|
// which are then truncated when the next char does not match setVar |
|
CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true); |
|
CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true); |
|
CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true); |
|
CharacterSet &setPOD = setModifiers; |
|
CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@"); |
|
CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_"); |
|
CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*+];_ \t"); |
|
CharacterSet setRepetition(CharacterSet::setDigits, ")\"'"); |
|
// for format identifiers |
|
CharacterSet setFormatStart(CharacterSet::setAlpha, "_="); |
|
CharacterSet &setFormat = setHereDocDelim; |
|
|
|
// Lexer for perl often has to backtrack to start of current style to determine |
|
// which characters are being used as quotes, how deeply nested is the |
|
// start position and what the termination string is for HERE documents. |
|
|
|
class HereDocCls { // Class to manage HERE doc sequence |
|
public: |
|
int State; |
|
// 0: '<<' encountered |
|
// 1: collect the delimiter |
|
// 2: here doc text (lines after the delimiter) |
|
int Quote; // the char after '<<' |
|
bool Quoted; // true if Quote in ('\'','"','`') |
|
bool StripIndent; // true if '<<~' requested to strip leading whitespace |
|
int DelimiterLength; // strlen(Delimiter) |
|
char Delimiter[HERE_DELIM_MAX]; // the Delimiter |
|
HereDocCls() { |
|
State = 0; |
|
Quote = 0; |
|
Quoted = false; |
|
StripIndent = false; |
|
DelimiterLength = 0; |
|
Delimiter[0] = '\0'; |
|
} |
|
void Append(int ch) { |
|
Delimiter[DelimiterLength++] = static_cast<char>(ch); |
|
Delimiter[DelimiterLength] = '\0'; |
|
} |
|
~HereDocCls() { |
|
} |
|
}; |
|
HereDocCls HereDoc; // TODO: FIFO for stacked here-docs |
|
|
|
class QuoteCls { // Class to manage quote pairs |
|
public: |
|
int Rep; |
|
int Count; |
|
int Up, Down; |
|
QuoteCls() { |
|
New(1); |
|
} |
|
void New(int r = 1) { |
|
Rep = r; |
|
Count = 0; |
|
Up = '\0'; |
|
Down = '\0'; |
|
} |
|
void Open(int u) { |
|
Count++; |
|
Up = u; |
|
Down = opposite(Up); |
|
} |
|
}; |
|
QuoteCls Quote; |
|
|
|
// additional state for number lexing |
|
int numState = PERLNUM_DECIMAL; |
|
int dotCount = 0; |
|
|
|
Sci_PositionU endPos = startPos + length; |
|
|
|
// Backtrack to beginning of style if required... |
|
// If in a long distance lexical state, backtrack to find quote characters. |
|
// Includes strings (may be multi-line), numbers (additional state), format |
|
// bodies, as well as POD sections. |
|
if (initStyle == SCE_PL_HERE_Q |
|
|| initStyle == SCE_PL_HERE_QQ |
|
|| initStyle == SCE_PL_HERE_QX |
|
|| initStyle == SCE_PL_FORMAT |
|
|| initStyle == SCE_PL_HERE_QQ_VAR |
|
|| initStyle == SCE_PL_HERE_QX_VAR |
|
) { |
|
// backtrack through multiple styles to reach the delimiter start |
|
int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM; |
|
while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) { |
|
startPos--; |
|
} |
|
startPos = styler.LineStart(styler.GetLine(startPos)); |
|
initStyle = styler.StyleAt(startPos - 1); |
|
} |
|
if (initStyle == SCE_PL_STRING |
|
|| initStyle == SCE_PL_STRING_QQ |
|
|| initStyle == SCE_PL_BACKTICKS |
|
|| initStyle == SCE_PL_STRING_QX |
|
|| initStyle == SCE_PL_REGEX |
|
|| initStyle == SCE_PL_STRING_QR |
|
|| initStyle == SCE_PL_REGSUBST |
|
|| initStyle == SCE_PL_STRING_VAR |
|
|| initStyle == SCE_PL_STRING_QQ_VAR |
|
|| initStyle == SCE_PL_BACKTICKS_VAR |
|
|| initStyle == SCE_PL_STRING_QX_VAR |
|
|| initStyle == SCE_PL_REGEX_VAR |
|
|| initStyle == SCE_PL_STRING_QR_VAR |
|
|| initStyle == SCE_PL_REGSUBST_VAR |
|
) { |
|
// for interpolation, must backtrack through a mix of two different styles |
|
int otherStyle = (initStyle >= SCE_PL_STRING_VAR) ? |
|
initStyle - INTERPOLATE_SHIFT : initStyle + INTERPOLATE_SHIFT; |
|
while (startPos > 1) { |
|
int st = styler.StyleAt(startPos - 1); |
|
if ((st != initStyle) && (st != otherStyle)) |
|
break; |
|
startPos--; |
|
} |
|
initStyle = SCE_PL_DEFAULT; |
|
} else if (initStyle == SCE_PL_STRING_Q |
|
|| initStyle == SCE_PL_STRING_QW |
|
|| initStyle == SCE_PL_XLAT |
|
|| initStyle == SCE_PL_CHARACTER |
|
|| initStyle == SCE_PL_NUMBER |
|
|| initStyle == SCE_PL_IDENTIFIER |
|
|| initStyle == SCE_PL_ERROR |
|
|| initStyle == SCE_PL_SUB_PROTOTYPE |
|
) { |
|
while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) { |
|
startPos--; |
|
} |
|
initStyle = SCE_PL_DEFAULT; |
|
} else if (initStyle == SCE_PL_POD |
|
|| initStyle == SCE_PL_POD_VERB |
|
) { |
|
// POD backtracking finds preceding blank lines and goes back past them |
|
Sci_Position ln = styler.GetLine(startPos); |
|
if (ln > 0) { |
|
initStyle = styler.StyleAt(styler.LineStart(--ln)); |
|
if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) { |
|
while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT) |
|
ln--; |
|
} |
|
startPos = styler.LineStart(++ln); |
|
initStyle = styler.StyleAt(startPos - 1); |
|
} else { |
|
startPos = 0; |
|
initStyle = SCE_PL_DEFAULT; |
|
} |
|
} |
|
|
|
// backFlag, backPos are additional state to aid identifier corner cases. |
|
// Look backwards past whitespace and comments in order to detect either |
|
// operator or keyword. Later updated as we go along. |
|
int backFlag = BACK_NONE; |
|
Sci_PositionU backPos = startPos; |
|
if (backPos > 0) { |
|
backPos--; |
|
skipWhitespaceComment(styler, backPos); |
|
if (styler.StyleAt(backPos) == SCE_PL_OPERATOR) |
|
backFlag = BACK_OPERATOR; |
|
else if (styler.StyleAt(backPos) == SCE_PL_WORD) |
|
backFlag = BACK_KEYWORD; |
|
backPos++; |
|
} |
|
|
|
StyleContext sc(startPos, endPos - startPos, initStyle, styler); |
|
|
|
for (; sc.More(); sc.Forward()) { |
|
|
|
// Determine if the current state should terminate. |
|
switch (sc.state) { |
|
case SCE_PL_OPERATOR: |
|
sc.SetState(SCE_PL_DEFAULT); |
|
backFlag = BACK_OPERATOR; |
|
backPos = sc.currentPos; |
|
break; |
|
case SCE_PL_IDENTIFIER: // identifier, bareword, inputsymbol |
|
if ((!setWord.Contains(sc.ch) && sc.ch != '\'') |
|
|| sc.Match('.', '.') |
|
|| sc.chPrev == '>') { // end of inputsymbol |
|
sc.SetState(SCE_PL_DEFAULT); |
|
} |
|
break; |
|
case SCE_PL_WORD: // keyword, plus special cases |
|
if (!setWord.Contains(sc.ch)) { |
|
char s[100]; |
|
sc.GetCurrent(s, sizeof(s)); |
|
if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) { |
|
sc.ChangeState(SCE_PL_DATASECTION); |
|
} else { |
|
if ((strcmp(s, "format") == 0)) { |
|
sc.SetState(SCE_PL_FORMAT_IDENT); |
|
HereDoc.State = 0; |
|
} else { |
|
sc.SetState(SCE_PL_DEFAULT); |
|
} |
|
backFlag = BACK_KEYWORD; |
|
backPos = sc.currentPos; |
|
} |
|
} |
|
break; |
|
case SCE_PL_SCALAR: |
|
case SCE_PL_ARRAY: |
|
case SCE_PL_HASH: |
|
case SCE_PL_SYMBOLTABLE: |
|
if (sc.Match(':', ':')) { // skip :: |
|
sc.Forward(); |
|
} else if (!setVar.Contains(sc.ch)) { |
|
if (sc.LengthCurrent() == 1) { |
|
// Special variable: $(, $_ etc. |
|
sc.Forward(); |
|
} |
|
sc.SetState(SCE_PL_DEFAULT); |
|
} |
|
break; |
|
case SCE_PL_NUMBER: |
|
// if no early break, number style is terminated at "(go through)" |
|
if (sc.ch == '.') { |
|
if (sc.chNext == '.') { |
|
// double dot is always an operator (go through) |
|
} else if (numState <= PERLNUM_FLOAT_EXP) { |
|
// non-decimal number or float exponent, consume next dot |
|
sc.SetState(SCE_PL_OPERATOR); |
|
break; |
|
} else { // decimal or vectors allows dots |
|
dotCount++; |
|
if (numState == PERLNUM_DECIMAL) { |
|
if (dotCount <= 1) // number with one dot in it |
|
break; |
|
if (IsADigit(sc.chNext)) { // really a vector |
|
numState = PERLNUM_VECTOR; |
|
break; |
|
} |
|
// number then dot (go through) |
|
} else if (numState == PERLNUM_HEX) { |
|
if (dotCount <= 1 && IsADigit(sc.chNext, 16)) { |
|
break; // hex with one dot is a hex float |
|
} else { |
|
sc.SetState(SCE_PL_OPERATOR); |
|
break; |
|
} |
|
// hex then dot (go through) |
|
} else if (IsADigit(sc.chNext)) // vectors |
|
break; |
|
// vector then dot (go through) |
|
} |
|
} else if (sc.ch == '_') { |
|
// permissive underscoring for number and vector literals |
|
break; |
|
} else if (numState == PERLNUM_DECIMAL) { |
|
if (sc.ch == 'E' || sc.ch == 'e') { // exponent, sign |
|
numState = PERLNUM_FLOAT_EXP; |
|
if (sc.chNext == '+' || sc.chNext == '-') { |
|
sc.Forward(); |
|
} |
|
break; |
|
} else if (IsADigit(sc.ch)) |
|
break; |
|
// number then word (go through) |
|
} else if (numState == PERLNUM_HEX) { |
|
if (sc.ch == 'P' || sc.ch == 'p') { // hex float exponent, sign |
|
numState = PERLNUM_FLOAT_EXP; |
|
if (sc.chNext == '+' || sc.chNext == '-') { |
|
sc.Forward(); |
|
} |
|
break; |
|
} else if (IsADigit(sc.ch, 16)) |
|
break; |
|
// hex or hex float then word (go through) |
|
} else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) { |
|
if (IsADigit(sc.ch)) // vector |
|
break; |
|
if (setWord.Contains(sc.ch) && dotCount == 0) { // change to word |
|
sc.ChangeState(SCE_PL_IDENTIFIER); |
|
break; |
|
} |
|
// vector then word (go through) |
|
} else if (IsADigit(sc.ch)) { |
|
if (numState == PERLNUM_FLOAT_EXP) { |
|
break; |
|
} else if (numState == PERLNUM_OCTAL) { |
|
if (sc.ch <= '7') break; |
|
} else if (numState == PERLNUM_BINARY) { |
|
if (sc.ch <= '1') break; |
|
} |
|
// mark invalid octal, binary numbers (go through) |
|
numState = PERLNUM_BAD; |
|
break; |
|
} |
|
// complete current number or vector |
|
sc.ChangeState(actualNumStyle(numState)); |
|
sc.SetState(SCE_PL_DEFAULT); |
|
break; |
|
case SCE_PL_COMMENTLINE: |
|
if (sc.atLineStart) { |
|
sc.SetState(SCE_PL_DEFAULT); |
|
} |
|
break; |
|
case SCE_PL_HERE_DELIM: |
|
if (HereDoc.State == 0) { // '<<' encountered |
|
int delim_ch = sc.chNext; |
|
Sci_Position ws_skip = 0; |
|
HereDoc.State = 1; // pre-init HERE doc class |
|
HereDoc.Quote = sc.chNext; |
|
HereDoc.Quoted = false; |
|
HereDoc.StripIndent = false; |
|
HereDoc.DelimiterLength = 0; |
|
HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; |
|
if (delim_ch == '~') { // was actually '<<~' |
|
sc.Forward(); |
|
HereDoc.StripIndent = true; |
|
HereDoc.Quote = delim_ch = sc.chNext; |
|
} |
|
if (IsASpaceOrTab(delim_ch)) { |
|
// skip whitespace; legal only for quoted delimiters |
|
Sci_PositionU i = sc.currentPos + 1; |
|
while ((i < endPos) && IsASpaceOrTab(delim_ch)) { |
|
i++; |
|
delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i)); |
|
} |
|
ws_skip = i - sc.currentPos - 1; |
|
} |
|
if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') { |
|
// a quoted here-doc delimiter; skip any whitespace |
|
sc.Forward(ws_skip + 1); |
|
HereDoc.Quote = delim_ch; |
|
HereDoc.Quoted = true; |
|
} else if ((ws_skip == 0 && setNonHereDoc.Contains(sc.chNext)) |
|
|| ws_skip > 0) { |
|
// left shift << or <<= operator cases |
|
// restore position if operator |
|
sc.ChangeState(SCE_PL_OPERATOR); |
|
sc.ForwardSetState(SCE_PL_DEFAULT); |
|
backFlag = BACK_OPERATOR; |
|
backPos = sc.currentPos; |
|
HereDoc.State = 0; |
|
} else { |
|
// specially handle initial '\' for identifier |
|
if (ws_skip == 0 && HereDoc.Quote == '\\') |
|
sc.Forward(); |
|
// an unquoted here-doc delimiter, no special handling |
|
// (cannot be prefixed by spaces/tabs), or |
|
// symbols terminates; deprecated zero-length delimiter |
|
} |
|
} else if (HereDoc.State == 1) { // collect the delimiter |
|
backFlag = BACK_NONE; |
|
if (HereDoc.Quoted) { // a quoted here-doc delimiter |
|
if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter |
|
sc.ForwardSetState(SCE_PL_DEFAULT); |
|
} else if (!sc.atLineEnd) { |
|
if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote |
|
sc.Forward(); |
|
} |
|
if (sc.ch != '\r') { // skip CR if CRLF |
|
int i = 0; // else append char, possibly an extended char |
|
while (i < sc.width) { |
|
HereDoc.Append(static_cast<unsigned char>(styler.SafeGetCharAt(sc.currentPos + i))); |
|
i++; |
|
} |
|
} |
|
} |
|
} else { // an unquoted here-doc delimiter, no extended charsets |
|
if (setHereDocDelim.Contains(sc.ch)) { |
|
HereDoc.Append(sc.ch); |
|
} else { |
|
sc.SetState(SCE_PL_DEFAULT); |
|
} |
|
} |
|
if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { |
|
sc.SetState(SCE_PL_ERROR); |
|
HereDoc.State = 0; |
|
} |
|
} |
|
break; |
|
case SCE_PL_HERE_Q: |
|
case SCE_PL_HERE_QQ: |
|
case SCE_PL_HERE_QX: |
|
// also implies HereDoc.State == 2 |
|
sc.Complete(); |
|
if (HereDoc.StripIndent) { |
|
// skip whitespace |
|
while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) |
|
sc.Forward(); |
|
} |
|
if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) { |
|
int c = sc.GetRelative(HereDoc.DelimiterLength); |
|
if (c == '\r' || c == '\n') { // peek first, do not consume match |
|
sc.ForwardBytes(HereDoc.DelimiterLength); |
|
sc.SetState(SCE_PL_DEFAULT); |
|
backFlag = BACK_NONE; |
|
HereDoc.State = 0; |
|
if (!sc.atLineEnd) |
|
sc.Forward(); |
|
break; |
|
} |
|
} |
|
if (sc.state == SCE_PL_HERE_Q) { // \EOF and 'EOF' non-interpolated |
|
while (!sc.atLineEnd) |
|
sc.Forward(); |
|
break; |
|
} |
|
while (!sc.atLineEnd) { // "EOF" and `EOF` interpolated |
|
int c, sLen = 0, endType = 0; |
|
while ((c = sc.GetRelativeCharacter(sLen)) != 0) { |
|
// scan to break string into segments |
|
if (c == '\\') { |
|
endType = 1; break; |
|
} else if (c == '\r' || c == '\n') { |
|
endType = 2; break; |
|
} |
|
sLen++; |
|
} |
|
if (sLen > 0) // process non-empty segments |
|
InterpolateSegment(sc, sLen); |
|
if (endType == 1) { |
|
sc.Forward(); |
|
// \ at end-of-line does not appear to have any effect, skip |
|
if (sc.ch != '\r' && sc.ch != '\n') |
|
sc.Forward(); |
|
} else if (endType == 2) { |
|
if (!sc.atLineEnd) |
|
sc.Forward(); |
|
} |
|
} |
|
break; |
|
case SCE_PL_POD: |
|
case SCE_PL_POD_VERB: { |
|
Sci_PositionU fw = sc.currentPos; |
|
Sci_Position ln = styler.GetLine(fw); |
|
if (sc.atLineStart && sc.Match("=cut")) { // end of POD |
|
sc.SetState(SCE_PL_POD); |
|
sc.Forward(4); |
|
sc.SetState(SCE_PL_DEFAULT); |
|
styler.SetLineState(ln, SCE_PL_POD); |
|
break; |
|
} |
|
int pod = podLineScan(styler, fw, endPos); // classify POD line |
|
styler.SetLineState(ln, pod); |
|
if (pod == SCE_PL_DEFAULT) { |
|
if (sc.state == SCE_PL_POD_VERB) { |
|
Sci_PositionU fw2 = fw; |
|
while (fw2 < (endPos - 1) && pod == SCE_PL_DEFAULT) { |
|
fw = fw2++; // penultimate line (last blank line) |
|
pod = podLineScan(styler, fw2, endPos); |
|
styler.SetLineState(styler.GetLine(fw2), pod); |
|
} |
|
if (pod == SCE_PL_POD) { // truncate verbatim POD early |
|
sc.SetState(SCE_PL_POD); |
|
} else |
|
fw = fw2; |
|
} |
|
} else { |
|
if (pod == SCE_PL_POD_VERB // still part of current paragraph |
|
&& (styler.GetLineState(ln - 1) == SCE_PL_POD)) { |
|
pod = SCE_PL_POD; |
|
styler.SetLineState(ln, pod); |
|
} else if (pod == SCE_PL_POD |
|
&& (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) { |
|
pod = SCE_PL_POD_VERB; |
|
styler.SetLineState(ln, pod); |
|
} |
|
sc.SetState(pod); |
|
} |
|
sc.ForwardBytes(fw - sc.currentPos); // commit style |
|
} |
|
break; |
|
case SCE_PL_REGEX: |
|
case SCE_PL_STRING_QR: |
|
if (Quote.Rep <= 0) { |
|
if (!setModifiers.Contains(sc.ch)) |
|
sc.SetState(SCE_PL_DEFAULT); |
|
} else if (!Quote.Up && !IsASpace(sc.ch)) { |
|
Quote.Open(sc.ch); |
|
} else { |
|
int c, sLen = 0, endType = 0; |
|
while ((c = sc.GetRelativeCharacter(sLen)) != 0) { |
|
// scan to break string into segments |
|
if (IsASpace(c)) { |
|
break; |
|
} else if (c == '\\' && Quote.Up != '\\') { |
|
endType = 1; break; |
|
} else if (c == Quote.Down) { |
|
Quote.Count--; |
|
if (Quote.Count == 0) { |
|
Quote.Rep--; |
|
break; |
|
} |
|
} else if (c == Quote.Up) |
|
Quote.Count++; |
|
sLen++; |
|
} |
|
if (sLen > 0) { // process non-empty segments |
|
if (Quote.Up != '\'') { |
|
InterpolateSegment(sc, sLen, true); |
|
} else // non-interpolated path |
|
sc.Forward(sLen); |
|
} |
|
if (endType == 1) |
|
sc.Forward(); |
|
} |
|
break; |
|
case SCE_PL_REGSUBST: |
|
case SCE_PL_XLAT: |
|
if (Quote.Rep <= 0) { |
|
if (!setModifiers.Contains(sc.ch)) |
|
sc.SetState(SCE_PL_DEFAULT); |
|
} else if (!Quote.Up && !IsASpace(sc.ch)) { |
|
Quote.Open(sc.ch); |
|
} else { |
|
int c, sLen = 0, endType = 0; |
|
bool isPattern = (Quote.Rep == 2); |
|
while ((c = sc.GetRelativeCharacter(sLen)) != 0) { |
|
// scan to break string into segments |
|
if (c == '\\' && Quote.Up != '\\') { |
|
endType = 2; break; |
|
} else if (Quote.Count == 0 && Quote.Rep == 1) { |
|
// We matched something like s(...) or tr{...}, Perl 5.10 |
|
// appears to allow almost any character for use as the |
|
// next delimiters. Whitespace and comments are accepted in |
|
// between, but we'll limit to whitespace here. |
|
// For '#', if no whitespace in between, it's a delimiter. |
|
if (IsASpace(c)) { |
|
// Keep going |
|
} else if (c == '#' && IsASpaceOrTab(sc.GetRelativeCharacter(sLen - 1))) { |
|
endType = 3; |
|
} else |
|
Quote.Open(c); |
|
break; |
|
} else if (c == Quote.Down) { |
|
Quote.Count--; |
|
if (Quote.Count == 0) { |
|
Quote.Rep--; |
|
endType = 1; |
|
} |
|
if (Quote.Up == Quote.Down) |
|
Quote.Count++; |
|
if (endType == 1) |
|
break; |
|
} else if (c == Quote.Up) { |
|
Quote.Count++; |
|
} else if (IsASpace(c)) |
|
break; |
|
sLen++; |
|
} |
|
if (sLen > 0) { // process non-empty segments |
|
if (sc.state == SCE_PL_REGSUBST && Quote.Up != '\'') { |
|
InterpolateSegment(sc, sLen, isPattern); |
|
} else // non-interpolated path |
|
sc.Forward(sLen); |
|
} |
|
if (endType == 2) { |
|
sc.Forward(); |
|
} else if (endType == 3) |
|
sc.SetState(SCE_PL_DEFAULT); |
|
} |
|
break; |
|
case SCE_PL_STRING_Q: |
|
case SCE_PL_STRING_QQ: |
|
case SCE_PL_STRING_QX: |
|
case SCE_PL_STRING_QW: |
|
case SCE_PL_STRING: |
|
case SCE_PL_CHARACTER: |
|
case SCE_PL_BACKTICKS: |
|
if (!Quote.Down && !IsASpace(sc.ch)) { |
|
Quote.Open(sc.ch); |
|
} else { |
|
int c, sLen = 0, endType = 0; |
|
while ((c = sc.GetRelativeCharacter(sLen)) != 0) { |
|
// scan to break string into segments |
|
if (IsASpace(c)) { |
|
break; |
|
} else if (c == '\\' && Quote.Up != '\\') { |
|
endType = 2; break; |
|
} else if (c == Quote.Down) { |
|
Quote.Count--; |
|
if (Quote.Count == 0) { |
|
endType = 3; break; |
|
} |
|
} else if (c == Quote.Up) |
|
Quote.Count++; |
|
sLen++; |
|
} |
|
if (sLen > 0) { // process non-empty segments |
|
switch (sc.state) { |
|
case SCE_PL_STRING: |
|
case SCE_PL_STRING_QQ: |
|
case SCE_PL_BACKTICKS: |
|
InterpolateSegment(sc, sLen); |
|
break; |
|
case SCE_PL_STRING_QX: |
|
if (Quote.Up != '\'') { |
|
InterpolateSegment(sc, sLen); |
|
break; |
|
} |
|
// (continued for ' delim) |
|
// Falls through. |
|
default: // non-interpolated path |
|
sc.Forward(sLen); |
|
} |
|
} |
|
if (endType == 2) { |
|
sc.Forward(); |
|
} else if (endType == 3) |
|
sc.ForwardSetState(SCE_PL_DEFAULT); |
|
} |
|
break; |
|
case SCE_PL_SUB_PROTOTYPE: { |
|
int i = 0; |
|
// forward scan; must all be valid proto characters |
|
while (setSubPrototype.Contains(sc.GetRelative(i))) |
|
i++; |
|
if (sc.GetRelative(i) == ')') { // valid sub prototype |
|
sc.ForwardBytes(i); |
|
sc.ForwardSetState(SCE_PL_DEFAULT); |
|
} else { |
|
// abandon prototype, restart from '(' |
|
sc.ChangeState(SCE_PL_OPERATOR); |
|
sc.SetState(SCE_PL_DEFAULT); |
|
} |
|
} |
|
break; |
|
case SCE_PL_FORMAT: { |
|
sc.Complete(); |
|
if (sc.Match('.')) { |
|
sc.Forward(); |
|
if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n'))) |
|
sc.SetState(SCE_PL_DEFAULT); |
|
} |
|
while (!sc.atLineEnd) |
|
sc.Forward(); |
|
} |
|
break; |
|
case SCE_PL_ERROR: |
|
break; |
|
} |
|
// Needed for specific continuation styles (one follows the other) |
|
switch (sc.state) { |
|
// continued from SCE_PL_WORD |
|
case SCE_PL_FORMAT_IDENT: |
|
// occupies HereDoc state 3 to avoid clashing with HERE docs |
|
if (IsASpaceOrTab(sc.ch)) { // skip whitespace |
|
sc.ChangeState(SCE_PL_DEFAULT); |
|
while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) |
|
sc.Forward(); |
|
sc.SetState(SCE_PL_FORMAT_IDENT); |
|
} |
|
if (setFormatStart.Contains(sc.ch)) { // identifier or '=' |
|
if (sc.ch != '=') { |
|
do { |
|
sc.Forward(); |
|
} while (setFormat.Contains(sc.ch)); |
|
} |
|
while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) |
|
sc.Forward(); |
|
if (sc.ch == '=') { |
|
sc.ForwardSetState(SCE_PL_DEFAULT); |
|
HereDoc.State = 3; |
|
} else { |
|
// invalid identifier; inexact fallback, but hey |
|
sc.ChangeState(SCE_PL_IDENTIFIER); |
|
sc.SetState(SCE_PL_DEFAULT); |
|
} |
|
} else { |
|
sc.ChangeState(SCE_PL_DEFAULT); // invalid identifier |
|
} |
|
backFlag = BACK_NONE; |
|
break; |
|
} |
|
|
|
// Must check end of HereDoc states here before default state is handled |
|
if (HereDoc.State == 1 && sc.atLineEnd) { |
|
// Begin of here-doc (the line after the here-doc delimiter): |
|
// Lexically, the here-doc starts from the next line after the >>, but the |
|
// first line of here-doc seem to follow the style of the last EOL sequence |
|
int st_new = SCE_PL_HERE_QQ; |
|
HereDoc.State = 2; |
|
if (HereDoc.Quoted) { |
|
if (sc.state == SCE_PL_HERE_DELIM) { |
|
// Missing quote at end of string! We are stricter than perl. |
|
// Colour here-doc anyway while marking this bit as an error. |
|
sc.ChangeState(SCE_PL_ERROR); |
|
} |
|
switch (HereDoc.Quote) { |
|
case '\'': |
|
st_new = SCE_PL_HERE_Q; |
|
break; |
|
case '"' : |
|
st_new = SCE_PL_HERE_QQ; |
|
break; |
|
case '`' : |
|
st_new = SCE_PL_HERE_QX; |
|
break; |
|
} |
|
} else { |
|
if (HereDoc.Quote == '\\') |
|
st_new = SCE_PL_HERE_Q; |
|
} |
|
sc.SetState(st_new); |
|
} |
|
if (HereDoc.State == 3 && sc.atLineEnd) { |
|
// Start of format body. |
|
HereDoc.State = 0; |
|
sc.SetState(SCE_PL_FORMAT); |
|
} |
|
|
|
// Determine if a new state should be entered. |
|
if (sc.state == SCE_PL_DEFAULT) { |
|
if (IsADigit(sc.ch) || |
|
(IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) { |
|
sc.SetState(SCE_PL_NUMBER); |
|
backFlag = BACK_NONE; |
|
numState = PERLNUM_DECIMAL; |
|
dotCount = 0; |
|
if (sc.ch == '0') { // hex,bin,octal |
|
if (sc.chNext == 'x' || sc.chNext == 'X') { |
|
numState = PERLNUM_HEX; |
|
} else if (sc.chNext == 'b' || sc.chNext == 'B') { |
|
numState = PERLNUM_BINARY; |
|
} else if (IsADigit(sc.chNext)) { |
|
numState = PERLNUM_OCTAL; |
|
} |
|
if (numState != PERLNUM_DECIMAL) { |
|
sc.Forward(); |
|
} |
|
} else if (sc.ch == 'v') { // vector |
|
numState = PERLNUM_V_VECTOR; |
|
} |
|
} else if (setWord.Contains(sc.ch)) { |
|
// if immediately prefixed by '::', always a bareword |
|
sc.SetState(SCE_PL_WORD); |
|
if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') { |
|
sc.ChangeState(SCE_PL_IDENTIFIER); |
|
} |
|
Sci_PositionU bk = sc.currentPos; |
|
Sci_PositionU fw = sc.currentPos + 1; |
|
// first check for possible quote-like delimiter |
|
if (sc.ch == 's' && !setWord.Contains(sc.chNext)) { |
|
sc.ChangeState(SCE_PL_REGSUBST); |
|
Quote.New(2); |
|
} else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) { |
|
sc.ChangeState(SCE_PL_REGEX); |
|
Quote.New(); |
|
} else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) { |
|
sc.ChangeState(SCE_PL_STRING_Q); |
|
Quote.New(); |
|
} else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) { |
|
sc.ChangeState(SCE_PL_XLAT); |
|
Quote.New(2); |
|
} else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) { |
|
sc.ChangeState(SCE_PL_XLAT); |
|
Quote.New(2); |
|
sc.Forward(); |
|
fw++; |
|
} else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext) |
|
&& !setWord.Contains(sc.GetRelative(2))) { |
|
if (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ); |
|
else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX); |
|
else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR); |
|
else sc.ChangeState(SCE_PL_STRING_QW); // sc.chNext == 'w' |
|
Quote.New(); |
|
sc.Forward(); |
|
fw++; |
|
} else if (sc.ch == 'x' && (sc.chNext == '=' || // repetition |
|
!setWord.Contains(sc.chNext) || |
|
(setRepetition.Contains(sc.chPrev) && IsADigit(sc.chNext)))) { |
|
sc.ChangeState(SCE_PL_OPERATOR); |
|
} |
|
// if potentially a keyword, scan forward and grab word, then check |
|
// if it's really one; if yes, disambiguation test is performed |
|
// otherwise it is always a bareword and we skip a lot of scanning |
|
if (sc.state == SCE_PL_WORD) { |
|
while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw)))) |
|
fw++; |
|
if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) { |
|
sc.ChangeState(SCE_PL_IDENTIFIER); |
|
} |
|
} |
|
// if already SCE_PL_IDENTIFIER, then no ambiguity, skip this |
|
// for quote-like delimiters/keywords, attempt to disambiguate |
|
// to select for bareword, change state -> SCE_PL_IDENTIFIER |
|
if (sc.state != SCE_PL_IDENTIFIER && bk > 0) { |
|
if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos)) |
|
sc.ChangeState(SCE_PL_IDENTIFIER); |
|
} |
|
backFlag = BACK_NONE; |
|
} else if (sc.ch == '#') { |
|
sc.SetState(SCE_PL_COMMENTLINE); |
|
} else if (sc.ch == '\"') { |
|
sc.SetState(SCE_PL_STRING); |
|
Quote.New(); |
|
Quote.Open(sc.ch); |
|
backFlag = BACK_NONE; |
|
} else if (sc.ch == '\'') { |
|
if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) { |
|
// Archaic call |
|
sc.SetState(SCE_PL_IDENTIFIER); |
|
} else { |
|
sc.SetState(SCE_PL_CHARACTER); |
|
Quote.New(); |
|
Quote.Open(sc.ch); |
|
} |
|
backFlag = BACK_NONE; |
|
} else if (sc.ch == '`') { |
|
sc.SetState(SCE_PL_BACKTICKS); |
|
Quote.New(); |
|
Quote.Open(sc.ch); |
|
backFlag = BACK_NONE; |
|
} else if (sc.ch == '$') { |
|
sc.SetState(SCE_PL_SCALAR); |
|
if (sc.chNext == '{') { |
|
sc.ForwardSetState(SCE_PL_OPERATOR); |
|
} else if (IsASpace(sc.chNext)) { |
|
sc.ForwardSetState(SCE_PL_DEFAULT); |
|
} else { |
|
sc.Forward(); |
|
if (sc.Match('`', '`') || sc.Match(':', ':')) { |
|
sc.Forward(); |
|
} |
|
} |
|
backFlag = BACK_NONE; |
|
} else if (sc.ch == '@') { |
|
sc.SetState(SCE_PL_ARRAY); |
|
if (setArray.Contains(sc.chNext)) { |
|
// no special treatment |
|
} else if (sc.chNext == ':' && sc.GetRelative(2) == ':') { |
|
sc.ForwardBytes(2); |
|
} else if (sc.chNext == '{' || sc.chNext == '[') { |
|
sc.ForwardSetState(SCE_PL_OPERATOR); |
|
} else { |
|
sc.ChangeState(SCE_PL_OPERATOR); |
|
} |
|
backFlag = BACK_NONE; |
|
} else if (setPreferRE.Contains(sc.ch)) { |
|
// Explicit backward peeking to set a consistent preferRE for |
|
// any slash found, so no longer need to track preferRE state. |
|
// Find first previous significant lexed element and interpret. |
|
// A few symbols shares this code for disambiguation. |
|
bool preferRE = false; |
|
bool isHereDoc = sc.Match('<', '<'); |
|
bool hereDocSpace = false; // for: SCALAR [whitespace] '<<' |
|
Sci_PositionU bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0; |
|
sc.Complete(); |
|
styler.Flush(); |
|
if (styler.StyleAt(bk) == SCE_PL_DEFAULT) |
|
hereDocSpace = true; |
|
skipWhitespaceComment(styler, bk); |
|
if (bk == 0) { |
|
// avoid backward scanning breakage |
|
preferRE = true; |
|
} else { |
|
int bkstyle = styler.StyleAt(bk); |
|
int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk)); |
|
switch (bkstyle) { |
|
case SCE_PL_OPERATOR: |
|
preferRE = true; |
|
if (bkch == ')' || bkch == ']') { |
|
preferRE = false; |
|
} else if (bkch == '}') { |
|
// backtrack by counting balanced brace pairs |
|
// needed to test for variables like ${}, @{} etc. |
|
bkstyle = styleBeforeBracePair(styler, bk); |
|
if (bkstyle == SCE_PL_SCALAR |
|
|| bkstyle == SCE_PL_ARRAY |
|
|| bkstyle == SCE_PL_HASH |
|
|| bkstyle == SCE_PL_SYMBOLTABLE |
|
|| bkstyle == SCE_PL_OPERATOR) { |
|
preferRE = false; |
|
} |
|
} else if (bkch == '+' || bkch == '-') { |
|
if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1)) |
|
&& bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2))) |
|
// exceptions for operators: unary suffixes ++, -- |
|
preferRE = false; |
|
} |
|
break; |
|
case SCE_PL_IDENTIFIER: |
|
preferRE = true; |
|
bkstyle = styleCheckIdentifier(styler, bk); |
|
if ((bkstyle == 1) || (bkstyle == 2)) { |
|
// inputsymbol or var with "->" or "::" before identifier |
|
preferRE = false; |
|
} else if (bkstyle == 3) { |
|
// bare identifier, test cases follows: |
|
if (sc.ch == '/') { |
|
// if '/', /PATTERN/ unless digit/space immediately after '/' |
|
// if '//', always expect defined-or operator to follow identifier |
|
if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/') |
|
preferRE = false; |
|
} else if (sc.ch == '*' || sc.ch == '%') { |
|
if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*')) |
|
preferRE = false; |
|
} else if (sc.ch == '<') { |
|
if (IsASpace(sc.chNext) || sc.chNext == '=') |
|
preferRE = false; |
|
} |
|
} |
|
break; |
|
case SCE_PL_SCALAR: // for $var<< case: |
|
if (isHereDoc && hereDocSpace) // if SCALAR whitespace '<<', *always* a HERE doc |
|
preferRE = true; |
|
break; |
|
case SCE_PL_WORD: |
|
preferRE = true; |
|
// for HERE docs, always true |
|
if (sc.ch == '/') { |
|
// adopt heuristics similar to vim-style rules: |
|
// keywords always forced as /PATTERN/: split, if, elsif, while |
|
// everything else /PATTERN/ unless digit/space immediately after '/' |
|
// for '//', defined-or favoured unless special keywords |
|
Sci_PositionU bkend = bk + 1; |
|
while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) { |
|
bk--; |
|
} |
|
if (isPerlKeyword(bk, bkend, reWords, styler)) |
|
break; |
|
if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/') |
|
preferRE = false; |
|
} else if (sc.ch == '*' || sc.ch == '%') { |
|
if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*')) |
|
preferRE = false; |
|
} else if (sc.ch == '<') { |
|
if (IsASpace(sc.chNext) || sc.chNext == '=') |
|
preferRE = false; |
|
} |
|
break; |
|
|
|
// other styles uses the default, preferRE=false |
|
case SCE_PL_POD: |
|
case SCE_PL_HERE_Q: |
|
case SCE_PL_HERE_QQ: |
|
case SCE_PL_HERE_QX: |
|
preferRE = true; |
|
break; |
|
} |
|
} |
|
backFlag = BACK_NONE; |
|
if (isHereDoc) { // handle '<<', HERE doc |
|
if (sc.Match("<<>>")) { // double-diamond operator (5.22) |
|
sc.SetState(SCE_PL_OPERATOR); |
|
sc.Forward(3); |
|
} else if (preferRE) { |
|
sc.SetState(SCE_PL_HERE_DELIM); |
|
HereDoc.State = 0; |
|
} else { // << operator |
|
sc.SetState(SCE_PL_OPERATOR); |
|
sc.Forward(); |
|
} |
|
} else if (sc.ch == '*') { // handle '*', typeglob |
|
if (preferRE) { |
|
sc.SetState(SCE_PL_SYMBOLTABLE); |
|
if (sc.chNext == ':' && sc.GetRelative(2) == ':') { |
|
sc.ForwardBytes(2); |
|
} else if (sc.chNext == '{') { |
|
sc.ForwardSetState(SCE_PL_OPERATOR); |
|
} else { |
|
sc.Forward(); |
|
} |
|
} else { |
|
sc.SetState(SCE_PL_OPERATOR); |
|
if (sc.chNext == '*') // exponentiation |
|
sc.Forward(); |
|
} |
|
} else if (sc.ch == '%') { // handle '%', hash |
|
if (preferRE) { |
|
sc.SetState(SCE_PL_HASH); |
|
if (setHash.Contains(sc.chNext)) { |
|
sc.Forward(); |
|
} else if (sc.chNext == ':' && sc.GetRelative(2) == ':') { |
|
sc.ForwardBytes(2); |
|
} else if (sc.chNext == '{') { |
|
sc.ForwardSetState(SCE_PL_OPERATOR); |
|
} else { |
|
sc.ChangeState(SCE_PL_OPERATOR); |
|
} |
|
} else { |
|
sc.SetState(SCE_PL_OPERATOR); |
|
} |
|
} else if (sc.ch == '<') { // handle '<', inputsymbol |
|
if (preferRE) { |
|
// forward scan |
|
int i = InputSymbolScan(sc); |
|
if (i > 0) { |
|
sc.SetState(SCE_PL_IDENTIFIER); |
|
sc.Forward(i); |
|
} else { |
|
sc.SetState(SCE_PL_OPERATOR); |
|
} |
|
} else { |
|
sc.SetState(SCE_PL_OPERATOR); |
|
} |
|
} else { // handle '/', regexp |
|
if (preferRE) { |
|
sc.SetState(SCE_PL_REGEX); |
|
Quote.New(); |
|
Quote.Open(sc.ch); |
|
} else { // / and // operators |
|
sc.SetState(SCE_PL_OPERATOR); |
|
if (sc.chNext == '/') { |
|
sc.Forward(); |
|
} |
|
} |
|
} |
|
} else if (sc.ch == '=' // POD |
|
&& setPOD.Contains(sc.chNext) |
|
&& sc.atLineStart) { |
|
sc.SetState(SCE_PL_POD); |
|
backFlag = BACK_NONE; |
|
} else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) { // extended '-' cases |
|
Sci_PositionU bk = sc.currentPos; |
|
Sci_PositionU fw = 2; |
|
if (setSingleCharOp.Contains(sc.chNext) && // file test operators |
|
!setWord.Contains(sc.GetRelative(2))) { |
|
sc.SetState(SCE_PL_WORD); |
|
} else { |
|
// nominally a minus and bareword; find extent of bareword |
|
while (setWord.Contains(sc.GetRelative(fw))) |
|
fw++; |
|
sc.SetState(SCE_PL_OPERATOR); |
|
} |
|
// force to bareword for hash key => or {variable literal} cases |
|
if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) { |
|
sc.ChangeState(SCE_PL_IDENTIFIER); |
|
} |
|
backFlag = BACK_NONE; |
|
} else if (sc.ch == '(' && sc.currentPos > 0) { // '(' or subroutine prototype |
|
sc.Complete(); |
|
if (styleCheckSubPrototype(styler, sc.currentPos - 1)) { |
|
sc.SetState(SCE_PL_SUB_PROTOTYPE); |
|
backFlag = BACK_NONE; |
|
} else { |
|
sc.SetState(SCE_PL_OPERATOR); |
|
} |
|
} else if (setPerlOperator.Contains(sc.ch)) { // operators |
|
sc.SetState(SCE_PL_OPERATOR); |
|
if (sc.Match('.', '.')) { // .. and ... |
|
sc.Forward(); |
|
if (sc.chNext == '.') sc.Forward(); |
|
} |
|
} else if (sc.ch == 4 || sc.ch == 26) { // ^D and ^Z ends valid perl source |
|
sc.SetState(SCE_PL_DATASECTION); |
|
} else { |
|
// keep colouring defaults |
|
sc.Complete(); |
|
} |
|
} |
|
} |
|
sc.Complete(); |
|
if (sc.state == SCE_PL_HERE_Q |
|
|| sc.state == SCE_PL_HERE_QQ |
|
|| sc.state == SCE_PL_HERE_QX |
|
|| sc.state == SCE_PL_FORMAT) { |
|
styler.ChangeLexerState(sc.currentPos, styler.Length()); |
|
} |
|
sc.Complete(); |
|
} |
|
|
|
// POD heading levels are stored inside the fold level value, in bits 7-4.
// PERL_HEADFOLD_SHIFT is the bit offset of that field.
constexpr int PERL_HEADFOLD_SHIFT = 4;
// PERL_HEADFOLD_MASK selects the heading field; clearing it removes any
// heading level from a fold level value.
constexpr int PERL_HEADFOLD_MASK = 0xF0;
|
|
|
// Assign fold levels to the lines covering startPos .. startPos + length.
// Returns at once when options.fold is off. Scanning restarts one line before
// startPos so that the previous line's fold status can be corrected.
// Every line's level value carries the level the line starts at in its low
// bits and the level of the following line shifted up by 16 bits.
// Level changes come from: comment line runs (options.foldComment), '{' '}'
// '[' ']' operators, qw lists, POD and data-section POD (options.foldPOD),
// package lines (options.foldPackage), here-doc styles, and "#{" / "#}"
// comments (options.foldCommentExplicit).
void SCI_METHOD LexerPerl::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) {
	if (!options.fold) {
		return;
	}

	LexAccessor styler(pAccess);

	// The stop position is fixed before startPos is moved back below.
	const Sci_PositionU stopPos = startPos + length;
	Sci_Position line = styler.GetLine(startPos);

	// Step back one line in case its fold status needs fixing.
	if (startPos > 0 && line > 0) {
		--line;
		startPos = styler.LineStart(line);
	}

	// The starting level of a line is read from the upper bits of the line before it.
	int prevLevel = (line > 0) ? (styler.LevelAt(line - 1) >> 16) : SC_FOLDLEVELBASE;
	int curLevel = prevLevel;
	int nonBlankCount = 0;
	char nextCh = styler[startPos];
	char prevCh = styler.SafeGetCharAt(startPos - 1);
	int nextStyle = styler.StyleAt(startPos);
	// Set at line start, consumed at line end: the line is a package line
	// that is not followed by another package line.
	bool packageHeader = false;
	// Heading level found on the current line by PodHeadingLevel, 0 if none.
	int headingLevel = 0;

	const auto isHereDocStyle = [](int s) {
		return s == SCE_PL_HERE_QQ || s == SCE_PL_HERE_Q || s == SCE_PL_HERE_QX;
	};

	for (Sci_PositionU pos = startPos; pos < stopPos; ++pos) {
		const char ch = nextCh;
		nextCh = styler.SafeGetCharAt(pos + 1);
		const int style = nextStyle;
		nextStyle = styler.StyleAt(pos + 1);
		const int prevStyle = (pos > 0) ? styler.StyleAt(pos - 1) : SCE_PL_DEFAULT;
		// A lone CR or any LF ends the line; the CR of a CRLF pair does not.
		const bool lineEnds = (ch == '\n') || (ch == '\r' && nextCh != '\n');
		const bool lineBegins = (pos == 0) || (prevCh == '\r') || (prevCh == '\n');

		// Comment runs: open on the first comment line, close on the last.
		if (options.foldComment && lineEnds && IsCommentLine(line, styler)) {
			const bool commentAbove = IsCommentLine(line - 1, styler);
			const bool commentBelow = IsCommentLine(line + 1, styler);
			if (!commentAbove && commentBelow) {
				++curLevel;
			} else if (commentAbove && !commentBelow) {
				--curLevel;
			}
		}

		if (style == SCE_PL_OPERATOR) {
			// {} and [] blocks
			switch (ch) {
			case '{':
			case '[':
				if (options.foldAtElse && curLevel < prevLevel) {
					--prevLevel;
				}
				++curLevel;
				break;
			case '}':
			case ']':
				--curLevel;
				break;
			default:
				break;
			}
		} else if (style == SCE_PL_STRING_QW) {
			// qw list: open on its first character, close on its last
			if (prevStyle != style) {
				++curLevel;
			} else if (nextStyle != style) {
				--curLevel;
			}
		}

		// POD sections, in code and in the data section
		if (options.foldPOD && lineBegins) {
			if (style == SCE_PL_POD) {
				if (prevStyle != SCE_PL_POD && prevStyle != SCE_PL_POD_VERB) {
					++curLevel;
				} else if (styler.Match(pos, "=cut")) {
					curLevel = (curLevel & ~PERL_HEADFOLD_MASK) - 1;
				} else if (styler.Match(pos, "=head")) {
					headingLevel = PodHeadingLevel(pos, styler);
				}
			} else if (style == SCE_PL_DATASECTION) {
				if (ch == '=' && IsASCII(nextCh) && isalpha(nextCh) && curLevel == SC_FOLDLEVELBASE) {
					++curLevel;
				} else if (styler.Match(pos, "=cut") && curLevel > SC_FOLDLEVELBASE) {
					curLevel = (curLevel & ~PERL_HEADFOLD_MASK) - 1;
				} else if (styler.Match(pos, "=head")) {
					headingLevel = PodHeadingLevel(pos, styler);
				} else if (prevStyle != SCE_PL_DATASECTION) {
					// On entering the data section the level may still be above
					// the base (package in use, unclosed brace); the tests above
					// compare against SC_FOLDLEVELBASE, so reset it.
					curLevel = SC_FOLDLEVELBASE;
				}
			}
		}

		// Package lines: remember the hit until the end of this line.
		if (options.foldPackage && lineBegins
		        && IsPackageLine(line, styler)
		        && !IsPackageLine(line + 1, styler)) {
			packageHeader = true;
		}

		// Here-docs: open when a here-doc style begins, close when it ends.
		const bool inHereDoc = isHereDocStyle(style);
		const bool cameFromHereDoc = isHereDocStyle(prevStyle);
		if (inHereDoc && !cameFromHereDoc) {
			++curLevel;
		} else if (!inHereDoc && cameFromHereDoc) {
			--curLevel;
		}

		// Explicit fold markers in comments: "#{" opens, "#}" closes.
		if (options.foldCommentExplicit && style == SCE_PL_COMMENTLINE && ch == '#') {
			if (nextCh == '{') {
				++curLevel;
			} else if (nextCh == '}' && curLevel > SC_FOLDLEVELBASE) {
				--curLevel;
			}
		}

		if (lineEnds) {
			int lineLevel = prevLevel;
			// POD headings occupy bits 7-4, leaving some room for POD
			// sections placed inside blocks and similar practice.
			if (headingLevel > 0) {
				curLevel = (prevLevel & ~PERL_HEADFOLD_MASK) | (headingLevel << PERL_HEADFOLD_SHIFT);
				lineLevel = (curLevel - 1) | SC_FOLDLEVELHEADERFLAG;
				headingLevel = 0;
			}
			// A package line is forced to be a base-level header.
			if (packageHeader) {
				lineLevel = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
				curLevel = SC_FOLDLEVELBASE + 1;
				packageHeader = false;
			}
			lineLevel |= curLevel << 16;
			if (nonBlankCount == 0 && options.foldCompact) {
				lineLevel |= SC_FOLDLEVELWHITEFLAG;
			}
			if (curLevel > prevLevel && nonBlankCount > 0) {
				lineLevel |= SC_FOLDLEVELHEADERFLAG;
			}
			// Only write when the stored value actually differs.
			if (styler.LevelAt(line) != lineLevel) {
				styler.SetLevel(line, lineLevel);
			}
			++line;
			prevLevel = curLevel;
			nonBlankCount = 0;
		}

		if (!isspacechar(ch)) {
			++nonBlankCount;
		}
		prevCh = ch;
	}

	// Store the level of the following line, keeping its existing flag bits.
	const int keptFlags = styler.LevelAt(line) & ~SC_FOLDLEVELNUMBERMASK;
	styler.SetLevel(line, prevLevel | keptFlags);
}
|
|
|
} |
|
|
|
// Lexer module object for Perl, constructed from SCLEX_PERL, the
// LexerPerl::LexerFactoryPerl function, the name "perl" and perlWordListDesc.
// It is defined after the anonymous namespace is closed and declared extern,
// so this const object has external linkage rather than the internal linkage
// a namespace-scope const would otherwise get.
extern const LexerModule lmPerl(SCLEX_PERL, LexerPerl::LexerFactoryPerl, "perl", perlWordListDesc);
|
|
|