2022-01-04 23:07:50 +00:00
|
|
|
// Scintilla source code edit control
/** @file LexR.cxx
 ** Lexer for R, S, SPlus Statistics Program (Heavily derived from CPP Lexer).
 **
 **/
// Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
|
|
|
|
|
2022-10-12 18:45:40 +00:00
|
|
|
#include <cassert>
|
|
|
|
#include <cctype>
|
2022-01-04 23:07:50 +00:00
|
|
|
|
|
|
|
#include <string>
|
|
|
|
#include <string_view>
|
|
|
|
|
|
|
|
#include "ILexer.h"
|
|
|
|
#include "Scintilla.h"
|
|
|
|
#include "SciLexer.h"
|
|
|
|
|
|
|
|
#include "WordList.h"
|
|
|
|
#include "LexAccessor.h"
|
|
|
|
#include "Accessor.h"
|
|
|
|
#include "StyleContext.h"
|
|
|
|
#include "CharacterSet.h"
|
|
|
|
#include "LexerModule.h"
|
|
|
|
|
|
|
|
using namespace Lexilla;
|
|
|
|
|
2022-10-12 18:45:40 +00:00
|
|
|
namespace {
|
|
|
|
|
|
|
|
// Returns true when ch is an ASCII character that may continue an identifier:
// a letter, a digit, '.' or '_'. Anything outside [0, 0x80) is rejected.
inline bool IsAWordChar(int ch) noexcept {
	// isalnum is only defined for values representable as unsigned char (and EOF),
	// so negative values are filtered out before it is called.
	if (ch < 0 || ch >= 0x80) {
		return false;
	}
	return isalnum(ch) || ch == '.' || ch == '_';
}
|
|
|
|
|
2022-10-12 18:45:40 +00:00
|
|
|
// Returns true when ch is an ASCII letter, digit or '_'.
// Unlike IsAWordChar, '.' is not accepted here.
inline bool IsAWordStart(int ch) noexcept {
	// isalnum is only defined for values representable as unsigned char (and EOF),
	// so negative values are filtered out before it is called.
	if (ch < 0 || ch >= 0x80) {
		return false;
	}
	return isalnum(ch) || ch == '_';
}
|
|
|
|
|
2022-10-12 18:45:40 +00:00
|
|
|
// Returns true when ch is one of the single characters styled as an operator.
constexpr bool IsAnOperator(int ch) noexcept {
	// '.' left out as it is used to make up numbers
	switch (ch) {
	case '-':
	case '+':
	case '!':
	case '~':
	case '?':
	case ':':
	case '*':
	case '/':
	case '^':
	case '<':
	case '>':
	case '=':
	case '&':
	case '|':
	case '$':
	case '(':
	case ')':
	case '}':
	case '{':
	case '[':
	case ']':
		return true;
	default:
		return false;
	}
}
|
|
|
|
|
2022-10-12 18:45:40 +00:00
|
|
|
constexpr bool IsOctalOrHex(int ch, bool hex) noexcept {
|
|
|
|
return IsAnOctalDigit(ch) || (hex && IsAHeXDigit(ch));
|
|
|
|
}
|
2022-01-04 23:07:50 +00:00
|
|
|
|
2022-10-12 18:45:40 +00:00
|
|
|
// https://search.r-project.org/R/refmans/base/html/Quotes.html
|
|
|
|
struct EscapeSequence {
|
|
|
|
int outerState = SCE_R_DEFAULT;
|
|
|
|
int digitsLeft = 0;
|
|
|
|
bool hex = false;
|
|
|
|
bool brace = false;
|
2022-01-04 23:07:50 +00:00
|
|
|
|
2022-10-12 18:45:40 +00:00
|
|
|
// highlight any character as escape sequence, unrecognized escape sequence is syntax error.
|
|
|
|
void resetEscapeState(int state, int chNext) noexcept {
|
|
|
|
outerState = state;
|
|
|
|
digitsLeft = 1;
|
|
|
|
hex = true;
|
|
|
|
brace = false;
|
|
|
|
if (chNext == 'x') {
|
|
|
|
digitsLeft = 3;
|
|
|
|
} else if (chNext == 'u') {
|
|
|
|
digitsLeft = 5;
|
|
|
|
} else if (chNext == 'U') {
|
|
|
|
digitsLeft = 9;
|
|
|
|
} else if (IsAnOctalDigit(chNext)) {
|
|
|
|
digitsLeft = 3;
|
|
|
|
hex = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
bool atEscapeEnd(int ch) noexcept {
|
|
|
|
--digitsLeft;
|
|
|
|
return digitsLeft <= 0 || !IsOctalOrHex(ch, hex);
|
|
|
|
}
|
|
|
|
};
|
2022-01-04 23:07:50 +00:00
|
|
|
|
2022-10-12 18:45:40 +00:00
|
|
|
// Scans forward from pos looking for the opening of a raw string body:
// zero or more '-' characters followed by '(', '[' or '{'.
//
// On success returns the matching closing delimiter (')', ']' or '}') and
// stores the number of dashes seen in dashCount.
// On any other character returns 0 and sets dashCount to 0.
int CheckRawString(LexAccessor &styler, Sci_Position pos, int &dashCount) {
	dashCount = 0;
	while (true) {
		const char ch = styler.SafeGetCharAt(pos++);
		switch (ch) {
		case '-':
			++dashCount;
			break;
		case '(':
			return ')';
		case '[':
			return ']';
		case '{':
			return '}';
		default:
			dashCount = 0;
			return 0;
		}
	}
}
|
2022-01-04 23:07:50 +00:00
|
|
|
|
2022-10-12 18:45:40 +00:00
|
|
|
// Styles the range [startPos, startPos + length) of an R document.
//
// startPos, length  range handed to StyleContext.
// initStyle         style in effect at startPos.
// keywordlists      [0] -> SCE_R_KWORD, [1] -> SCE_R_BASEKWORD, [2] -> SCE_R_OTHERKWORD.
// styler            document accessor; also used to read the
//                   lexer.r.escape.sequence property and the per-line state.
//
// The per-line state stores the raw string context: the closing delimiter in
// the low 8 bits and the dash count in the bits above.
void ColouriseRDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[],
	Accessor &styler) {

	const WordList &keywords = *keywordlists[0];
	const WordList &keywords2 = *keywordlists[1];
	const WordList &keywords3 = *keywordlists[2];
	// state for raw string
	int matchingDelimiter = 0;
	int dashCount = 0;

	// property lexer.r.escape.sequence
	//	Set to 1 to enable highlighting of escape sequences in strings.
	const bool escapeSequence = styler.GetPropertyInt("lexer.r.escape.sequence", 0) != 0;
	EscapeSequence escapeSeq;

	StyleContext sc(startPos, length, initStyle, styler);
	if (sc.currentLine > 0) {
		// Recover the raw string context saved at the end of the previous line.
		const int lineState = styler.GetLineState(sc.currentLine - 1);
		matchingDelimiter = lineState & 0xff;
		dashCount = lineState >> 8;
	}

	for (; sc.More(); sc.Forward()) {
		// Determine if the current state should terminate.
		switch (sc.state) {
		case SCE_R_OPERATOR:
			sc.SetState(SCE_R_DEFAULT);
			break;

		case SCE_R_NUMBER:
			// https://cran.r-project.org/doc/manuals/r-release/R-lang.html#Literal-constants
			if (AnyOf(sc.ch, 'e', 'E', 'p', 'P') && (IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-')) {
				sc.Forward(); // exponent part
			} else if (!(IsAHeXDigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext)))) {
				if (AnyOf(sc.ch, 'L', 'i')) {
					sc.Forward(); // integer and complex qualifier
				}
				sc.SetState(SCE_R_DEFAULT);
			}
			break;

		case SCE_R_IDENTIFIER:
			if (!IsAWordChar(sc.ch)) {
				// The word is complete: reclassify it if it is in one of the keyword lists.
				char s[100];
				sc.GetCurrent(s, sizeof(s));
				if (keywords.InList(s)) {
					sc.ChangeState(SCE_R_KWORD);
				} else if (keywords2.InList(s)) {
					sc.ChangeState(SCE_R_BASEKWORD);
				} else if (keywords3.InList(s)) {
					sc.ChangeState(SCE_R_OTHERKWORD);
				}
				sc.SetState(SCE_R_DEFAULT);
			}
			break;

		case SCE_R_COMMENT:
			if (sc.MatchLineEnd()) {
				sc.SetState(SCE_R_DEFAULT);
			}
			break;

		case SCE_R_STRING:
		case SCE_R_STRING2:
		case SCE_R_BACKTICKS:
			if (sc.ch == '\\') {
				if (escapeSequence) {
					escapeSeq.resetEscapeState(sc.state, sc.chNext);
					sc.SetState(SCE_R_ESCAPESEQUENCE);
					sc.Forward();
					if (sc.chNext == '{' && AnyOf(sc.ch, 'u', 'U')) {
						escapeSeq.brace = true;
						sc.Forward();
					} else if (sc.MatchLineEnd()) {
						// don't highlight line ending as escape sequence:
						// escapeSeq.outerState is lost when editing on next line.
						sc.SetState(escapeSeq.outerState);
					}
				} else {
					sc.Forward(); // Skip all characters after the backslash
				}
			} else if ((sc.state == SCE_R_STRING && sc.ch == '\"')
				|| (sc.state == SCE_R_STRING2 && sc.ch == '\'')
				|| (sc.state == SCE_R_BACKTICKS && sc.ch == '`')) {
				sc.ForwardSetState(SCE_R_DEFAULT);
			}
			break;

		case SCE_R_ESCAPESEQUENCE:
			if (escapeSeq.atEscapeEnd(sc.ch)) {
				if (escapeSeq.brace && sc.ch == '}') {
					sc.Forward();
				}
				sc.SetState(escapeSeq.outerState);
				// Re-examine the current character in the restored string state
				// without advancing first.
				continue;
			}
			break;

		case SCE_R_RAWSTRING:
		case SCE_R_RAWSTRING2:
			// A raw string ends at: closing delimiter, dashCount dashes, then the
			// quote character matching the one that opened it.
			while (sc.ch == matchingDelimiter) {
				sc.Forward();
				int count = dashCount;
				while (count != 0 && sc.ch == '-') {
					--count;
					sc.Forward();
				}
				if (count == 0 && sc.ch == ((sc.state == SCE_R_RAWSTRING) ? '\"' : '\'')) {
					matchingDelimiter = 0;
					dashCount = 0;
					sc.ForwardSetState(SCE_R_DEFAULT);
					break;
				}
			}
			break;

		case SCE_R_INFIX:
			if (sc.ch == '%') {
				sc.ForwardSetState(SCE_R_DEFAULT);
			} else if (sc.atLineEnd) {
				sc.ChangeState(SCE_R_INFIXEOL);
			}
			break;

		case SCE_R_INFIXEOL:
			if (sc.atLineStart) {
				sc.SetState(SCE_R_DEFAULT);
			}
			break;
		}

		// Determine if a new state should be entered.
		if (sc.state == SCE_R_DEFAULT) {
			if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
				sc.SetState(SCE_R_NUMBER);
				if (sc.ch == '0' && AnyOf(sc.chNext, 'x', 'X')) {
					sc.Forward();
				}
			} else if (AnyOf(sc.ch, 'r', 'R') && AnyOf(sc.chNext, '\"', '\'')) {
				const int chNext = sc.chNext;
				matchingDelimiter = CheckRawString(styler, sc.currentPos + 2, dashCount);
				if (matchingDelimiter) {
					sc.SetState((chNext == '\"') ? SCE_R_RAWSTRING : SCE_R_RAWSTRING2);
					// Step over the prefix letter, the quote and the dashes.
					sc.Forward(dashCount + 2);
				} else {
					// syntax error
					sc.SetState(SCE_R_IDENTIFIER);
					sc.ForwardSetState((chNext == '\"') ? SCE_R_STRING : SCE_R_STRING2);
				}
			} else if (IsAWordStart(sc.ch) ) {
				sc.SetState(SCE_R_IDENTIFIER);
			} else if (sc.Match('#')) {
				sc.SetState(SCE_R_COMMENT);
			} else if (sc.ch == '\"') {
				sc.SetState(SCE_R_STRING);
			} else if (sc.ch == '%') {
				sc.SetState(SCE_R_INFIX);
			} else if (sc.ch == '\'') {
				sc.SetState(SCE_R_STRING2);
			} else if (sc.ch == '`') {
				sc.SetState(SCE_R_BACKTICKS);
			} else if (IsAnOperator(sc.ch)) {
				sc.SetState(SCE_R_OPERATOR);
			}
		}

		if (sc.atLineEnd) {
			// Save the raw string context so lexing can resume on the next line.
			const int lineState = matchingDelimiter | (dashCount << 8);
			styler.SetLineState(sc.currentLine, lineState);
		}
	}
	sc.Complete();
}
|
|
|
|
|
|
|
|
// Store both the current line's fold level and the next line's in the
// level store to make it easy to pick up with each increment
// and to make it possible to fiddle the current level for "} else {".
|
2022-10-12 18:45:40 +00:00
|
|
|
void FoldRDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[],
|
2022-01-04 23:07:50 +00:00
|
|
|
Accessor &styler) {
|
2022-10-12 18:45:40 +00:00
|
|
|
const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
|
|
|
|
const bool foldAtElse = styler.GetPropertyInt("fold.at.else", 0) != 0;
|
|
|
|
const Sci_PositionU endPos = startPos + length;
|
2022-01-04 23:07:50 +00:00
|
|
|
int visibleChars = 0;
|
|
|
|
Sci_Position lineCurrent = styler.GetLine(startPos);
|
|
|
|
int levelCurrent = SC_FOLDLEVELBASE;
|
|
|
|
if (lineCurrent > 0)
|
|
|
|
levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
|
|
|
|
int levelMinCurrent = levelCurrent;
|
|
|
|
int levelNext = levelCurrent;
|
|
|
|
char chNext = styler[startPos];
|
|
|
|
int styleNext = styler.StyleAt(startPos);
|
|
|
|
for (Sci_PositionU i = startPos; i < endPos; i++) {
|
2022-10-12 18:45:40 +00:00
|
|
|
const char ch = chNext;
|
2022-01-04 23:07:50 +00:00
|
|
|
chNext = styler.SafeGetCharAt(i + 1);
|
2022-10-12 18:45:40 +00:00
|
|
|
const int style = styleNext;
|
2022-01-04 23:07:50 +00:00
|
|
|
styleNext = styler.StyleAt(i + 1);
|
2022-10-12 18:45:40 +00:00
|
|
|
const bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
|
2022-01-04 23:07:50 +00:00
|
|
|
if (style == SCE_R_OPERATOR) {
|
|
|
|
if (ch == '{') {
|
|
|
|
// Measure the minimum before a '{' to allow
|
|
|
|
// folding on "} else {"
|
|
|
|
if (levelMinCurrent > levelNext) {
|
|
|
|
levelMinCurrent = levelNext;
|
|
|
|
}
|
|
|
|
levelNext++;
|
|
|
|
} else if (ch == '}') {
|
|
|
|
levelNext--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (atEOL) {
|
|
|
|
int levelUse = levelCurrent;
|
|
|
|
if (foldAtElse) {
|
|
|
|
levelUse = levelMinCurrent;
|
|
|
|
}
|
|
|
|
int lev = levelUse | levelNext << 16;
|
|
|
|
if (visibleChars == 0 && foldCompact)
|
|
|
|
lev |= SC_FOLDLEVELWHITEFLAG;
|
|
|
|
if (levelUse < levelNext)
|
|
|
|
lev |= SC_FOLDLEVELHEADERFLAG;
|
|
|
|
if (lev != styler.LevelAt(lineCurrent)) {
|
|
|
|
styler.SetLevel(lineCurrent, lev);
|
|
|
|
}
|
|
|
|
lineCurrent++;
|
|
|
|
levelCurrent = levelNext;
|
|
|
|
levelMinCurrent = levelCurrent;
|
|
|
|
visibleChars = 0;
|
|
|
|
}
|
|
|
|
if (!isspacechar(ch))
|
|
|
|
visibleChars++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2022-10-12 18:45:40 +00:00
|
|
|
// Descriptions of the keyword lists, terminated by nullptr.
// ColouriseRDoc consults only the first three lists.
const char * const RWordLists[] = {
	"Language Keywords",
	"Base / Default package function",
	"Other Package Functions",
	"Unused",
	"Unused",
	nullptr,
};
|
2022-01-04 23:07:50 +00:00
|
|
|
|
2022-10-12 18:45:40 +00:00
|
|
|
}
|
2022-01-04 23:07:50 +00:00
|
|
|
|
|
|
|
// Lexer module object for R: constructed with the SCLEX_R identifier, the
// ColouriseRDoc styling function, the name "r", the FoldRDoc folding function
// and the RWordLists keyword list descriptions.
LexerModule lmR(SCLEX_R, ColouriseRDoc, "r", FoldRDoc, RWordLists);
|