1774 lines
64 KiB
C++
1774 lines
64 KiB
C++
// Scintilla source code edit control
|
|
/** @file LexRuby.cxx
|
|
** Lexer for Ruby.
|
|
**/
|
|
// Copyright 2001- by Clemens Wyss <wys@helbling.ch>
|
|
// The License.txt file describes the conditions under which this software may be distributed.
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include <stdarg.h>
|
|
#include <assert.h>
|
|
#include <ctype.h>
|
|
|
|
#include "ILexer.h"
|
|
#include "Scintilla.h"
|
|
#include "SciLexer.h"
|
|
|
|
#include "WordList.h"
|
|
#include "LexAccessor.h"
|
|
#include "Accessor.h"
|
|
#include "StyleContext.h"
|
|
#include "CharacterSet.h"
|
|
#include "LexerModule.h"
|
|
|
|
#ifdef SCI_NAMESPACE
|
|
using namespace Scintilla;
|
|
#endif
|
|
|
|
//XXX Identical to Perl, put in common area
|
|
// True when ch is one of the two end-of-line bytes, CR or LF.
static inline bool isEOLChar(char ch) {
    switch (ch) {
    case '\r':
    case '\n':
        return true;
    default:
        return false;
    }
}
|
|
|
|
// True when the value, viewed as unsigned int, lies in 0..127.
// A negative (signed) char converts to a huge unsigned value and is rejected.
#define isSafeASCII(ch) ((unsigned int)(ch) < 128u)
// Exact complement of isSafeASCII; kept as its own name for readability.
#define isHighBitChar(ch) ((unsigned int)(ch) >= 128u)
|
|
|
|
// True for '_' and for 7-bit characters that isalpha() accepts.
// Values above 127 (as unsigned int) are never handed to isalpha().
static inline bool isSafeAlpha(char ch) {
    if (ch == '_') {
        return true;
    }
    if ((unsigned int)(ch) > 127) {
        return false;
    }
    return isalpha(ch) != 0;
}
|
|
|
|
// True for '_' and for 7-bit characters that isalnum() accepts.
// Values above 127 (as unsigned int) are never handed to isalnum().
static inline bool isSafeAlnum(char ch) {
    if (ch == '_') {
        return true;
    }
    if ((unsigned int)(ch) > 127) {
        return false;
    }
    return isalnum(ch) != 0;
}
|
|
|
|
// True for any character above 127 (as unsigned int), for '_', and for
// 7-bit characters that isalnum() accepts.
static inline bool isSafeAlnumOrHigh(char ch) {
    if ((unsigned int)(ch) > 127) {
        // High-bit characters are accepted without consulting <ctype.h>.
        return true;
    }
    if (isalnum(ch)) {
        return true;
    }
    return ch == '_';
}
|
|
|
|
// True only for 7-bit characters that isdigit() accepts.
// Values above 127 (as unsigned int) are never handed to isdigit().
static inline bool isSafeDigit(char ch) {
    if ((unsigned int)(ch) > 127) {
        return false;
    }
    return isdigit(ch) != 0;
}
|
|
|
|
// Word-character test used by this lexer: any character above 127
// (as unsigned int), '_', or a 7-bit character that isalnum() accepts.
//
// Error: scintilla's KeyWords.h includes '.' as a word-char.
// Here '.' is deliberately NOT a word character, so that things that
// can take methods stay separate from the methods themselves.
static inline bool isSafeWordcharOrHigh(char ch) {
    if ((unsigned int)(ch) > 127) {
        return true;
    }
    if (ch == '_') {
        return true;
    }
    return isalnum(ch) != 0;
}
|
|
|
|
// True for a blank or a tab only; end-of-line characters do not count.
static inline bool iswhitespace(char ch) {
    switch (ch) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
|
|
|
|
// Size of the scratch buffer used when a word is copied out of the document.
#define MAX_KEYWORD_LENGTH 200

// Low six bits of a style byte.
#define STYLE_MASK 63
// Strip everything above STYLE_MASK from a style value.
// The argument is parenthesized so that compound expressions such as
// actual_style(a | b) mask the whole expression, not just its last operand.
#define actual_style(style) ((style) & STYLE_MASK)
|
|
|
|
static bool followsDot(unsigned int pos, Accessor &styler) {
|
|
styler.Flush();
|
|
for (; pos >= 1; --pos) {
|
|
int style = actual_style(styler.StyleAt(pos));
|
|
char ch;
|
|
switch (style) {
|
|
case SCE_RB_DEFAULT:
|
|
ch = styler[pos];
|
|
if (ch == ' ' || ch == '\t') {
|
|
//continue
|
|
} else {
|
|
return false;
|
|
}
|
|
break;
|
|
|
|
case SCE_RB_OPERATOR:
|
|
return styler[pos] == '.';
|
|
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Forward declarations
|
|
static bool keywordIsAmbiguous(const char *prevWord);
|
|
static bool keywordDoStartsLoop(int pos,
|
|
Accessor &styler);
|
|
static bool keywordIsModifier(const char *word,
|
|
int pos,
|
|
Accessor &styler);
|
|
|
|
static int ClassifyWordRb(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord) {
|
|
char s[MAX_KEYWORD_LENGTH];
|
|
unsigned int i, j;
|
|
unsigned int lim = end - start + 1; // num chars to copy
|
|
if (lim >= MAX_KEYWORD_LENGTH) {
|
|
lim = MAX_KEYWORD_LENGTH - 1;
|
|
}
|
|
for (i = start, j = 0; j < lim; i++, j++) {
|
|
s[j] = styler[i];
|
|
}
|
|
s[j] = '\0';
|
|
int chAttr;
|
|
if (0 == strcmp(prevWord, "class"))
|
|
chAttr = SCE_RB_CLASSNAME;
|
|
else if (0 == strcmp(prevWord, "module"))
|
|
chAttr = SCE_RB_MODULE_NAME;
|
|
else if (0 == strcmp(prevWord, "def"))
|
|
chAttr = SCE_RB_DEFNAME;
|
|
else if (keywords.InList(s) && !followsDot(start - 1, styler)) {
|
|
if (keywordIsAmbiguous(s)
|
|
&& keywordIsModifier(s, start, styler)) {
|
|
|
|
// Demoted keywords are colored as keywords,
|
|
// but do not affect changes in indentation.
|
|
//
|
|
// Consider the word 'if':
|
|
// 1. <<if test ...>> : normal
|
|
// 2. <<stmt if test>> : demoted
|
|
// 3. <<lhs = if ...>> : normal: start a new indent level
|
|
// 4. <<obj.if = 10>> : color as identifer, since it follows '.'
|
|
|
|
chAttr = SCE_RB_WORD_DEMOTED;
|
|
} else {
|
|
chAttr = SCE_RB_WORD;
|
|
}
|
|
} else
|
|
chAttr = SCE_RB_IDENTIFIER;
|
|
styler.ColourTo(end, chAttr);
|
|
if (chAttr == SCE_RB_WORD) {
|
|
strcpy(prevWord, s);
|
|
} else {
|
|
prevWord[0] = 0;
|
|
}
|
|
return chAttr;
|
|
}
|
|
|
|
|
|
//XXX Identical to Perl, put in common area
|
|
// Reports whether the text of val appears in the document starting at pos.
//
// Returns false without comparing anything when pos + strlen(val) reaches
// or passes lengthDoc, so a match that ends exactly at lengthDoc is not
// reported.
static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
    const int valLen = static_cast<int>(strlen(val));
    if (pos + valLen >= lengthDoc) {
        return false;
    }
    for (int k = 0; k < valLen; k++) {
        if (styler[pos + k] != val[k]) {
            return false;
        }
    }
    return true;
}
|
|
|
|
// Do Ruby better -- find the end of the line, work back,
|
|
// and then check for leading white space
|
|
|
|
// Precondition: the here-doc target can be indented
|
|
// Reports whether HereDocDelim sits at pos and is preceded on its line by
// nothing but blanks and tabs.
//
// After the text match, the scan runs backwards from pos - 1: reaching an
// end-of-line character first yields true, any other non-blank character
// yields false. The scan stops before position 0, in which case the result
// is false.
static bool lookingAtHereDocDelim(Accessor &styler,
                                  int pos,
                                  int lengthDoc,
                                  const char *HereDocDelim)
{
    if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) {
        return false;
    }
    for (int scan = pos - 1; scan > 0; --scan) {
        const char ch = styler[scan];
        if (isEOLChar(ch)) {
            return true;
        }
        if (ch != ' ' && ch != '\t') {
            return false;
        }
    }
    return false;
}
|
|
|
|
//XXX Identical to Perl, put in common area
|
|
// Maps an opening bracket to its closing partner: ( ) [ ] { } < >.
// Every other character is its own partner and is returned unchanged.
static char opposite(char ch) {
    switch (ch) {
    case '(':
        return ')';
    case '[':
        return ']';
    case '{':
        return '}';
    case '<':
        return '>';
    default:
        return ch;
    }
}
|
|
|
|
// Null transitions when we see we've reached the end
|
|
// and need to relex the curr char.
|
|
|
|
// Steps the scan back by one character and resets the state to
// SCE_RB_DEFAULT: i is decremented, and the look-ahead
// characters are shifted one place (chNext2 takes chNext, chNext takes ch).
// ch itself is left untouched.
static void redo_char(int &i, char &ch, char &chNext, char &chNext2,
                      int &state) {
    state = SCE_RB_DEFAULT;
    chNext2 = chNext;
    chNext = ch;
    --i;
}
|
|
|
|
// Moves the scan forward by one character: ch takes chNext, chNext takes
// chNext2, and i is incremented. chNext2 is not refreshed here.
static void advance_char(int &i, char &ch, char &chNext, char &chNext2) {
    ch = chNext;
    chNext = chNext2;
    ++i;
}
|
|
|
|
// precondition: startPos points to one after the EOL char
|
|
static bool currLineContainsHereDelims(int& startPos,
|
|
Accessor &styler) {
|
|
if (startPos <= 1)
|
|
return false;
|
|
|
|
int pos;
|
|
for (pos = startPos - 1; pos > 0; pos--) {
|
|
char ch = styler.SafeGetCharAt(pos);
|
|
if (isEOLChar(ch)) {
|
|
// Leave the pointers where they are -- there are no
|
|
// here doc delims on the current line, even if
|
|
// the EOL isn't default style
|
|
|
|
return false;
|
|
} else {
|
|
styler.Flush();
|
|
if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if (pos == 0) {
|
|
return false;
|
|
}
|
|
// Update the pointers so we don't have to re-analyze the string
|
|
startPos = pos;
|
|
return true;
|
|
}
|
|
|
|
// This class is used by the enter and exit methods, so it needs
|
|
// to be hoisted out of the function.
|
|
|
|
class QuoteCls {
|
|
public:
|
|
int Count;
|
|
char Up;
|
|
char Down;
|
|
QuoteCls() {
|
|
this->New();
|
|
}
|
|
void New() {
|
|
Count = 0;
|
|
Up = '\0';
|
|
Down = '\0';
|
|
}
|
|
void Open(char u) {
|
|
Count++;
|
|
Up = u;
|
|
Down = opposite(Up);
|
|
}
|
|
QuoteCls(const QuoteCls& q) {
|
|
// copy constructor -- use this for copying in
|
|
Count = q.Count;
|
|
Up = q.Up;
|
|
Down = q.Down;
|
|
}
|
|
QuoteCls& operator=(const QuoteCls& q) { // assignment constructor
|
|
if (this != &q) {
|
|
Count = q.Count;
|
|
Up = q.Up;
|
|
Down = q.Down;
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
};
|
|
|
|
|
|
static void enterInnerExpression(int *p_inner_string_types,
|
|
int *p_inner_expn_brace_counts,
|
|
QuoteCls *p_inner_quotes,
|
|
int& inner_string_count,
|
|
int& state,
|
|
int& brace_counts,
|
|
QuoteCls curr_quote
|
|
) {
|
|
p_inner_string_types[inner_string_count] = state;
|
|
state = SCE_RB_DEFAULT;
|
|
p_inner_expn_brace_counts[inner_string_count] = brace_counts;
|
|
brace_counts = 0;
|
|
p_inner_quotes[inner_string_count] = curr_quote;
|
|
++inner_string_count;
|
|
}
|
|
|
|
static void exitInnerExpression(int *p_inner_string_types,
|
|
int *p_inner_expn_brace_counts,
|
|
QuoteCls *p_inner_quotes,
|
|
int& inner_string_count,
|
|
int& state,
|
|
int& brace_counts,
|
|
QuoteCls& curr_quote
|
|
) {
|
|
--inner_string_count;
|
|
state = p_inner_string_types[inner_string_count];
|
|
brace_counts = p_inner_expn_brace_counts[inner_string_count];
|
|
curr_quote = p_inner_quotes[inner_string_count];
|
|
}
|
|
|
|
static bool isEmptyLine(int pos,
|
|
Accessor &styler) {
|
|
int spaceFlags = 0;
|
|
int lineCurrent = styler.GetLine(pos);
|
|
int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
|
|
return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != 0;
|
|
}
|
|
|
|
// Reports whether keyword is one of the fixed set of keywords listed in the
// table below. The comparison is exact and case-sensitive.
static bool RE_CanFollowKeyword(const char *keyword) {
    static const char *const reLeaders[] = {
        "and", "begin", "break", "case", "do",
        "else", "elsif", "if", "next", "return",
        "when", "unless", "until", "not", "or"
    };
    const size_t count = sizeof(reLeaders) / sizeof(reLeaders[0]);
    for (size_t k = 0; k < count; k++) {
        if (strcmp(keyword, reLeaders[k]) == 0) {
            return true;
        }
    }
    return false;
}
|
|
|
|
// Look at chars up to but not including endPos
|
|
// Don't look at styles in case we're looking forward
|
|
|
|
static int skipWhitespace(int startPos,
|
|
int endPos,
|
|
Accessor &styler) {
|
|
for (int i = startPos; i < endPos; i++) {
|
|
if (!iswhitespace(styler[i])) {
|
|
return i;
|
|
}
|
|
}
|
|
return endPos;
|
|
}
|
|
|
|
// This routine looks for false positives like
|
|
// undef foo, <<
|
|
// There aren't too many.
|
|
//
|
|
// iPrev points to the start of <<
|
|
|
|
static bool sureThisIsHeredoc(int iPrev,
|
|
Accessor &styler,
|
|
char *prevWord) {
|
|
|
|
// Not so fast, since Ruby's so dynamic. Check the context
|
|
// to make sure we're OK.
|
|
int prevStyle;
|
|
int lineStart = styler.GetLine(iPrev);
|
|
int lineStartPosn = styler.LineStart(lineStart);
|
|
styler.Flush();
|
|
|
|
// Find the first word after some whitespace
|
|
int firstWordPosn = skipWhitespace(lineStartPosn, iPrev, styler);
|
|
if (firstWordPosn >= iPrev) {
|
|
// Have something like {^ <<}
|
|
//XXX Look at the first previous non-comment non-white line
|
|
// to establish the context. Not too likely though.
|
|
return true;
|
|
} else {
|
|
switch (prevStyle = styler.StyleAt(firstWordPosn)) {
|
|
case SCE_RB_WORD:
|
|
case SCE_RB_WORD_DEMOTED:
|
|
case SCE_RB_IDENTIFIER:
|
|
break;
|
|
default:
|
|
return true;
|
|
}
|
|
}
|
|
int firstWordEndPosn = firstWordPosn;
|
|
char *dst = prevWord;
|
|
for (;;) {
|
|
if (firstWordEndPosn >= iPrev ||
|
|
styler.StyleAt(firstWordEndPosn) != prevStyle) {
|
|
*dst = 0;
|
|
break;
|
|
}
|
|
*dst++ = styler[firstWordEndPosn];
|
|
firstWordEndPosn += 1;
|
|
}
|
|
//XXX Write a style-aware thing to regex scintilla buffer objects
|
|
if (!strcmp(prevWord, "undef")
|
|
|| !strcmp(prevWord, "def")
|
|
|| !strcmp(prevWord, "alias")) {
|
|
// These keywords are what we were looking for
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Routine that saves us from allocating a buffer for the here-doc target
|
|
// targetEndPos points one past the end of the current target
|
|
static bool haveTargetMatch(int currPos,
|
|
int lengthDoc,
|
|
int targetStartPos,
|
|
int targetEndPos,
|
|
Accessor &styler) {
|
|
if (lengthDoc - currPos < targetEndPos - targetStartPos) {
|
|
return false;
|
|
}
|
|
int i, j;
|
|
for (i = targetStartPos, j = currPos;
|
|
i < targetEndPos && j < lengthDoc;
|
|
i++, j++) {
|
|
if (styler[i] != styler[j]) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// We need a check because the form
|
|
// [identifier] <<[target]
|
|
// is ambiguous. The Ruby lexer/parser resolves it by
|
|
// looking to see if [identifier] names a variable or a
|
|
// function. If it's a function, it's the start of a here-doc.
|
|
// If it's a var, it's an operator. This lexer doesn't
|
|
// maintain a symbol table, so it looks ahead to see what's
|
|
// going on, in cases where we have
|
|
// ^[white-space]*[identifier([.|::]identifier)*][white-space]*<<[target]
|
|
//
|
|
// If there's no occurrence of [target] on a line, assume we don't.
|
|
|
|
// return true == yes, we have no heredocs
|
|
|
|
static bool sureThisIsNotHeredoc(int lt2StartPos,
|
|
Accessor &styler) {
|
|
int prevStyle;
|
|
// Use full document, not just part we're styling
|
|
int lengthDoc = styler.Length();
|
|
int lineStart = styler.GetLine(lt2StartPos);
|
|
int lineStartPosn = styler.LineStart(lineStart);
|
|
styler.Flush();
|
|
const bool definitely_not_a_here_doc = true;
|
|
const bool looks_like_a_here_doc = false;
|
|
|
|
// Find the first word after some whitespace
|
|
int firstWordPosn = skipWhitespace(lineStartPosn, lt2StartPos, styler);
|
|
if (firstWordPosn >= lt2StartPos) {
|
|
return definitely_not_a_here_doc;
|
|
}
|
|
prevStyle = styler.StyleAt(firstWordPosn);
|
|
// If we have '<<' following a keyword, it's not a heredoc
|
|
if (prevStyle != SCE_RB_IDENTIFIER) {
|
|
return definitely_not_a_here_doc;
|
|
}
|
|
int newStyle = prevStyle;
|
|
// Some compilers incorrectly warn about uninit newStyle
|
|
for (firstWordPosn += 1; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
|
|
// Inner loop looks at the name
|
|
for (; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
|
|
newStyle = styler.StyleAt(firstWordPosn);
|
|
if (newStyle != prevStyle) {
|
|
break;
|
|
}
|
|
}
|
|
// Do we have '::' or '.'?
|
|
if (firstWordPosn < lt2StartPos && newStyle == SCE_RB_OPERATOR) {
|
|
char ch = styler[firstWordPosn];
|
|
if (ch == '.') {
|
|
// yes
|
|
} else if (ch == ':') {
|
|
if (styler.StyleAt(++firstWordPosn) != SCE_RB_OPERATOR) {
|
|
return definitely_not_a_here_doc;
|
|
} else if (styler[firstWordPosn] != ':') {
|
|
return definitely_not_a_here_doc;
|
|
}
|
|
} else {
|
|
break;
|
|
}
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
// Skip next batch of white-space
|
|
firstWordPosn = skipWhitespace(firstWordPosn, lt2StartPos, styler);
|
|
if (firstWordPosn != lt2StartPos) {
|
|
// Have [[^ws[identifier]ws[*something_else*]ws<<
|
|
return definitely_not_a_here_doc;
|
|
}
|
|
// OK, now 'j' will point to the current spot moving ahead
|
|
int j = firstWordPosn + 1;
|
|
if (styler.StyleAt(j) != SCE_RB_OPERATOR || styler[j] != '<') {
|
|
// This shouldn't happen
|
|
return definitely_not_a_here_doc;
|
|
}
|
|
int nextLineStartPosn = styler.LineStart(lineStart + 1);
|
|
if (nextLineStartPosn >= lengthDoc) {
|
|
return definitely_not_a_here_doc;
|
|
}
|
|
j = skipWhitespace(j + 1, nextLineStartPosn, styler);
|
|
if (j >= lengthDoc) {
|
|
return definitely_not_a_here_doc;
|
|
}
|
|
bool allow_indent;
|
|
int target_start, target_end;
|
|
// From this point on no more styling, since we're looking ahead
|
|
if (styler[j] == '-') {
|
|
allow_indent = true;
|
|
j++;
|
|
} else {
|
|
allow_indent = false;
|
|
}
|
|
|
|
// Allow for quoted targets.
|
|
char target_quote = 0;
|
|
switch (styler[j]) {
|
|
case '\'':
|
|
case '"':
|
|
case '`':
|
|
target_quote = styler[j];
|
|
j += 1;
|
|
}
|
|
|
|
if (isSafeAlnum(styler[j])) {
|
|
// Init target_end because some compilers think it won't
|
|
// be initialized by the time it's used
|
|
target_start = target_end = j;
|
|
j++;
|
|
} else {
|
|
return definitely_not_a_here_doc;
|
|
}
|
|
for (; j < lengthDoc; j++) {
|
|
if (!isSafeAlnum(styler[j])) {
|
|
if (target_quote && styler[j] != target_quote) {
|
|
// unquoted end
|
|
return definitely_not_a_here_doc;
|
|
}
|
|
|
|
// And for now make sure that it's a newline
|
|
// don't handle arbitrary expressions yet
|
|
|
|
target_end = j;
|
|
if (target_quote) {
|
|
// Now we can move to the character after the string delimiter.
|
|
j += 1;
|
|
}
|
|
j = skipWhitespace(j, lengthDoc, styler);
|
|
if (j >= lengthDoc) {
|
|
return definitely_not_a_here_doc;
|
|
} else {
|
|
char ch = styler[j];
|
|
if (ch == '#' || isEOLChar(ch)) {
|
|
// This is OK, so break and continue;
|
|
break;
|
|
} else {
|
|
return definitely_not_a_here_doc;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Just look at the start of each line
|
|
int last_line = styler.GetLine(lengthDoc - 1);
|
|
// But don't go too far
|
|
if (last_line > lineStart + 50) {
|
|
last_line = lineStart + 50;
|
|
}
|
|
for (int line_num = lineStart + 1; line_num <= last_line; line_num++) {
|
|
if (allow_indent) {
|
|
j = skipWhitespace(styler.LineStart(line_num), lengthDoc, styler);
|
|
} else {
|
|
j = styler.LineStart(line_num);
|
|
}
|
|
// target_end is one past the end
|
|
if (haveTargetMatch(j, lengthDoc, target_start, target_end, styler)) {
|
|
// We got it
|
|
return looks_like_a_here_doc;
|
|
}
|
|
}
|
|
return definitely_not_a_here_doc;
|
|
}
|
|
|
|
//todo: if we aren't looking at a stdio character,
|
|
// move to the start of the first line that is not in a
|
|
// multi-line construct
|
|
|
|
static void synchronizeDocStart(unsigned int& startPos,
|
|
int &length,
|
|
int &initStyle,
|
|
Accessor &styler,
|
|
bool skipWhiteSpace=false) {
|
|
|
|
styler.Flush();
|
|
int style = actual_style(styler.StyleAt(startPos));
|
|
switch (style) {
|
|
case SCE_RB_STDIN:
|
|
case SCE_RB_STDOUT:
|
|
case SCE_RB_STDERR:
|
|
// Don't do anything else with these.
|
|
return;
|
|
}
|
|
|
|
int pos = startPos;
|
|
// Quick way to characterize each line
|
|
int lineStart;
|
|
for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) {
|
|
// Now look at the style before the previous line's EOL
|
|
pos = styler.LineStart(lineStart) - 1;
|
|
if (pos <= 10) {
|
|
lineStart = 0;
|
|
break;
|
|
}
|
|
char ch = styler.SafeGetCharAt(pos);
|
|
char chPrev = styler.SafeGetCharAt(pos - 1);
|
|
if (ch == '\n' && chPrev == '\r') {
|
|
pos--;
|
|
}
|
|
if (styler.SafeGetCharAt(pos - 1) == '\\') {
|
|
// Continuation line -- keep going
|
|
} else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) {
|
|
// Part of multi-line construct -- keep going
|
|
} else if (currLineContainsHereDelims(pos, styler)) {
|
|
// Keep going, with pos and length now pointing
|
|
// at the end of the here-doc delimiter
|
|
} else if (skipWhiteSpace && isEmptyLine(pos, styler)) {
|
|
// Keep going
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
pos = styler.LineStart(lineStart);
|
|
length += (startPos - pos);
|
|
startPos = pos;
|
|
initStyle = SCE_RB_DEFAULT;
|
|
}
|
|
|
|
static void ColouriseRbDoc(unsigned int startPos, int length, int initStyle,
|
|
WordList *keywordlists[], Accessor &styler) {
|
|
|
|
// Lexer for Ruby often has to backtrack to start of current style to determine
|
|
// which characters are being used as quotes, how deeply nested is the
|
|
// start position and what the termination string is for here documents
|
|
|
|
WordList &keywords = *keywordlists[0];
|
|
|
|
class HereDocCls {
|
|
public:
|
|
int State;
|
|
// States
|
|
// 0: '<<' encountered
|
|
// 1: collect the delimiter
|
|
// 1b: text between the end of the delimiter and the EOL
|
|
// 2: here doc text (lines after the delimiter)
|
|
char Quote; // the char after '<<'
|
|
bool Quoted; // true if Quote in ('\'','"','`')
|
|
int DelimiterLength; // strlen(Delimiter)
|
|
char Delimiter[256]; // the Delimiter, limit of 256: from Perl
|
|
bool CanBeIndented;
|
|
HereDocCls() {
|
|
State = 0;
|
|
DelimiterLength = 0;
|
|
Delimiter[0] = '\0';
|
|
CanBeIndented = false;
|
|
}
|
|
};
|
|
HereDocCls HereDoc;
|
|
|
|
QuoteCls Quote;
|
|
|
|
int numDots = 0; // For numbers --
|
|
// Don't start lexing in the middle of a num
|
|
|
|
synchronizeDocStart(startPos, length, initStyle, styler, // ref args
|
|
false);
|
|
|
|
bool preferRE = true;
|
|
int state = initStyle;
|
|
int lengthDoc = startPos + length;
|
|
|
|
char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
|
|
prevWord[0] = '\0';
|
|
if (length == 0)
|
|
return;
|
|
|
|
char chPrev = styler.SafeGetCharAt(startPos - 1);
|
|
char chNext = styler.SafeGetCharAt(startPos);
|
|
bool is_real_number = true; // Differentiate between constants and ?-sequences.
|
|
// Ruby uses a different mask because bad indentation is marked by oring with 32
|
|
styler.StartAt(startPos, 127);
|
|
styler.StartSegment(startPos);
|
|
|
|
static int q_states[] = {SCE_RB_STRING_Q,
|
|
SCE_RB_STRING_QQ,
|
|
SCE_RB_STRING_QR,
|
|
SCE_RB_STRING_QW,
|
|
SCE_RB_STRING_QW,
|
|
SCE_RB_STRING_QX};
|
|
static const char* q_chars = "qQrwWx";
|
|
|
|
// In most cases a value of 2 should be ample for the code in the
|
|
// Ruby library, and the code the user is likely to enter.
|
|
// For example,
|
|
// fu_output_message "mkdir #{options[:mode] ? ('-m %03o ' % options[:mode]) : ''}#{list.join ' '}"
|
|
// if options[:verbose]
|
|
// from fileutils.rb nests to a level of 2
|
|
// If the user actually hits a 6th occurrence of '#{' in a double-quoted
|
|
// string (including regex'es, %Q, %<sym>, %w, and other strings
|
|
// that interpolate), it will stay as a string. The problem with this
|
|
// is that quotes might flip, a 7th '#{' will look like a comment,
|
|
// and code-folding might be wrong.
|
|
|
|
// If anyone runs into this problem, I recommend raising this
|
|
// value slightly higher to replacing the fixed array with a linked
|
|
// list. Keep in mind this code will be called everytime the lexer
|
|
// is invoked.
|
|
|
|
#define INNER_STRINGS_MAX_COUNT 5
|
|
// These vars track our instances of "...#{,,,%Q<..#{,,,}...>,,,}..."
|
|
int inner_string_types[INNER_STRINGS_MAX_COUNT];
|
|
// Track # braces when we push a new #{ thing
|
|
int inner_expn_brace_counts[INNER_STRINGS_MAX_COUNT];
|
|
QuoteCls inner_quotes[INNER_STRINGS_MAX_COUNT];
|
|
int inner_string_count = 0;
|
|
int brace_counts = 0; // Number of #{ ... } things within an expression
|
|
|
|
int i;
|
|
for (i = 0; i < INNER_STRINGS_MAX_COUNT; i++) {
|
|
inner_string_types[i] = 0;
|
|
inner_expn_brace_counts[i] = 0;
|
|
}
|
|
for (i = startPos; i < lengthDoc; i++) {
|
|
char ch = chNext;
|
|
chNext = styler.SafeGetCharAt(i + 1);
|
|
char chNext2 = styler.SafeGetCharAt(i + 2);
|
|
|
|
if (styler.IsLeadByte(ch)) {
|
|
chNext = chNext2;
|
|
chPrev = ' ';
|
|
i += 1;
|
|
continue;
|
|
}
|
|
|
|
// skip on DOS/Windows
|
|
//No, don't, because some things will get tagged on,
|
|
// so we won't recognize keywords, for example
|
|
#if 0
|
|
if (ch == '\r' && chNext == '\n') {
|
|
continue;
|
|
}
|
|
#endif
|
|
|
|
if (HereDoc.State == 1 && isEOLChar(ch)) {
|
|
// Begin of here-doc (the line after the here-doc delimiter):
|
|
HereDoc.State = 2;
|
|
styler.ColourTo(i-1, state);
|
|
// Don't check for a missing quote, just jump into
|
|
// the here-doc state
|
|
state = SCE_RB_HERE_Q;
|
|
}
|
|
|
|
// Regular transitions
|
|
if (state == SCE_RB_DEFAULT) {
|
|
if (isSafeDigit(ch)) {
|
|
styler.ColourTo(i - 1, state);
|
|
state = SCE_RB_NUMBER;
|
|
is_real_number = true;
|
|
numDots = 0;
|
|
} else if (isHighBitChar(ch) || iswordstart(ch)) {
|
|
styler.ColourTo(i - 1, state);
|
|
state = SCE_RB_WORD;
|
|
} else if (ch == '#') {
|
|
styler.ColourTo(i - 1, state);
|
|
state = SCE_RB_COMMENTLINE;
|
|
} else if (ch == '=') {
|
|
// =begin indicates the start of a comment (doc) block
|
|
if (i == 0 || (isEOLChar(chPrev)
|
|
&& chNext == 'b'
|
|
&& styler.SafeGetCharAt(i + 2) == 'e'
|
|
&& styler.SafeGetCharAt(i + 3) == 'g'
|
|
&& styler.SafeGetCharAt(i + 4) == 'i'
|
|
&& styler.SafeGetCharAt(i + 5) == 'n'
|
|
&& !isSafeWordcharOrHigh(styler.SafeGetCharAt(i + 6)))) {
|
|
styler.ColourTo(i - 1, state);
|
|
state = SCE_RB_POD;
|
|
} else {
|
|
styler.ColourTo(i - 1, state);
|
|
styler.ColourTo(i, SCE_RB_OPERATOR);
|
|
preferRE = true;
|
|
}
|
|
} else if (ch == '"') {
|
|
styler.ColourTo(i - 1, state);
|
|
state = SCE_RB_STRING;
|
|
Quote.New();
|
|
Quote.Open(ch);
|
|
} else if (ch == '\'') {
|
|
styler.ColourTo(i - 1, state);
|
|
state = SCE_RB_CHARACTER;
|
|
Quote.New();
|
|
Quote.Open(ch);
|
|
} else if (ch == '`') {
|
|
styler.ColourTo(i - 1, state);
|
|
state = SCE_RB_BACKTICKS;
|
|
Quote.New();
|
|
Quote.Open(ch);
|
|
} else if (ch == '@') {
|
|
// Instance or class var
|
|
styler.ColourTo(i - 1, state);
|
|
if (chNext == '@') {
|
|
state = SCE_RB_CLASS_VAR;
|
|
advance_char(i, ch, chNext, chNext2); // pass by ref
|
|
} else {
|
|
state = SCE_RB_INSTANCE_VAR;
|
|
}
|
|
} else if (ch == '$') {
|
|
// Check for a builtin global
|
|
styler.ColourTo(i - 1, state);
|
|
// Recognize it bit by bit
|
|
state = SCE_RB_GLOBAL;
|
|
} else if (ch == '/' && preferRE) {
|
|
// Ambigous operator
|
|
styler.ColourTo(i - 1, state);
|
|
state = SCE_RB_REGEX;
|
|
Quote.New();
|
|
Quote.Open(ch);
|
|
} else if (ch == '<' && chNext == '<' && chNext2 != '=') {
|
|
|
|
// Recognise the '<<' symbol - either a here document or a binary op
|
|
styler.ColourTo(i - 1, state);
|
|
i++;
|
|
chNext = chNext2;
|
|
styler.ColourTo(i, SCE_RB_OPERATOR);
|
|
|
|
if (! (strchr("\"\'`_-", chNext2) || isSafeAlpha(chNext2))) {
|
|
// It's definitely not a here-doc,
|
|
// based on Ruby's lexer/parser in the
|
|
// heredoc_identifier routine.
|
|
// Nothing else to do.
|
|
} else if (preferRE) {
|
|
if (sureThisIsHeredoc(i - 1, styler, prevWord)) {
|
|
state = SCE_RB_HERE_DELIM;
|
|
HereDoc.State = 0;
|
|
}
|
|
// else leave it in default state
|
|
} else {
|
|
if (sureThisIsNotHeredoc(i - 1, styler)) {
|
|
// leave state as default
|
|
// We don't have all the heuristics Perl has for indications
|
|
// of a here-doc, because '<<' is overloadable and used
|
|
// for so many other classes.
|
|
} else {
|
|
state = SCE_RB_HERE_DELIM;
|
|
HereDoc.State = 0;
|
|
}
|
|
}
|
|
preferRE = (state != SCE_RB_HERE_DELIM);
|
|
} else if (ch == ':') {
|
|
styler.ColourTo(i - 1, state);
|
|
if (chNext == ':') {
|
|
// Mark "::" as an operator, not symbol start
|
|
styler.ColourTo(i + 1, SCE_RB_OPERATOR);
|
|
advance_char(i, ch, chNext, chNext2); // pass by ref
|
|
state = SCE_RB_DEFAULT;
|
|
preferRE = false;
|
|
} else if (isSafeWordcharOrHigh(chNext)) {
|
|
state = SCE_RB_SYMBOL;
|
|
} else if (strchr("[*!~+-*/%=<>&^|", chNext)) {
|
|
// Do the operator analysis in-line, looking ahead
|
|
// Based on the table in pickaxe 2nd ed., page 339
|
|
bool doColoring = true;
|
|
switch (chNext) {
|
|
case '[':
|
|
if (chNext2 == ']' ) {
|
|
char ch_tmp = styler.SafeGetCharAt(i + 3);
|
|
if (ch_tmp == '=') {
|
|
i += 3;
|
|
ch = ch_tmp;
|
|
chNext = styler.SafeGetCharAt(i + 1);
|
|
} else {
|
|
i += 2;
|
|
ch = chNext2;
|
|
chNext = ch_tmp;
|
|
}
|
|
} else {
|
|
doColoring = false;
|
|
}
|
|
break;
|
|
|
|
case '*':
|
|
if (chNext2 == '*') {
|
|
i += 2;
|
|
ch = chNext2;
|
|
chNext = styler.SafeGetCharAt(i + 1);
|
|
} else {
|
|
advance_char(i, ch, chNext, chNext2);
|
|
}
|
|
break;
|
|
|
|
case '!':
|
|
if (chNext2 == '=' || chNext2 == '~') {
|
|
i += 2;
|
|
ch = chNext2;
|
|
chNext = styler.SafeGetCharAt(i + 1);
|
|
} else {
|
|
advance_char(i, ch, chNext, chNext2);
|
|
}
|
|
break;
|
|
|
|
case '<':
|
|
if (chNext2 == '<') {
|
|
i += 2;
|
|
ch = chNext2;
|
|
chNext = styler.SafeGetCharAt(i + 1);
|
|
} else if (chNext2 == '=') {
|
|
char ch_tmp = styler.SafeGetCharAt(i + 3);
|
|
if (ch_tmp == '>') { // <=> operator
|
|
i += 3;
|
|
ch = ch_tmp;
|
|
chNext = styler.SafeGetCharAt(i + 1);
|
|
} else {
|
|
i += 2;
|
|
ch = chNext2;
|
|
chNext = ch_tmp;
|
|
}
|
|
} else {
|
|
advance_char(i, ch, chNext, chNext2);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
// Simple one-character operators
|
|
advance_char(i, ch, chNext, chNext2);
|
|
break;
|
|
}
|
|
if (doColoring) {
|
|
styler.ColourTo(i, SCE_RB_SYMBOL);
|
|
state = SCE_RB_DEFAULT;
|
|
}
|
|
} else if (!preferRE) {
|
|
// Don't color symbol strings (yet)
|
|
// Just color the ":" and color rest as string
|
|
styler.ColourTo(i, SCE_RB_SYMBOL);
|
|
state = SCE_RB_DEFAULT;
|
|
} else {
|
|
styler.ColourTo(i, SCE_RB_OPERATOR);
|
|
state = SCE_RB_DEFAULT;
|
|
preferRE = true;
|
|
}
|
|
} else if (ch == '%') {
|
|
styler.ColourTo(i - 1, state);
|
|
bool have_string = false;
|
|
if (strchr(q_chars, chNext) && !isSafeWordcharOrHigh(chNext2)) {
|
|
Quote.New();
|
|
const char *hit = strchr(q_chars, chNext);
|
|
if (hit != NULL) {
|
|
state = q_states[hit - q_chars];
|
|
Quote.Open(chNext2);
|
|
i += 2;
|
|
ch = chNext2;
|
|
chNext = styler.SafeGetCharAt(i + 1);
|
|
have_string = true;
|
|
}
|
|
} else if (preferRE && !isSafeWordcharOrHigh(chNext)) {
|
|
// Ruby doesn't allow high bit chars here,
|
|
// but the editor host might
|
|
Quote.New();
|
|
state = SCE_RB_STRING_QQ;
|
|
Quote.Open(chNext);
|
|
advance_char(i, ch, chNext, chNext2); // pass by ref
|
|
have_string = true;
|
|
} else if (!isSafeWordcharOrHigh(chNext) && !iswhitespace(chNext) && !isEOLChar(chNext)) {
|
|
// Ruby doesn't allow high bit chars here,
|
|
// but the editor host might
|
|
Quote.New();
|
|
state = SCE_RB_STRING_QQ;
|
|
Quote.Open(chNext);
|
|
advance_char(i, ch, chNext, chNext2); // pass by ref
|
|
have_string = true;
|
|
}
|
|
if (!have_string) {
|
|
styler.ColourTo(i, SCE_RB_OPERATOR);
|
|
// stay in default
|
|
preferRE = true;
|
|
}
|
|
} else if (ch == '?') {
|
|
styler.ColourTo(i - 1, state);
|
|
if (iswhitespace(chNext) || chNext == '\n' || chNext == '\r') {
|
|
styler.ColourTo(i, SCE_RB_OPERATOR);
|
|
} else {
|
|
// It's the start of a character code escape sequence
|
|
// Color it as a number.
|
|
state = SCE_RB_NUMBER;
|
|
is_real_number = false;
|
|
}
|
|
} else if (isoperator(ch) || ch == '.') {
|
|
styler.ColourTo(i - 1, state);
|
|
styler.ColourTo(i, SCE_RB_OPERATOR);
|
|
// If we're ending an expression or block,
|
|
// assume it ends an object, and the ambivalent
|
|
// constructs are binary operators
|
|
//
|
|
// So if we don't have one of these chars,
|
|
// we aren't ending an object exp'n, and ops
|
|
// like : << / are unary operators.
|
|
|
|
if (ch == '{') {
|
|
++brace_counts;
|
|
preferRE = true;
|
|
} else if (ch == '}' && --brace_counts < 0
|
|
&& inner_string_count > 0) {
|
|
styler.ColourTo(i, SCE_RB_OPERATOR);
|
|
exitInnerExpression(inner_string_types,
|
|
inner_expn_brace_counts,
|
|
inner_quotes,
|
|
inner_string_count,
|
|
state, brace_counts, Quote);
|
|
} else {
|
|
preferRE = (strchr(")}].", ch) == NULL);
|
|
}
|
|
// Stay in default state
|
|
} else if (isEOLChar(ch)) {
|
|
// Make sure it's a true line-end, with no backslash
|
|
if ((ch == '\r' || (ch == '\n' && chPrev != '\r'))
|
|
&& chPrev != '\\') {
|
|
// Assume we've hit the end of the statement.
|
|
preferRE = true;
|
|
}
|
|
}
|
|
} else if (state == SCE_RB_WORD) {
|
|
if (ch == '.' || !isSafeWordcharOrHigh(ch)) {
|
|
// Words include x? in all contexts,
|
|
// and <letters>= after either 'def' or a dot
|
|
// Move along until a complete word is on our left
|
|
|
|
// Default accessor treats '.' as word-chars,
|
|
// but we don't for now.
|
|
|
|
if (ch == '='
|
|
&& isSafeWordcharOrHigh(chPrev)
|
|
&& (chNext == '('
|
|
|| strchr(" \t\n\r", chNext) != NULL)
|
|
&& (!strcmp(prevWord, "def")
|
|
|| followsDot(styler.GetStartSegment(), styler))) {
|
|
// <name>= is a name only when being def'd -- Get it the next time
|
|
// This means that <name>=<name> is always lexed as
|
|
// <name>, (op, =), <name>
|
|
} else if ((ch == '?' || ch == '!')
|
|
&& isSafeWordcharOrHigh(chPrev)
|
|
&& !isSafeWordcharOrHigh(chNext)) {
|
|
// <name>? is a name -- Get it the next time
|
|
// But <name>?<name> is always lexed as
|
|
// <name>, (op, ?), <name>
|
|
// Same with <name>! to indicate a method that
|
|
// modifies its target
|
|
} else if (isEOLChar(ch)
|
|
&& isMatch(styler, lengthDoc, i - 7, "__END__")) {
|
|
styler.ColourTo(i, SCE_RB_DATASECTION);
|
|
state = SCE_RB_DATASECTION;
|
|
// No need to handle this state -- we'll just move to the end
|
|
preferRE = false;
|
|
} else {
|
|
int wordStartPos = styler.GetStartSegment();
|
|
int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord);
|
|
switch (word_style) {
|
|
case SCE_RB_WORD:
|
|
preferRE = RE_CanFollowKeyword(prevWord);
|
|
break;
|
|
|
|
case SCE_RB_WORD_DEMOTED:
|
|
preferRE = true;
|
|
break;
|
|
|
|
case SCE_RB_IDENTIFIER:
|
|
if (isMatch(styler, lengthDoc, wordStartPos, "print")) {
|
|
preferRE = true;
|
|
} else if (isEOLChar(ch)) {
|
|
preferRE = true;
|
|
} else {
|
|
preferRE = false;
|
|
}
|
|
break;
|
|
default:
|
|
preferRE = false;
|
|
}
|
|
if (ch == '.') {
|
|
// We might be redefining an operator-method
|
|
preferRE = false;
|
|
}
|
|
// And if it's the first
|
|
redo_char(i, ch, chNext, chNext2, state); // pass by ref
|
|
}
|
|
}
|
|
} else if (state == SCE_RB_NUMBER) {
|
|
if (!is_real_number) {
|
|
if (ch != '\\') {
|
|
styler.ColourTo(i, state);
|
|
state = SCE_RB_DEFAULT;
|
|
preferRE = false;
|
|
} else if (strchr("\\ntrfvaebs", chNext)) {
|
|
// Terminal escape sequence -- handle it next time
|
|
// Nothing more to do this time through the loop
|
|
} else if (chNext == 'C' || chNext == 'M') {
|
|
if (chNext2 != '-') {
|
|
// \C or \M ends the sequence -- handle it next time
|
|
} else {
|
|
// Move from abc?\C-x
|
|
// ^
|
|
// to
|
|
// ^
|
|
i += 2;
|
|
ch = chNext2;
|
|
chNext = styler.SafeGetCharAt(i + 1);
|
|
}
|
|
} else if (chNext == 'c') {
|
|
// Stay here, \c is a combining sequence
|
|
advance_char(i, ch, chNext, chNext2); // pass by ref
|
|
} else {
|
|
// ?\x, including ?\\ is final.
|
|
styler.ColourTo(i + 1, state);
|
|
state = SCE_RB_DEFAULT;
|
|
preferRE = false;
|
|
advance_char(i, ch, chNext, chNext2);
|
|
}
|
|
} else if (isSafeAlnumOrHigh(ch) || ch == '_') {
|
|
// Keep going
|
|
} else if (ch == '.' && chNext == '.') {
|
|
++numDots;
|
|
styler.ColourTo(i - 1, state);
|
|
redo_char(i, ch, chNext, chNext2, state); // pass by ref
|
|
} else if (ch == '.' && ++numDots == 1) {
|
|
// Keep going
|
|
} else {
|
|
styler.ColourTo(i - 1, state);
|
|
redo_char(i, ch, chNext, chNext2, state); // pass by ref
|
|
preferRE = false;
|
|
}
|
|
} else if (state == SCE_RB_COMMENTLINE) {
|
|
if (isEOLChar(ch)) {
|
|
styler.ColourTo(i - 1, state);
|
|
state = SCE_RB_DEFAULT;
|
|
// Use whatever setting we had going into the comment
|
|
}
|
|
} else if (state == SCE_RB_HERE_DELIM) {
|
|
// See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
|
|
// Slightly different: if we find an immediate '-',
|
|
// the target can appear indented.
|
|
|
|
if (HereDoc.State == 0) { // '<<' encountered
|
|
HereDoc.State = 1;
|
|
HereDoc.DelimiterLength = 0;
|
|
if (ch == '-') {
|
|
HereDoc.CanBeIndented = true;
|
|
advance_char(i, ch, chNext, chNext2); // pass by ref
|
|
} else {
|
|
HereDoc.CanBeIndented = false;
|
|
}
|
|
if (isEOLChar(ch)) {
|
|
// Bail out of doing a here doc if there's no target
|
|
state = SCE_RB_DEFAULT;
|
|
preferRE = false;
|
|
} else {
|
|
HereDoc.Quote = ch;
|
|
|
|
if (ch == '\'' || ch == '"' || ch == '`') {
|
|
HereDoc.Quoted = true;
|
|
HereDoc.Delimiter[0] = '\0';
|
|
} else {
|
|
HereDoc.Quoted = false;
|
|
HereDoc.Delimiter[0] = ch;
|
|
HereDoc.Delimiter[1] = '\0';
|
|
HereDoc.DelimiterLength = 1;
|
|
}
|
|
}
|
|
} else if (HereDoc.State == 1) { // collect the delimiter
|
|
if (isEOLChar(ch)) {
|
|
// End the quote now, and go back for more
|
|
styler.ColourTo(i - 1, state);
|
|
state = SCE_RB_DEFAULT;
|
|
i--;
|
|
chNext = ch;
|
|
chNext2 = chNext;
|
|
preferRE = false;
|
|
} else if (HereDoc.Quoted) {
|
|
if (ch == HereDoc.Quote) { // closing quote => end of delimiter
|
|
styler.ColourTo(i, state);
|
|
state = SCE_RB_DEFAULT;
|
|
preferRE = false;
|
|
} else {
|
|
if (ch == '\\' && !isEOLChar(chNext)) {
|
|
advance_char(i, ch, chNext, chNext2);
|
|
}
|
|
HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
|
|
HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
|
|
}
|
|
} else { // an unquoted here-doc delimiter
|
|
if (isSafeAlnumOrHigh(ch) || ch == '_') {
|
|
HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
|
|
HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
|
|
} else {
|
|
styler.ColourTo(i - 1, state);
|
|
redo_char(i, ch, chNext, chNext2, state);
|
|
preferRE = false;
|
|
}
|
|
}
|
|
if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) {
|
|
styler.ColourTo(i - 1, state);
|
|
state = SCE_RB_ERROR;
|
|
preferRE = false;
|
|
}
|
|
}
|
|
} else if (state == SCE_RB_HERE_Q) {
|
|
// Not needed: HereDoc.State == 2
|
|
// Indentable here docs: look backwards
|
|
// Non-indentable: look forwards, like in Perl
|
|
//
|
|
// Why: so we can quickly resolve things like <<-" abc"
|
|
|
|
if (!HereDoc.CanBeIndented) {
|
|
if (isEOLChar(chPrev)
|
|
&& isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
|
|
styler.ColourTo(i - 1, state);
|
|
i += HereDoc.DelimiterLength - 1;
|
|
chNext = styler.SafeGetCharAt(i + 1);
|
|
if (isEOLChar(chNext)) {
|
|
styler.ColourTo(i, SCE_RB_HERE_DELIM);
|
|
state = SCE_RB_DEFAULT;
|
|
HereDoc.State = 0;
|
|
preferRE = false;
|
|
}
|
|
// Otherwise we skipped through the here doc faster.
|
|
}
|
|
} else if (isEOLChar(chNext)
|
|
&& lookingAtHereDocDelim(styler,
|
|
i - HereDoc.DelimiterLength + 1,
|
|
lengthDoc,
|
|
HereDoc.Delimiter)) {
|
|
styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state);
|
|
styler.ColourTo(i, SCE_RB_HERE_DELIM);
|
|
state = SCE_RB_DEFAULT;
|
|
preferRE = false;
|
|
HereDoc.State = 0;
|
|
}
|
|
} else if (state == SCE_RB_CLASS_VAR
|
|
|| state == SCE_RB_INSTANCE_VAR
|
|
|| state == SCE_RB_SYMBOL) {
|
|
if (!isSafeWordcharOrHigh(ch)) {
|
|
styler.ColourTo(i - 1, state);
|
|
redo_char(i, ch, chNext, chNext2, state); // pass by ref
|
|
preferRE = false;
|
|
}
|
|
} else if (state == SCE_RB_GLOBAL) {
|
|
if (!isSafeWordcharOrHigh(ch)) {
|
|
// handle special globals here as well
|
|
if (chPrev == '$') {
|
|
if (ch == '-') {
|
|
// Include the next char, like $-a
|
|
advance_char(i, ch, chNext, chNext2);
|
|
}
|
|
styler.ColourTo(i, state);
|
|
state = SCE_RB_DEFAULT;
|
|
} else {
|
|
styler.ColourTo(i - 1, state);
|
|
redo_char(i, ch, chNext, chNext2, state); // pass by ref
|
|
}
|
|
preferRE = false;
|
|
}
|
|
} else if (state == SCE_RB_POD) {
|
|
// PODs end with ^=end\s, -- any whitespace can follow =end
|
|
if (strchr(" \t\n\r", ch) != NULL
|
|
&& i > 5
|
|
&& isEOLChar(styler[i - 5])
|
|
&& isMatch(styler, lengthDoc, i - 4, "=end")) {
|
|
styler.ColourTo(i - 1, state);
|
|
state = SCE_RB_DEFAULT;
|
|
preferRE = false;
|
|
}
|
|
} else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) {
|
|
if (ch == '\\' && Quote.Up != '\\') {
|
|
// Skip one
|
|
advance_char(i, ch, chNext, chNext2);
|
|
} else if (ch == Quote.Down) {
|
|
Quote.Count--;
|
|
if (Quote.Count == 0) {
|
|
// Include the options
|
|
while (isSafeAlpha(chNext)) {
|
|
i++;
|
|
ch = chNext;
|
|
chNext = styler.SafeGetCharAt(i + 1);
|
|
}
|
|
styler.ColourTo(i, state);
|
|
state = SCE_RB_DEFAULT;
|
|
preferRE = false;
|
|
}
|
|
} else if (ch == Quote.Up) {
|
|
// Only if close quoter != open quoter
|
|
Quote.Count++;
|
|
|
|
} else if (ch == '#' ) {
|
|
if (chNext == '{'
|
|
&& inner_string_count < INNER_STRINGS_MAX_COUNT) {
|
|
// process #{ ... }
|
|
styler.ColourTo(i - 1, state);
|
|
styler.ColourTo(i + 1, SCE_RB_OPERATOR);
|
|
enterInnerExpression(inner_string_types,
|
|
inner_expn_brace_counts,
|
|
inner_quotes,
|
|
inner_string_count,
|
|
state,
|
|
brace_counts,
|
|
Quote);
|
|
preferRE = true;
|
|
// Skip one
|
|
advance_char(i, ch, chNext, chNext2);
|
|
} else {
|
|
//todo: distinguish comments from pound chars
|
|
// for now, handle as comment
|
|
styler.ColourTo(i - 1, state);
|
|
bool inEscape = false;
|
|
while (++i < lengthDoc) {
|
|
ch = styler.SafeGetCharAt(i);
|
|
if (ch == '\\') {
|
|
inEscape = true;
|
|
} else if (isEOLChar(ch)) {
|
|
// Comment inside a regex
|
|
styler.ColourTo(i - 1, SCE_RB_COMMENTLINE);
|
|
break;
|
|
} else if (inEscape) {
|
|
inEscape = false; // don't look at char
|
|
} else if (ch == Quote.Down) {
|
|
// Have the regular handler deal with this
|
|
// to get trailing modifiers.
|
|
i--;
|
|
ch = styler[i];
|
|
break;
|
|
}
|
|
}
|
|
chNext = styler.SafeGetCharAt(i + 1);
|
|
chNext2 = styler.SafeGetCharAt(i + 2);
|
|
}
|
|
}
|
|
// Quotes of all kinds...
|
|
} else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ ||
|
|
state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW ||
|
|
state == SCE_RB_STRING || state == SCE_RB_CHARACTER ||
|
|
state == SCE_RB_BACKTICKS) {
|
|
if (!Quote.Down && !isspacechar(ch)) {
|
|
Quote.Open(ch);
|
|
} else if (ch == '\\' && Quote.Up != '\\') {
|
|
//Riddle me this: Is it safe to skip *every* escaped char?
|
|
advance_char(i, ch, chNext, chNext2);
|
|
} else if (ch == Quote.Down) {
|
|
Quote.Count--;
|
|
if (Quote.Count == 0) {
|
|
styler.ColourTo(i, state);
|
|
state = SCE_RB_DEFAULT;
|
|
preferRE = false;
|
|
}
|
|
} else if (ch == Quote.Up) {
|
|
Quote.Count++;
|
|
} else if (ch == '#' && chNext == '{'
|
|
&& inner_string_count < INNER_STRINGS_MAX_COUNT
|
|
&& state != SCE_RB_CHARACTER
|
|
&& state != SCE_RB_STRING_Q) {
|
|
// process #{ ... }
|
|
styler.ColourTo(i - 1, state);
|
|
styler.ColourTo(i + 1, SCE_RB_OPERATOR);
|
|
enterInnerExpression(inner_string_types,
|
|
inner_expn_brace_counts,
|
|
inner_quotes,
|
|
inner_string_count,
|
|
state,
|
|
brace_counts,
|
|
Quote);
|
|
preferRE = true;
|
|
// Skip one
|
|
advance_char(i, ch, chNext, chNext2);
|
|
}
|
|
}
|
|
|
|
if (state == SCE_RB_ERROR) {
|
|
break;
|
|
}
|
|
chPrev = ch;
|
|
}
|
|
if (state == SCE_RB_WORD) {
|
|
// We've ended on a word, possibly at EOF, and need to
|
|
// classify it.
|
|
(void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord);
|
|
} else {
|
|
styler.ColourTo(lengthDoc - 1, state);
|
|
}
|
|
}
|
|
|
|
// Helper functions for folding and for disambiguating keywords
|
|
// Assert that there are no high-bit chars
|
|
|
|
static void getPrevWord(int pos,
|
|
char *prevWord,
|
|
Accessor &styler,
|
|
int word_state)
|
|
{
|
|
int i;
|
|
styler.Flush();
|
|
for (i = pos - 1; i > 0; i--) {
|
|
if (actual_style(styler.StyleAt(i)) != word_state) {
|
|
i++;
|
|
break;
|
|
}
|
|
}
|
|
if (i < pos - MAX_KEYWORD_LENGTH) // overflow
|
|
i = pos - MAX_KEYWORD_LENGTH;
|
|
char *dst = prevWord;
|
|
for (; i <= pos; i++) {
|
|
*dst++ = styler[i];
|
|
}
|
|
*dst = 0;
|
|
}
|
|
|
|
/**
 * Report whether prevWord is one of the keywords this lexer treats as
 * ambiguous: "if", "do", "while", "unless" or "until".
 *
 * @param prevWord NUL-terminated word to test
 * @return true when prevWord matches one of the five keywords exactly
 */
static bool keywordIsAmbiguous(const char *prevWord)
{
    // Listed from most to least commonly used.  Ruby has plenty of
    // looping constructs besides 'while' and 'until'.
    static const char *const ambiguousWords[] = {
        "if", "do", "while", "unless", "until"
    };
    const size_t count = sizeof(ambiguousWords) / sizeof(ambiguousWords[0]);
    for (size_t k = 0; k < count; k++) {
        if (strcmp(prevWord, ambiguousWords[k]) == 0) {
            return true;
        }
    }
    return false;
}
|
|
|
|
// Demote keywords in the following conditions:
|
|
// if, while, unless, until modify a statement
|
|
// do after a while or until, as a noise word (like then after if)
|
|
|
|
static bool keywordIsModifier(const char *word,
|
|
int pos,
|
|
Accessor &styler)
|
|
{
|
|
if (word[0] == 'd' && word[1] == 'o' && !word[2]) {
|
|
return keywordDoStartsLoop(pos, styler);
|
|
}
|
|
char ch, chPrev, chPrev2;
|
|
int style = SCE_RB_DEFAULT;
|
|
int lineStart = styler.GetLine(pos);
|
|
int lineStartPosn = styler.LineStart(lineStart);
|
|
// We want to step backwards until we don't care about the current
|
|
// position. But first move lineStartPosn back behind any
|
|
// continuations immediately above word.
|
|
while (lineStartPosn > 0) {
|
|
ch = styler[lineStartPosn-1];
|
|
if (ch == '\n' || ch == '\r') {
|
|
chPrev = styler.SafeGetCharAt(lineStartPosn-2);
|
|
chPrev2 = styler.SafeGetCharAt(lineStartPosn-3);
|
|
lineStart = styler.GetLine(lineStartPosn-1);
|
|
// If we find a continuation line, include it in our analysis.
|
|
if (chPrev == '\\') {
|
|
lineStartPosn = styler.LineStart(lineStart);
|
|
} else if (ch == '\n' && chPrev == '\r' && chPrev2 == '\\') {
|
|
lineStartPosn = styler.LineStart(lineStart);
|
|
} else {
|
|
break;
|
|
}
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
styler.Flush();
|
|
while (--pos >= lineStartPosn) {
|
|
style = actual_style(styler.StyleAt(pos));
|
|
if (style == SCE_RB_DEFAULT) {
|
|
if (iswhitespace(ch = styler[pos])) {
|
|
//continue
|
|
} else if (ch == '\r' || ch == '\n') {
|
|
// Scintilla's LineStart() and GetLine() routines aren't
|
|
// platform-independent, so if we have text prepared with
|
|
// a different system we can't rely on it.
|
|
|
|
// Also, lineStartPosn may have been moved to more than one
|
|
// line above word's line while pushing past continuations.
|
|
chPrev = styler.SafeGetCharAt(pos - 1);
|
|
chPrev2 = styler.SafeGetCharAt(pos - 2);
|
|
if (chPrev == '\\') {
|
|
pos-=1; // gloss over the "\\"
|
|
//continue
|
|
} else if (ch == '\n' && chPrev == '\r' && chPrev2 == '\\') {
|
|
pos-=2; // gloss over the "\\\r"
|
|
//continue
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
if (pos < lineStartPosn) {
|
|
return false;
|
|
}
|
|
// First things where the action is unambiguous
|
|
switch (style) {
|
|
case SCE_RB_DEFAULT:
|
|
case SCE_RB_COMMENTLINE:
|
|
case SCE_RB_POD:
|
|
case SCE_RB_CLASSNAME:
|
|
case SCE_RB_DEFNAME:
|
|
case SCE_RB_MODULE_NAME:
|
|
return false;
|
|
case SCE_RB_OPERATOR:
|
|
break;
|
|
case SCE_RB_WORD:
|
|
// Watch out for uses of 'else if'
|
|
//XXX: Make a list of other keywords where 'if' isn't a modifier
|
|
// and can appear legitimately
|
|
// Formulate this to avoid warnings from most compilers
|
|
if (strcmp(word, "if") == 0) {
|
|
char prevWord[MAX_KEYWORD_LENGTH + 1];
|
|
getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
|
|
return strcmp(prevWord, "else") != 0;
|
|
}
|
|
return true;
|
|
default:
|
|
return true;
|
|
}
|
|
// Assume that if the keyword follows an operator,
|
|
// usually it's a block assignment, like
|
|
// a << if x then y else z
|
|
|
|
ch = styler[pos];
|
|
switch (ch) {
|
|
case ')':
|
|
case ']':
|
|
case '}':
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// "while" spelled in reverse: keywordDoStartsLoop() collects each keyword
// right-to-left and compares the reversed text against this.
#define WHILE_BACKWARDS "elihw"
|
|
// "until" spelled in reverse, for the same right-to-left comparison in
// keywordDoStartsLoop().
#define UNTIL_BACKWARDS "litnu"
|
|
|
|
// Nothing fancy -- look to see if we follow a while/until somewhere
|
|
// on the current line
|
|
|
|
static bool keywordDoStartsLoop(int pos,
|
|
Accessor &styler)
|
|
{
|
|
char ch;
|
|
int style;
|
|
int lineStart = styler.GetLine(pos);
|
|
int lineStartPosn = styler.LineStart(lineStart);
|
|
styler.Flush();
|
|
while (--pos >= lineStartPosn) {
|
|
style = actual_style(styler.StyleAt(pos));
|
|
if (style == SCE_RB_DEFAULT) {
|
|
if ((ch = styler[pos]) == '\r' || ch == '\n') {
|
|
// Scintilla's LineStart() and GetLine() routines aren't
|
|
// platform-independent, so if we have text prepared with
|
|
// a different system we can't rely on it.
|
|
return false;
|
|
}
|
|
} else if (style == SCE_RB_WORD) {
|
|
// Check for while or until, but write the word in backwards
|
|
char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
|
|
char *dst = prevWord;
|
|
int wordLen = 0;
|
|
int start_word;
|
|
for (start_word = pos;
|
|
start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD;
|
|
start_word--) {
|
|
if (++wordLen < MAX_KEYWORD_LENGTH) {
|
|
*dst++ = styler[start_word];
|
|
}
|
|
}
|
|
*dst = 0;
|
|
// Did we see our keyword?
|
|
if (!strcmp(prevWord, WHILE_BACKWARDS)
|
|
|| !strcmp(prevWord, UNTIL_BACKWARDS)) {
|
|
return true;
|
|
}
|
|
// We can move pos to the beginning of the keyword, and then
|
|
// accept another decrement, as we can never have two contiguous
|
|
// keywords:
|
|
// word1 word2
|
|
// ^
|
|
// <- move to start_word
|
|
// ^
|
|
// <- loop decrement
|
|
// ^ # pointing to end of word1 is fine
|
|
pos = start_word;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Folding Ruby
|
|
*
|
|
* The language is quite complex to analyze without a full parse.
|
|
* For example, this line shouldn't affect fold level:
|
|
*
|
|
* print "hello" if feeling_friendly?
|
|
*
|
|
* Neither should this:
|
|
*
|
|
* print "hello" \
|
|
* if feeling_friendly?
|
|
*
|
|
*
|
|
* But this should:
|
|
*
|
|
* if feeling_friendly? #++
|
|
* print "hello" \
|
|
* print "goodbye"
|
|
* end #--
|
|
*
|
|
* So we cheat, by actually looking at the existing indentation
|
|
* levels for each line, and just echoing it back. Like Python.
|
|
* Then if we get better at it, we'll take braces into consideration,
|
|
* which always affect folding levels.
|
|
|
|
* How the keywords should work:
|
|
* No effect:
|
|
* __FILE__ __LINE__ BEGIN END alias and
|
|
* defined? false in nil not or self super then
|
|
* true undef
|
|
|
|
* Always increment:
|
|
* begin class def do for module when {
|
|
*
|
|
* Always decrement:
|
|
* end }
|
|
*
|
|
* Increment if these start a statement
|
|
* if unless until while -- do nothing if they're modifiers
|
|
|
|
* These end a block if there's no modifier, but don't bother
|
|
* break next redo retry return yield
|
|
*
|
|
* These temporarily de-indent, but re-indent
|
|
* case else elsif ensure rescue
|
|
*
|
|
* This means that the folder reflects indentation rather
|
|
* than setting it. The language-service updates indentation
|
|
* when users type return and finish entering de-denters.
|
|
*
|
|
* Later offer to fold POD, here-docs, strings, and blocks of comments
|
|
*/
|
|
|
|
static void FoldRbDoc(unsigned int startPos, int length, int initStyle,
|
|
WordList *[], Accessor &styler) {
|
|
const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
|
|
bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
|
|
|
|
synchronizeDocStart(startPos, length, initStyle, styler, // ref args
|
|
false);
|
|
unsigned int endPos = startPos + length;
|
|
int visibleChars = 0;
|
|
int lineCurrent = styler.GetLine(startPos);
|
|
int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent)
|
|
& SC_FOLDLEVELNUMBERMASK
|
|
& ~SC_FOLDLEVELBASE);
|
|
int levelCurrent = levelPrev;
|
|
char chNext = styler[startPos];
|
|
int styleNext = styler.StyleAt(startPos);
|
|
int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1);
|
|
bool buffer_ends_with_eol = false;
|
|
for (unsigned int i = startPos; i < endPos; i++) {
|
|
char ch = chNext;
|
|
chNext = styler.SafeGetCharAt(i + 1);
|
|
int style = styleNext;
|
|
styleNext = styler.StyleAt(i + 1);
|
|
bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
|
|
if (style == SCE_RB_COMMENTLINE) {
|
|
if (foldComment && stylePrev != SCE_RB_COMMENTLINE) {
|
|
if (chNext == '{') {
|
|
levelCurrent++;
|
|
} else if (chNext == '}' && levelCurrent > 0) {
|
|
levelCurrent--;
|
|
}
|
|
}
|
|
} else if (style == SCE_RB_OPERATOR) {
|
|
if (strchr("[{(", ch)) {
|
|
levelCurrent++;
|
|
} else if (strchr(")}]", ch)) {
|
|
// Don't decrement below 0
|
|
if (levelCurrent > 0)
|
|
levelCurrent--;
|
|
}
|
|
} else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) {
|
|
// Look at the keyword on the left and decide what to do
|
|
char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
|
|
prevWord[0] = 0;
|
|
getPrevWord(i, prevWord, styler, SCE_RB_WORD);
|
|
if (!strcmp(prevWord, "end")) {
|
|
// Don't decrement below 0
|
|
if (levelCurrent > 0)
|
|
levelCurrent--;
|
|
} else if ( !strcmp(prevWord, "if")
|
|
|| !strcmp(prevWord, "def")
|
|
|| !strcmp(prevWord, "class")
|
|
|| !strcmp(prevWord, "module")
|
|
|| !strcmp(prevWord, "begin")
|
|
|| !strcmp(prevWord, "case")
|
|
|| !strcmp(prevWord, "do")
|
|
|| !strcmp(prevWord, "while")
|
|
|| !strcmp(prevWord, "unless")
|
|
|| !strcmp(prevWord, "until")
|
|
|| !strcmp(prevWord, "for")
|
|
) {
|
|
levelCurrent++;
|
|
}
|
|
} else if (style == SCE_RB_HERE_DELIM) {
|
|
if (styler.SafeGetCharAt(i-2) == '<' && styler.SafeGetCharAt(i-1) == '<') {
|
|
levelCurrent++;
|
|
} else if (styleNext == SCE_RB_DEFAULT) {
|
|
levelCurrent--;
|
|
}
|
|
}
|
|
if (atEOL) {
|
|
int lev = levelPrev;
|
|
if (visibleChars == 0 && foldCompact)
|
|
lev |= SC_FOLDLEVELWHITEFLAG;
|
|
if ((levelCurrent > levelPrev) && (visibleChars > 0))
|
|
lev |= SC_FOLDLEVELHEADERFLAG;
|
|
styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE);
|
|
lineCurrent++;
|
|
levelPrev = levelCurrent;
|
|
visibleChars = 0;
|
|
buffer_ends_with_eol = true;
|
|
} else if (!isspacechar(ch)) {
|
|
visibleChars++;
|
|
buffer_ends_with_eol = false;
|
|
}
|
|
stylePrev = style;
|
|
}
|
|
// Fill in the real level of the next line, keeping the current flags as they will be filled in later
|
|
if (!buffer_ends_with_eol) {
|
|
lineCurrent++;
|
|
int new_lev = levelCurrent;
|
|
if (visibleChars == 0 && foldCompact)
|
|
new_lev |= SC_FOLDLEVELWHITEFLAG;
|
|
if ((levelCurrent > levelPrev) && (visibleChars > 0))
|
|
new_lev |= SC_FOLDLEVELHEADERFLAG;
|
|
levelCurrent = new_lev;
|
|
}
|
|
styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE);
|
|
}
|
|
|
|
// Descriptions of the word lists handed to the LexerModule below; the
// array holds a single entry and is terminated by a 0 pointer.
static const char *const rubyWordListDesc[] = { "Keywords", 0 };
|
|
|
|
// The LexerModule object for Ruby, built from the SCLEX_RUBY id, the
// ColouriseRbDoc and FoldRbDoc functions, the name "ruby" and the word
// list descriptions above.
// NOTE(review): the trailing 6 is presumably the number of style bits
// (STYLE_MASK is 63) -- confirm against LexerModule's constructor.
LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc, 6);
|