notepad-plus-plus/lexilla/lexers/LexBash.cxx

1291 lines
41 KiB
C++

// Scintilla source code edit control
/** @file LexBash.cxx
** Lexer for Bash.
**/
// Copyright 2004-2012 by Neil Hodgson <neilh@scintilla.org>
// Adapted from LexPerl by Kein-Hong Man 2004
// The License.txt file describes the conditions under which this software may be distributed.
#include <cstdlib>
#include <cassert>
#include <cstring>
#include <cstdio>
#include <cstdarg>
#include <string>
#include <string_view>
#include <vector>
#include <map>
#include <initializer_list>
#include <functional>
#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"
#include "StringCopy.h"
#include "InList.h"
#include "WordList.h"
#include "LexAccessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "LexerModule.h"
#include "OptionSet.h"
#include "SubStyles.h"
#include "DefaultLexer.h"
using namespace Scintilla;
using namespace Lexilla;
namespace {
#define HERE_DELIM_MAX 256
// define this if you want 'invalid octals' to be marked as errors
// usually, this is not a good idea, permissive lexing is better
#undef PEDANTIC_OCTAL
#define BASH_BASE_ERROR 65
#define BASH_BASE_DECIMAL 66
#define BASH_BASE_HEX 67
#ifdef PEDANTIC_OCTAL
#define BASH_BASE_OCTAL 68
#define BASH_BASE_OCTAL_ERROR 69
#endif
// state constants for parts of a bash command segment
enum class CmdState {
Body,
Start,
Word,
Test, // test
SingleBracket, // []
DoubleBracket, // [[]]
Arithmetic,
Delimiter,
};
enum class CommandSubstitution : int {
Backtick,
Inside,
InsideTrack,
};
// state constants for nested delimiter pairs, used by
// SCE_SH_STRING, SCE_SH_PARAM and SCE_SH_BACKTICKS processing
enum class QuoteStyle {
Literal, // ''
CString, // $''
String, // ""
LString, // $""
HereDoc, // here document
Backtick, // ``
Parameter, // ${}
Command, // $()
CommandInside, // $() with styling inside
Arithmetic, // $(()), $[]
};
#define BASH_QUOTE_STACK_MAX 7
#define BASH_SPECIAL_PARAMETER "*@#?-$!"
constexpr int commandSubstitutionFlag = 0x40;
constexpr int MaskCommand(int state) noexcept {
return state & ~commandSubstitutionFlag;
}
constexpr int translateBashDigit(int ch) noexcept {
if (ch >= '0' && ch <= '9') {
return ch - '0';
} else if (ch >= 'a' && ch <= 'z') {
return ch - 'a' + 10;
} else if (ch >= 'A' && ch <= 'Z') {
return ch - 'A' + 36;
} else if (ch == '@') {
return 62;
} else if (ch == '_') {
return 63;
}
return BASH_BASE_ERROR;
}
int getBashNumberBase(char *s) noexcept {
int i = 0;
int base = 0;
while (*s) {
base = base * 10 + (*s++ - '0');
i++;
}
if (base > 64 || i > 2) {
return BASH_BASE_ERROR;
}
return base;
}
constexpr int opposite(int ch) noexcept {
if (ch == '(') return ')';
if (ch == '[') return ']';
if (ch == '{') return '}';
if (ch == '<') return '>';
return ch;
}
int GlobScan(StyleContext &sc) {
// forward scan for zsh globs, disambiguate versus bash arrays
// complex expressions may still fail, e.g. unbalanced () '' "" etc
int c = 0;
int sLen = 0;
int pCount = 0;
int hash = 0;
while ((c = sc.GetRelativeCharacter(++sLen)) != 0) {
if (IsASpace(c)) {
return 0;
} else if (c == '\'' || c == '\"') {
if (hash != 2) return 0;
} else if (c == '#' && hash == 0) {
hash = (sLen == 1) ? 2:1;
} else if (c == '(') {
pCount++;
} else if (c == ')') {
if (pCount == 0) {
if (hash) return sLen;
return 0;
}
pCount--;
}
}
return 0;
}
bool IsCommentLine(Sci_Position line, LexAccessor &styler) {
const Sci_Position pos = styler.LineStart(line);
const Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
for (Sci_Position i = pos; i < eol_pos; i++) {
const char ch = styler[i];
if (ch == '#')
return true;
else if (ch != ' ' && ch != '\t')
return false;
}
return false;
}
constexpr bool StyleForceBacktrack(int state) noexcept {
return AnyOf(state, SCE_SH_CHARACTER, SCE_SH_STRING, SCE_SH_BACKTICKS, SCE_SH_HERE_Q, SCE_SH_PARAM);
}
struct OptionsBash {
bool fold = false;
bool foldComment = false;
bool foldCompact = true;
bool stylingInsideString = false;
bool stylingInsideBackticks = false;
bool stylingInsideParameter = false;
bool stylingInsideHeredoc = false;
bool nestedBackticks = true;
CommandSubstitution commandSubstitution = CommandSubstitution::Backtick;
std::string specialParameter = BASH_SPECIAL_PARAMETER;
[[nodiscard]] bool stylingInside(int state) const noexcept {
switch (state) {
case SCE_SH_STRING:
return stylingInsideString;
case SCE_SH_BACKTICKS:
return stylingInsideBackticks;
case SCE_SH_PARAM:
return stylingInsideParameter;
case SCE_SH_HERE_Q:
return stylingInsideHeredoc;
default:
return false;
}
}
};
const char * const bashWordListDesc[] = {
"Keywords",
nullptr
};
struct OptionSetBash : public OptionSet<OptionsBash> {
OptionSetBash() {
DefineProperty("fold", &OptionsBash::fold);
DefineProperty("fold.comment", &OptionsBash::foldComment);
DefineProperty("fold.compact", &OptionsBash::foldCompact);
DefineProperty("lexer.bash.styling.inside.string", &OptionsBash::stylingInsideString,
"Set this property to 1 to highlight shell expansions inside string.");
DefineProperty("lexer.bash.styling.inside.backticks", &OptionsBash::stylingInsideBackticks,
"Set this property to 1 to highlight shell expansions inside backticks.");
DefineProperty("lexer.bash.styling.inside.parameter", &OptionsBash::stylingInsideParameter,
"Set this property to 1 to highlight shell expansions inside ${} parameter expansion.");
DefineProperty("lexer.bash.styling.inside.heredoc", &OptionsBash::stylingInsideHeredoc,
"Set this property to 1 to highlight shell expansions inside here document.");
DefineProperty("lexer.bash.command.substitution", &OptionsBash::commandSubstitution,
"Set how to highlight $() command substitution. "
"0 (the default) highlighted as backticks. "
"1 highlighted inside. "
"2 highlighted inside with extra scope tracking.");
DefineProperty("lexer.bash.nested.backticks", &OptionsBash::nestedBackticks,
"Set this property to 0 to disable nested backquoted command substitution.");
DefineProperty("lexer.bash.special.parameter", &OptionsBash::specialParameter,
"Set shell (default is Bash) special parameters.");
DefineWordListSets(bashWordListDesc);
}
};
class QuoteCls { // Class to manage quote pairs (simplified vs LexPerl)
public:
int Count = 0;
int Up = '\0';
int Down = '\0';
QuoteStyle Style = QuoteStyle::Literal;
int Outer = SCE_SH_DEFAULT;
CmdState State = CmdState::Body;
void Clear() noexcept {
Count = 0;
Up = '\0';
Down = '\0';
Style = QuoteStyle::Literal;
Outer = SCE_SH_DEFAULT;
State = CmdState::Body;
}
void Start(int u, QuoteStyle s, int outer, CmdState state) noexcept {
Count = 1;
Up = u;
Down = opposite(Up);
Style = s;
Outer = outer;
State = state;
}
};
class QuoteStackCls { // Class to manage quote pairs that nest
public:
int Depth = 0;
int State = SCE_SH_DEFAULT;
bool lineContinuation = false;
bool nestedBackticks = false;
CommandSubstitution commandSubstitution = CommandSubstitution::Backtick;
int insideCommand = 0;
unsigned backtickLevel = 0;
QuoteCls Current;
QuoteCls Stack[BASH_QUOTE_STACK_MAX];
const CharacterSet &setParamStart;
QuoteStackCls(const CharacterSet &setParamStart_) noexcept : setParamStart{setParamStart_} {}
[[nodiscard]] bool Empty() const noexcept {
return Current.Up == '\0';
}
void Start(int u, QuoteStyle s, int outer, CmdState state) noexcept {
if (Empty()) {
Current.Start(u, s, outer, state);
if (s == QuoteStyle::Backtick) {
++backtickLevel;
}
} else {
Push(u, s, outer, state);
}
}
void Push(int u, QuoteStyle s, int outer, CmdState state) noexcept {
if (Depth >= BASH_QUOTE_STACK_MAX) {
return;
}
Stack[Depth] = Current;
Depth++;
Current.Start(u, s, outer, state);
if (s == QuoteStyle::Backtick) {
++backtickLevel;
}
}
void Pop() noexcept {
if (Depth == 0) {
Clear();
return;
}
if (backtickLevel != 0 && Current.Style == QuoteStyle::Backtick) {
--backtickLevel;
}
if (insideCommand != 0 && Current.Style == QuoteStyle::CommandInside) {
insideCommand = 0;
for (int i = 0; i < Depth; i++) {
if (Stack[i].Style == QuoteStyle::CommandInside) {
insideCommand = commandSubstitutionFlag;
break;
}
}
}
Depth--;
Current = Stack[Depth];
}
void Clear() noexcept {
Depth = 0;
State = SCE_SH_DEFAULT;
insideCommand = 0;
backtickLevel = 0;
Current.Clear();
}
bool CountDown(StyleContext &sc, CmdState &cmdState) {
Current.Count--;
while (Current.Count > 0 && sc.chNext == Current.Down) {
Current.Count--;
sc.Forward();
}
if (Current.Count == 0) {
cmdState = Current.State;
const int outer = Current.Outer;
Pop();
sc.ForwardSetState(outer | insideCommand);
return true;
}
return false;
}
void Expand(StyleContext &sc, CmdState &cmdState, bool stylingInside) {
const CmdState current = cmdState;
const int state = sc.state;
QuoteStyle style = QuoteStyle::Literal;
State = state;
sc.SetState(SCE_SH_SCALAR);
sc.Forward();
if (sc.ch == '{') {
style = QuoteStyle::Parameter;
sc.ChangeState(SCE_SH_PARAM);
} else if (sc.ch == '\'') {
style = QuoteStyle::CString;
sc.ChangeState(SCE_SH_STRING);
} else if (sc.ch == '"') {
style = QuoteStyle::LString;
sc.ChangeState(SCE_SH_STRING);
} else if (sc.ch == '(' || sc.ch == '[') {
if (sc.ch == '[' || sc.chNext == '(') {
style = QuoteStyle::Arithmetic;
cmdState = CmdState::Arithmetic;
sc.ChangeState(SCE_SH_OPERATOR);
} else {
if (stylingInside && commandSubstitution >= CommandSubstitution::Inside) {
style = QuoteStyle::CommandInside;
cmdState = CmdState::Delimiter;
sc.ChangeState(SCE_SH_OPERATOR);
if (commandSubstitution == CommandSubstitution::InsideTrack) {
insideCommand = commandSubstitutionFlag;
}
} else {
style = QuoteStyle::Command;
sc.ChangeState(SCE_SH_BACKTICKS);
}
}
} else {
// scalar has no delimiter pair
if (!setParamStart.Contains(sc.ch)) {
stylingInside = false; // not scalar
}
}
if (!stylingInside) {
sc.ChangeState(state);
} else {
sc.ChangeState(sc.state | insideCommand);
}
if (style != QuoteStyle::Literal) {
Start(sc.ch, style, state, current);
sc.Forward();
}
}
void Escape(StyleContext &sc) {
unsigned count = 1;
while (sc.chNext == '\\') {
++count;
sc.Forward();
}
bool escaped = count & 1U; // odd backslash escape next character
if (escaped && (sc.chNext == '\r' || sc.chNext == '\n')) {
lineContinuation = true;
if (sc.state == SCE_SH_IDENTIFIER) {
sc.SetState(SCE_SH_OPERATOR | insideCommand);
}
return;
}
if (backtickLevel > 0 && nestedBackticks) {
/*
for $k$ level substitution with $N$ backslashes:
* when $N/2^k$ is odd, following dollar is escaped.
* when $(N - 1)/2^k$ is even, following quote is escaped.
* when $N = n\times 2^{k + 1} - 1$, following backtick is escaped.
* when $N = n\times 2^{k + 1} + 2^k - 1$, following backtick starts inner substitution.
* when $N = m\times 2^k + 2^{k - 1} - 1$ and $k > 1$, following backtick ends current substitution.
*/
if (sc.chNext == '$') {
escaped = (count >> backtickLevel) & 1U;
} else if (sc.chNext == '\"' || sc.chNext == '\'') {
escaped = (((count - 1) >> backtickLevel) & 1U) == 0;
} else if (sc.chNext == '`' && escaped) {
unsigned mask = 1U << (backtickLevel + 1);
count += 1;
escaped = (count & (mask - 1)) == 0;
if (!escaped) {
unsigned remain = count - (mask >> 1U);
if (static_cast<int>(remain) >= 0 && (remain & (mask - 1)) == 0) {
escaped = true;
++backtickLevel;
} else if (backtickLevel > 1) {
mask >>= 1U;
remain = count - (mask >> 1U);
if (static_cast<int>(remain) >= 0 && (remain & (mask - 1)) == 0) {
escaped = true;
--backtickLevel;
}
}
}
}
}
if (escaped) {
sc.Forward();
}
}
};
const char styleSubable[] = { SCE_SH_IDENTIFIER, SCE_SH_SCALAR, 0 };
const LexicalClass lexicalClasses[] = {
// Lexer Bash SCLEX_BASH SCE_SH_:
0, "SCE_SH_DEFAULT", "default", "White space",
1, "SCE_SH_ERROR", "error", "Error",
2, "SCE_SH_COMMENTLINE", "comment line", "Line comment: #",
3, "SCE_SH_NUMBER", "literal numeric", "Number",
4, "SCE_SH_WORD", "keyword", "Keyword",
5, "SCE_SH_STRING", "literal string", "String",
6, "SCE_SH_CHARACTER", "literal string", "Single quoted string",
7, "SCE_SH_OPERATOR", "operator", "Operators",
8, "SCE_SH_IDENTIFIER", "identifier", "Identifiers",
9, "SCE_SH_SCALAR", "identifier", "Scalar variable",
10, "SCE_SH_PARAM", "identifier", "Parameter",
11, "SCE_SH_BACKTICKS", "literal string", "Backtick quoted command",
12, "SCE_SH_HERE_DELIM", "operator", "Heredoc delimiter",
13, "SCE_SH_HERE_Q", "here-doc literal string", "Heredoc quoted string",
};
}
class LexerBash final : public DefaultLexer {
WordList keywords;
WordList cmdDelimiter;
WordList bashStruct;
WordList bashStruct_in;
WordList testOperator;
OptionsBash options;
OptionSetBash osBash;
CharacterSet setParamStart;
enum { ssIdentifier, ssScalar };
SubStyles subStyles{styleSubable};
public:
LexerBash() :
DefaultLexer("bash", SCLEX_BASH, lexicalClasses, std::size(lexicalClasses)),
setParamStart(CharacterSet::setAlphaNum, "_" BASH_SPECIAL_PARAMETER) {
cmdDelimiter.Set("| || |& & && ; ;; ( ) { }");
bashStruct.Set("if elif fi while until else then do done esac eval");
bashStruct_in.Set("for case select");
testOperator.Set("eq ge gt le lt ne ef nt ot");
}
void SCI_METHOD Release() override {
delete this;
}
int SCI_METHOD Version() const override {
return lvRelease5;
}
const char * SCI_METHOD PropertyNames() override {
return osBash.PropertyNames();
}
int SCI_METHOD PropertyType(const char* name) override {
return osBash.PropertyType(name);
}
const char * SCI_METHOD DescribeProperty(const char *name) override {
return osBash.DescribeProperty(name);
}
Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
const char * SCI_METHOD PropertyGet(const char* key) override {
return osBash.PropertyGet(key);
}
const char * SCI_METHOD DescribeWordListSets() override {
return osBash.DescribeWordListSets();
}
Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
void SCI_METHOD Fold(Sci_PositionU startPos_, Sci_Position length, int initStyle, IDocument *pAccess) override;
void * SCI_METHOD PrivateCall(int, void *) override {
return nullptr;
}
int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) override {
return subStyles.Allocate(styleBase, numberStyles);
}
int SCI_METHOD SubStylesStart(int styleBase) override {
return subStyles.Start(styleBase);
}
int SCI_METHOD SubStylesLength(int styleBase) override {
return subStyles.Length(styleBase);
}
int SCI_METHOD StyleFromSubStyle(int subStyle) override {
const int styleBase = subStyles.BaseStyle(subStyle);
return styleBase;
}
int SCI_METHOD PrimaryStyleFromStyle(int style) override {
return style;
}
void SCI_METHOD FreeSubStyles() override {
subStyles.Free();
}
void SCI_METHOD SetIdentifiers(int style, const char *identifiers) override {
subStyles.SetIdentifiers(style, identifiers);
}
int SCI_METHOD DistanceToSecondaryStyles() override {
return 0;
}
const char *SCI_METHOD GetSubStyleBases() override {
return styleSubable;
}
bool IsTestOperator(const char *s, const CharacterSet &setSingleCharOp) const noexcept {
return (s[2] == '\0' && setSingleCharOp.Contains(s[1]))
|| testOperator.InList(s + 1);
}
static ILexer5 *LexerFactoryBash() {
return new LexerBash();
}
};
Sci_Position SCI_METHOD LexerBash::PropertySet(const char *key, const char *val) {
if (osBash.PropertySet(&options, key, val)) {
if (strcmp(key, "lexer.bash.special.parameter") == 0) {
setParamStart = CharacterSet(CharacterSet::setAlphaNum, "_");
setParamStart.AddString(options.specialParameter.empty() ? BASH_SPECIAL_PARAMETER : options.specialParameter.c_str());
}
return 0;
}
return -1;
}
Sci_Position SCI_METHOD LexerBash::WordListSet(int n, const char *wl) {
WordList *wordListN = nullptr;
switch (n) {
case 0:
wordListN = &keywords;
break;
}
Sci_Position firstModification = -1;
if (wordListN) {
if (wordListN->Set(wl)) {
firstModification = 0;
}
}
return firstModification;
}
void SCI_METHOD LexerBash::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
const CharacterSet setWordStart(CharacterSet::setAlpha, "_");
// note that [+-] are often parts of identifiers in shell scripts
const CharacterSet setWord(CharacterSet::setAlphaNum, "._+-");
CharacterSet setMetaCharacter(CharacterSet::setNone, "|&;()<> \t\r\n");
setMetaCharacter.Add(0);
const CharacterSet setBashOperator(CharacterSet::setNone, "^&%()-+=|{}[]:;>,*/<?!.~@");
const CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMACahGLNn");
const CharacterSet setParam(CharacterSet::setAlphaNum, "_");
const CharacterSet setHereDoc(CharacterSet::setAlpha, "_\\-+!%*,./:?@[]^`{}~");
const CharacterSet setHereDoc2(CharacterSet::setAlphaNum, "_-+!%*,./:=?@[]^`{}~");
const CharacterSet setLeftShift(CharacterSet::setDigits, "$");
class HereDocCls { // Class to manage HERE document elements
public:
int State = 0; // 0: '<<' encountered
// 1: collect the delimiter
// 2: here doc text (lines after the delimiter)
int Quote = '\0'; // the char after '<<'
bool Quoted = false; // true if Quote in ('\'','"','`')
bool Escaped = false; // backslash in delimiter, common in configure script
bool Indent = false; // indented delimiter (for <<-)
int DelimiterLength = 0; // strlen(Delimiter)
char Delimiter[HERE_DELIM_MAX]{}; // the Delimiter
HereDocCls() noexcept = default;
void Append(int ch) {
Delimiter[DelimiterLength++] = static_cast<char>(ch);
Delimiter[DelimiterLength] = '\0';
}
};
HereDocCls HereDoc;
QuoteStackCls QuoteStack(setParamStart);
QuoteStack.nestedBackticks = options.nestedBackticks;
QuoteStack.commandSubstitution = options.commandSubstitution;
const WordClassifier &classifierIdentifiers = subStyles.Classifier(SCE_SH_IDENTIFIER);
const WordClassifier &classifierScalars = subStyles.Classifier(SCE_SH_SCALAR);
int numBase = 0;
int digit = 0;
const Sci_PositionU endPos = startPos + length;
CmdState cmdState = CmdState::Start;
LexAccessor styler(pAccess);
// Always backtracks to the start of a line that is not a continuation
// of the previous line (i.e. start of a bash command segment)
Sci_Position ln = styler.GetLine(startPos);
if (ln > 0 && startPos == static_cast<Sci_PositionU>(styler.LineStart(ln)))
ln--;
for (;;) {
startPos = styler.LineStart(ln);
if (ln == 0 || styler.GetLineState(ln) == static_cast<int>(CmdState::Start))
break;
ln--;
}
initStyle = SCE_SH_DEFAULT;
StyleContext sc(startPos, endPos - startPos, initStyle, styler);
while (sc.More()) {
// handle line continuation, updates per-line stored state
if (sc.atLineStart) {
CmdState state = CmdState::Body; // force backtrack while retaining cmdState
if (!StyleForceBacktrack(MaskCommand(sc.state))) {
// retain last line's state
// arithmetic expression and double bracket test can span multiline without line continuation
if (!QuoteStack.lineContinuation && !AnyOf(cmdState, CmdState::DoubleBracket, CmdState::Arithmetic)) {
cmdState = CmdState::Start;
}
if (QuoteStack.Empty()) { // force backtrack when nesting
state = cmdState;
}
}
QuoteStack.lineContinuation = false;
styler.SetLineState(sc.currentLine, static_cast<int>(state));
}
// controls change of cmdState at the end of a non-whitespace element
// states Body|Test|Arithmetic persist until the end of a command segment
// state Word persist, but ends with 'in' or 'do' construct keywords
CmdState cmdStateNew = CmdState::Body;
if (cmdState >= CmdState::Word && cmdState <= CmdState::Arithmetic)
cmdStateNew = cmdState;
const int stylePrev = MaskCommand(sc.state);
const int insideCommand = QuoteStack.insideCommand;
// Determine if the current state should terminate.
switch (MaskCommand(sc.state)) {
case SCE_SH_OPERATOR:
sc.SetState(SCE_SH_DEFAULT | insideCommand);
if (cmdState == CmdState::Delimiter) // if command delimiter, start new command
cmdStateNew = CmdState::Start;
else if (sc.chPrev == '\\') // propagate command state if line continued
cmdStateNew = cmdState;
break;
case SCE_SH_WORD:
// "." never used in Bash variable names but used in file names
if (!setWord.Contains(sc.ch) || sc.Match('+', '=') || sc.Match('.', '.')) {
char s[500];
sc.GetCurrent(s, sizeof(s));
int identifierStyle = SCE_SH_IDENTIFIER | insideCommand;
const int subStyle = classifierIdentifiers.ValueFor(s);
if (subStyle >= 0) {
identifierStyle = subStyle | insideCommand;
}
// allow keywords ending in a whitespace, meta character or command delimiter
char s2[10];
s2[0] = static_cast<char>(sc.ch);
s2[1] = '\0';
const bool keywordEnds = IsASpace(sc.ch) || setMetaCharacter.Contains(sc.ch) || cmdDelimiter.InList(s2);
// 'in' or 'do' may be construct keywords
if (cmdState == CmdState::Word) {
if (strcmp(s, "in") == 0 && keywordEnds)
cmdStateNew = CmdState::Body;
else if (strcmp(s, "do") == 0 && keywordEnds)
cmdStateNew = CmdState::Start;
else
sc.ChangeState(identifierStyle);
sc.SetState(SCE_SH_DEFAULT | insideCommand);
break;
}
// a 'test' keyword starts a test expression
if (strcmp(s, "test") == 0) {
if (cmdState == CmdState::Start && keywordEnds) {
cmdStateNew = CmdState::Test;
} else
sc.ChangeState(identifierStyle);
}
// detect bash construct keywords
else if (bashStruct.InList(s)) {
if (cmdState == CmdState::Start && keywordEnds)
cmdStateNew = CmdState::Start;
else
sc.ChangeState(identifierStyle);
}
// 'for'|'case'|'select' needs 'in'|'do' to be highlighted later
else if (bashStruct_in.InList(s)) {
if (cmdState == CmdState::Start && keywordEnds)
cmdStateNew = CmdState::Word;
else
sc.ChangeState(identifierStyle);
}
// disambiguate option items and file test operators
else if (s[0] == '-') {
if (!AnyOf(cmdState, CmdState::Test, CmdState::SingleBracket, CmdState::DoubleBracket)
|| !keywordEnds || !IsTestOperator(s, setSingleCharOp))
sc.ChangeState(identifierStyle);
}
// disambiguate keywords and identifiers
else if (cmdState != CmdState::Start
|| !(keywords.InList(s) && keywordEnds)) {
sc.ChangeState(identifierStyle);
}
sc.SetState(SCE_SH_DEFAULT | insideCommand);
}
break;
case SCE_SH_IDENTIFIER:
if (!setWord.Contains(sc.ch) ||
(cmdState == CmdState::Arithmetic && !setWordStart.Contains(sc.ch))) {
char s[500];
sc.GetCurrent(s, sizeof(s));
const int subStyle = classifierIdentifiers.ValueFor(s);
if (subStyle >= 0) {
sc.ChangeState(subStyle | insideCommand);
}
sc.SetState(SCE_SH_DEFAULT | insideCommand);
}
break;
case SCE_SH_NUMBER:
digit = translateBashDigit(sc.ch);
if (numBase == BASH_BASE_DECIMAL) {
if (sc.ch == '#') {
char s[10];
sc.GetCurrent(s, sizeof(s));
numBase = getBashNumberBase(s);
if (numBase != BASH_BASE_ERROR)
break;
} else if (IsADigit(sc.ch))
break;
} else if (numBase == BASH_BASE_HEX) {
if (IsADigit(sc.ch, 16))
break;
#ifdef PEDANTIC_OCTAL
} else if (numBase == BASH_BASE_OCTAL ||
numBase == BASH_BASE_OCTAL_ERROR) {
if (digit <= 7)
break;
if (digit <= 9) {
numBase = BASH_BASE_OCTAL_ERROR;
break;
}
#endif
} else if (numBase == BASH_BASE_ERROR) {
if (digit <= 9)
break;
} else { // DD#DDDD number style handling
if (digit != BASH_BASE_ERROR) {
if (numBase <= 36) {
// case-insensitive if base<=36
if (digit >= 36) digit -= 26;
}
if (digit < numBase)
break;
if (digit <= 9) {
numBase = BASH_BASE_ERROR;
break;
}
}
}
// fallthrough when number is at an end or error
if (numBase == BASH_BASE_ERROR
#ifdef PEDANTIC_OCTAL
|| numBase == BASH_BASE_OCTAL_ERROR
#endif
) {
sc.ChangeState(SCE_SH_ERROR | insideCommand);
} else if (digit < 62 || digit == 63 || (cmdState != CmdState::Arithmetic &&
(sc.ch == '-' || (sc.ch == '.' && sc.chNext != '.')))) {
// current character is alpha numeric, underscore, hyphen or dot
sc.ChangeState(SCE_SH_IDENTIFIER | insideCommand);
break;
}
sc.SetState(SCE_SH_DEFAULT | insideCommand);
break;
case SCE_SH_COMMENTLINE:
if (sc.MatchLineEnd()) {
sc.SetState(SCE_SH_DEFAULT | insideCommand);
}
break;
case SCE_SH_HERE_DELIM:
// From Bash info:
// ---------------
// Specifier format is: <<[-]WORD
// Optional '-' is for removal of leading tabs from here-doc.
// Whitespace acceptable after <<[-] operator
//
if (HereDoc.State == 0) { // '<<' encountered
HereDoc.Quote = sc.chNext;
HereDoc.Quoted = false;
HereDoc.Escaped = false;
HereDoc.DelimiterLength = 0;
HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
if (sc.chNext == '\'' || sc.chNext == '\"') { // a quoted here-doc delimiter (' or ")
sc.Forward();
HereDoc.Quoted = true;
HereDoc.State = 1;
} else if (setHereDoc.Contains(sc.chNext) ||
(sc.chNext == '=' && cmdState != CmdState::Arithmetic)) {
// an unquoted here-doc delimiter, no special handling
HereDoc.State = 1;
} else if (sc.chNext == '<') { // HERE string <<<
sc.Forward();
sc.ForwardSetState(SCE_SH_DEFAULT | insideCommand);
} else if (IsASpace(sc.chNext)) {
// eat whitespace
} else if (setLeftShift.Contains(sc.chNext) ||
(sc.chNext == '=' && cmdState == CmdState::Arithmetic)) {
// left shift <<$var or <<= cases
sc.ChangeState(SCE_SH_OPERATOR | insideCommand);
sc.ForwardSetState(SCE_SH_DEFAULT | insideCommand);
} else {
// symbols terminates; deprecated zero-length delimiter
HereDoc.State = 1;
}
} else if (HereDoc.State == 1) { // collect the delimiter
// * if single quoted, there's no escape
// * if double quoted, there are \\ and \" escapes
if ((HereDoc.Quote == '\'' && sc.ch != HereDoc.Quote) ||
(HereDoc.Quoted && sc.ch != HereDoc.Quote && sc.ch != '\\') ||
(HereDoc.Quote != '\'' && sc.chPrev == '\\') ||
(setHereDoc2.Contains(sc.ch))) {
HereDoc.Append(sc.ch);
} else if (HereDoc.Quoted && sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
sc.ForwardSetState(SCE_SH_DEFAULT);
} else if (sc.ch == '\\') {
HereDoc.Escaped = true;
if (HereDoc.Quoted && sc.chNext != HereDoc.Quote && sc.chNext != '\\') {
// in quoted prefixes only \ and the quote eat the escape
HereDoc.Append(sc.ch);
} else {
// skip escape prefix
}
} else if (!HereDoc.Quoted) {
sc.SetState(SCE_SH_DEFAULT | insideCommand);
}
if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { // force blowup
sc.SetState(SCE_SH_ERROR | insideCommand);
HereDoc.State = 0;
}
}
break;
case SCE_SH_SCALAR: // variable names
if (!setParam.Contains(sc.ch)) {
char s[500];
sc.GetCurrent(s, sizeof(s));
const int subStyle = classifierScalars.ValueFor(&s[1]); // skip the $
if (subStyle >= 0) {
sc.ChangeState(subStyle | insideCommand);
}
if (sc.LengthCurrent() == 1) {
// Special variable
sc.Forward();
}
sc.SetState(QuoteStack.State | insideCommand);
continue;
}
break;
case SCE_SH_HERE_Q:
// HereDoc.State == 2
if (sc.atLineStart && QuoteStack.Current.Style == QuoteStyle::HereDoc) {
sc.SetState(SCE_SH_HERE_Q | insideCommand);
if (HereDoc.Indent) { // tabulation prefix
while (sc.ch == '\t') {
sc.Forward();
}
}
if ((static_cast<Sci_Position>(sc.currentPos + HereDoc.DelimiterLength) == sc.lineEnd) &&
(HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter))) {
if (HereDoc.DelimiterLength != 0) {
sc.SetState(SCE_SH_HERE_DELIM | insideCommand);
while (!sc.MatchLineEnd()) {
sc.Forward();
}
}
QuoteStack.Pop();
sc.SetState(SCE_SH_DEFAULT | QuoteStack.insideCommand);
break;
}
}
if (HereDoc.Quoted || HereDoc.Escaped) {
break;
}
// fall through to handle nested shell expansions
[[fallthrough]];
case SCE_SH_STRING: // delimited styles, can nest
case SCE_SH_PARAM: // ${parameter}
case SCE_SH_BACKTICKS:
if (sc.ch == '\\') {
if (QuoteStack.Current.Style != QuoteStyle::Literal)
QuoteStack.Escape(sc);
} else if (sc.ch == QuoteStack.Current.Down) {
if (QuoteStack.CountDown(sc, cmdState)) {
continue;
}
} else if (sc.ch == QuoteStack.Current.Up) {
if (QuoteStack.Current.Style != QuoteStyle::Parameter) {
QuoteStack.Current.Count++;
}
} else {
if (QuoteStack.Current.Style == QuoteStyle::String ||
QuoteStack.Current.Style == QuoteStyle::HereDoc ||
QuoteStack.Current.Style == QuoteStyle::LString
) { // do nesting for "string", $"locale-string", heredoc
const bool stylingInside = options.stylingInside(MaskCommand(sc.state));
if (sc.ch == '`') {
QuoteStack.Push(sc.ch, QuoteStyle::Backtick, sc.state, cmdState);
if (stylingInside) {
sc.SetState(SCE_SH_BACKTICKS | insideCommand);
}
} else if (sc.ch == '$' && !AnyOf(sc.chNext, '\"', '\'')) {
QuoteStack.Expand(sc, cmdState, stylingInside);
continue;
}
} else if (QuoteStack.Current.Style == QuoteStyle::Command ||
QuoteStack.Current.Style == QuoteStyle::Parameter ||
QuoteStack.Current.Style == QuoteStyle::Backtick
) { // do nesting for $(command), `command`, ${parameter}
const bool stylingInside = options.stylingInside(MaskCommand(sc.state));
if (sc.ch == '\'') {
if (stylingInside) {
QuoteStack.State = sc.state;
sc.SetState(SCE_SH_CHARACTER | insideCommand);
} else {
QuoteStack.Push(sc.ch, QuoteStyle::Literal, sc.state, cmdState);
}
} else if (sc.ch == '\"') {
QuoteStack.Push(sc.ch, QuoteStyle::String, sc.state, cmdState);
if (stylingInside) {
sc.SetState(SCE_SH_STRING | insideCommand);
}
} else if (sc.ch == '`') {
QuoteStack.Push(sc.ch, QuoteStyle::Backtick, sc.state, cmdState);
if (stylingInside) {
sc.SetState(SCE_SH_BACKTICKS | insideCommand);
}
} else if (sc.ch == '$') {
QuoteStack.Expand(sc, cmdState, stylingInside);
continue;
}
}
}
break;
case SCE_SH_CHARACTER: // singly-quoted strings
if (sc.ch == '\'') {
sc.ForwardSetState(QuoteStack.State | insideCommand);
continue;
}
break;
}
// Must check end of HereDoc state 1 before default state is handled
if (HereDoc.State == 1 && sc.MatchLineEnd()) {
// Begin of here-doc (the line after the here-doc delimiter):
// Lexically, the here-doc starts from the next line after the >>, but the
// first line of here-doc seem to follow the style of the last EOL sequence
HereDoc.State = 2;
if (HereDoc.Quoted) {
if (MaskCommand(sc.state) == SCE_SH_HERE_DELIM) {
// Missing quote at end of string! Syntax error in bash 4.3
// Mark this bit as an error, do not colour any here-doc
sc.ChangeState(SCE_SH_ERROR | insideCommand);
sc.SetState(SCE_SH_DEFAULT | insideCommand);
} else {
// HereDoc.Quote always == '\''
sc.SetState(SCE_SH_HERE_Q | insideCommand);
QuoteStack.Start(-1, QuoteStyle::HereDoc, SCE_SH_DEFAULT, cmdState);
}
} else if (HereDoc.DelimiterLength == 0) {
// no delimiter, illegal (but '' and "" are legal)
sc.ChangeState(SCE_SH_ERROR | insideCommand);
sc.SetState(SCE_SH_DEFAULT | insideCommand);
} else {
sc.SetState(SCE_SH_HERE_Q | insideCommand);
QuoteStack.Start(-1, QuoteStyle::HereDoc, SCE_SH_DEFAULT, cmdState);
}
}
// update cmdState about the current command segment
if (stylePrev != SCE_SH_DEFAULT && MaskCommand(sc.state) == SCE_SH_DEFAULT) {
cmdState = cmdStateNew;
}
// Determine if a new state should be entered.
if (MaskCommand(sc.state) == SCE_SH_DEFAULT) {
if (sc.ch == '\\') {
// Bash can escape any non-newline as a literal
sc.SetState(SCE_SH_IDENTIFIER | insideCommand);
QuoteStack.Escape(sc);
} else if (IsADigit(sc.ch)) {
sc.SetState(SCE_SH_NUMBER | insideCommand);
numBase = BASH_BASE_DECIMAL;
if (sc.ch == '0') { // hex,octal
if (sc.chNext == 'x' || sc.chNext == 'X') {
numBase = BASH_BASE_HEX;
sc.Forward();
} else if (IsADigit(sc.chNext)) {
#ifdef PEDANTIC_OCTAL
numBase = BASH_BASE_OCTAL;
#endif
}
}
} else if (setWordStart.Contains(sc.ch)) {
sc.SetState(((cmdState == CmdState::Arithmetic)? SCE_SH_IDENTIFIER : SCE_SH_WORD) | insideCommand);
} else if (sc.ch == '#') {
if (stylePrev != SCE_SH_WORD && stylePrev != SCE_SH_IDENTIFIER &&
(sc.currentPos == 0 || setMetaCharacter.Contains(sc.chPrev))) {
sc.SetState(SCE_SH_COMMENTLINE | insideCommand);
} else {
sc.SetState(SCE_SH_WORD | insideCommand);
}
// handle some zsh features within arithmetic expressions only
if (cmdState == CmdState::Arithmetic) {
if (sc.chPrev == '[') { // [#8] [##8] output digit setting
sc.SetState(SCE_SH_WORD | insideCommand);
if (sc.chNext == '#') {
sc.Forward();
}
} else if (sc.Match("##^") && IsUpperCase(sc.GetRelative(3))) { // ##^A
sc.SetState(SCE_SH_IDENTIFIER | insideCommand);
sc.Forward(3);
} else if (sc.chNext == '#' && !IsASpace(sc.GetRelative(2))) { // ##a
sc.SetState(SCE_SH_IDENTIFIER | insideCommand);
sc.Forward(2);
} else if (setWordStart.Contains(sc.chNext)) { // #name
sc.SetState(SCE_SH_IDENTIFIER | insideCommand);
}
}
} else if (sc.ch == '\"') {
sc.SetState(SCE_SH_STRING | insideCommand);
QuoteStack.Start(sc.ch, QuoteStyle::String, SCE_SH_DEFAULT, cmdState);
} else if (sc.ch == '\'') {
QuoteStack.State = SCE_SH_DEFAULT;
sc.SetState(SCE_SH_CHARACTER | insideCommand);
} else if (sc.ch == '`') {
sc.SetState(SCE_SH_BACKTICKS | insideCommand);
QuoteStack.Start(sc.ch, QuoteStyle::Backtick, SCE_SH_DEFAULT, cmdState);
} else if (sc.ch == '$') {
QuoteStack.Expand(sc, cmdState, true);
continue;
} else if (cmdState != CmdState::Arithmetic && sc.Match('<', '<')) {
sc.SetState(SCE_SH_HERE_DELIM | insideCommand);
HereDoc.State = 0;
if (sc.GetRelative(2) == '-') { // <<- indent case
HereDoc.Indent = true;
sc.Forward();
} else {
HereDoc.Indent = false;
}
} else if (sc.ch == '-' && // test operator or short and long option
cmdState != CmdState::Arithmetic &&
sc.chPrev != '~' && !IsADigit(sc.chNext)) {
if (IsASpace(sc.chPrev) || setMetaCharacter.Contains(sc.chPrev)) {
sc.SetState(SCE_SH_WORD | insideCommand);
} else {
sc.SetState(SCE_SH_IDENTIFIER | insideCommand);
}
} else if (setBashOperator.Contains(sc.ch)) {
bool isCmdDelim = false;
sc.SetState(SCE_SH_OPERATOR | insideCommand);
// arithmetic expansion and command substitution
if (QuoteStack.Current.Style == QuoteStyle::Arithmetic || QuoteStack.Current.Style == QuoteStyle::CommandInside) {
if (sc.ch == QuoteStack.Current.Down) {
if (QuoteStack.CountDown(sc, cmdState)) {
continue;
}
} else if (sc.ch == QuoteStack.Current.Up) {
QuoteStack.Current.Count++;
}
}
// globs have no whitespace, do not appear in arithmetic expressions
if (cmdState != CmdState::Arithmetic && sc.ch == '(' && sc.chNext != '(') {
const int i = GlobScan(sc);
if (i > 1) {
sc.SetState(SCE_SH_IDENTIFIER | insideCommand);
sc.Forward(i + 1);
continue;
}
}
// handle opening delimiters for test/arithmetic expressions - ((,[[,[
if (cmdState == CmdState::Start
|| cmdState == CmdState::Body) {
if (sc.Match('(', '(')) {
cmdState = CmdState::Arithmetic;
sc.Forward();
} else if (sc.Match('[', '[') && IsASpace(sc.GetRelative(2))) {
cmdState = CmdState::DoubleBracket;
sc.Forward();
} else if (sc.ch == '[' && IsASpace(sc.chNext)) {
cmdState = CmdState::SingleBracket;
}
}
// special state -- for ((x;y;z)) in ... looping
if (cmdState == CmdState::Word && sc.Match('(', '(')) {
cmdState = CmdState::Arithmetic;
sc.Forward(2);
continue;
}
// handle command delimiters in command Start|Body|Word state, also Test if 'test' or '[]'
if (cmdState < CmdState::DoubleBracket) {
char s[10];
s[0] = static_cast<char>(sc.ch);
if (setBashOperator.Contains(sc.chNext)) {
s[1] = static_cast<char>(sc.chNext);
s[2] = '\0';
isCmdDelim = cmdDelimiter.InList(s);
if (isCmdDelim)
sc.Forward();
}
if (!isCmdDelim) {
s[1] = '\0';
isCmdDelim = cmdDelimiter.InList(s);
}
if (isCmdDelim) {
cmdState = CmdState::Delimiter;
sc.Forward();
continue;
}
}
// handle closing delimiters for test/arithmetic expressions - )),]],]
if (cmdState == CmdState::Arithmetic && sc.Match(')', ')')) {
cmdState = CmdState::Body;
sc.Forward();
} else if (sc.ch == ']' && IsASpace(sc.chPrev)) {
if (cmdState == CmdState::SingleBracket) {
cmdState = CmdState::Body;
} else if (cmdState == CmdState::DoubleBracket && sc.chNext == ']') {
cmdState = CmdState::Body;
sc.Forward();
}
}
}
}// sc.state
sc.Forward();
}
sc.Complete();
if (MaskCommand(sc.state) == SCE_SH_HERE_Q) {
styler.ChangeLexerState(sc.currentPos, styler.Length());
}
sc.Complete();
}
void SCI_METHOD LexerBash::Fold(Sci_PositionU startPos_, Sci_Position length, int initStyle, IDocument *pAccess) {
if(!options.fold)
return;
LexAccessor styler(pAccess);
Sci_Position startPos = startPos_;
const Sci_Position endPos = startPos + length;
int visibleChars = 0;
Sci_Position lineCurrent = styler.GetLine(startPos);
// Backtrack to previous line in case need to fix its fold status
if (lineCurrent > 0) {
lineCurrent--;
startPos = styler.LineStart(lineCurrent);
initStyle = (startPos > 0) ? styler.StyleIndexAt(startPos - 1) : 0;
}
int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
int levelCurrent = levelPrev;
char chNext = styler[startPos];
int styleNext = MaskCommand(styler.StyleIndexAt(startPos));
int style = MaskCommand(initStyle);
char word[8] = { '\0' }; // we're not interested in long words anyway
size_t wordlen = 0;
for (Sci_Position i = startPos; i < endPos; i++) {
const char ch = chNext;
chNext = styler.SafeGetCharAt(i + 1);
const int stylePrev = style;
style = styleNext;
styleNext = MaskCommand(styler.StyleIndexAt(i + 1));
const bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
// Comment folding
if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler))
{
if (!IsCommentLine(lineCurrent - 1, styler)
&& IsCommentLine(lineCurrent + 1, styler))
levelCurrent++;
else if (IsCommentLine(lineCurrent - 1, styler)
&& !IsCommentLine(lineCurrent + 1, styler))
levelCurrent--;
}
switch (style) {
case SCE_SH_WORD:
if ((wordlen + 1) < sizeof(word))
word[wordlen++] = ch;
if (styleNext != style) {
word[wordlen] = '\0';
wordlen = 0;
if (InList(word, {"if", "case", "do"})) {
levelCurrent++;
} else if (InList(word, {"fi", "esac", "done"})) {
levelCurrent--;
}
}
break;
case SCE_SH_OPERATOR:
if (ch == '{') {
levelCurrent++;
} else if (ch == '}') {
levelCurrent--;
}
break;
// Here Document folding
case SCE_SH_HERE_DELIM:
if (stylePrev == SCE_SH_HERE_Q) {
levelCurrent--;
} else if (stylePrev != SCE_SH_HERE_DELIM) {
if (ch == '<' && chNext == '<') {
if (styler.SafeGetCharAt(i + 2) != '<') {
levelCurrent++;
}
}
}
break;
case SCE_SH_HERE_Q:
if (styleNext == SCE_SH_DEFAULT) {
levelCurrent--;
}
break;
}
if (atEOL) {
int lev = levelPrev;
if (visibleChars == 0 && options.foldCompact)
lev |= SC_FOLDLEVELWHITEFLAG;
if ((levelCurrent > levelPrev) && (visibleChars > 0))
lev |= SC_FOLDLEVELHEADERFLAG;
if (lev != styler.LevelAt(lineCurrent)) {
styler.SetLevel(lineCurrent, lev);
}
lineCurrent++;
levelPrev = levelCurrent;
visibleChars = 0;
}
if (!isspacechar(ch))
visibleChars++;
}
// Fill in the real level of the next line, keeping the current flags as they will be filled in later
const int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
styler.SetLevel(lineCurrent, levelPrev | flagsNext);
}
LexerModule lmBash(SCLEX_BASH, LexerBash::LexerFactoryBash, "bash", bashWordListDesc);