You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
387 lines
13 KiB
387 lines
13 KiB
// Scintilla source code edit control
/** @file LexCOBOL.cxx
 ** Lexer for COBOL
 ** Based on LexPascal.cxx
 ** Written by Laurent le Tynevez
 ** Updated by Simon Steele <s.steele@pnotepad.org> September 2002
 ** Updated by Mathias Rauen <scite@madshi.net> May 2003 (Delphi adjustments)
 ** Updated by Rod Falck, Aug 2006 Converted to COBOL
 **/
|
|
|
#include <stdlib.h> |
|
#include <string.h> |
|
#include <stdio.h> |
|
#include <stdarg.h> |
|
#include <assert.h> |
|
#include <ctype.h> |
|
|
|
#include <string> |
|
#include <string_view> |
|
|
|
#include "ILexer.h" |
|
#include "Scintilla.h" |
|
#include "SciLexer.h" |
|
|
|
#include "WordList.h" |
|
#include "LexAccessor.h" |
|
#include "Accessor.h" |
|
#include "StyleContext.h" |
|
#include "CharacterSet.h" |
|
#include "LexerModule.h" |
|
|
|
using namespace Lexilla; |
|
|
|
// Containment flags kept in the per-line state. Each IN_* bit marks one
// level of nesting that the current line is inside.
constexpr int IN_DIVISION = 0x01;
constexpr int IN_DECLARATIVES = 0x02;
constexpr int IN_SECTION = 0x04;
constexpr int IN_PARAGRAPH = 0x08;
// Mask selecting the four IN_* containment bits above.
constexpr int IN_FLAGS = 0xF;
// Extra bit, outside IN_FLAGS, that the folder tests before setting the
// fold-header flag on a line.
constexpr int NOT_HEADER = 0x10;
|
|
|
inline bool isCOBOLoperator(char ch) |
|
{ |
|
return isoperator(ch); |
|
} |
|
|
|
inline bool isCOBOLwordchar(char ch) |
|
{ |
|
return IsASCII(ch) && (isalnum(ch) || ch == '-'); |
|
|
|
} |
|
|
|
inline bool isCOBOLwordstart(char ch) |
|
{ |
|
return IsASCII(ch) && isalnum(ch); |
|
} |
|
|
|
/// Returns the number of 1 bits in @a nBits.
/// The folder uses this to turn a set of containment flags into a depth.
static int CountBits(int nBits)
{
    unsigned int remaining = static_cast<unsigned int>(nBits);
    int setBits = 0;
    while (remaining != 0) {
        // Clearing the lowest set bit visits each 1 bit exactly once.
        remaining &= remaining - 1;
        ++setBits;
    }
    return setBits;
}
|
|
|
/// Copies the document text in the inclusive range [@a start, @a end] into
/// @a s, converted to lower case and always NUL-terminated.
///
/// @param start  Position of the first character to copy.
/// @param end    Position of the last character to copy (inclusive).
/// @param styler Accessor used to read the document characters.
/// @param s      Destination buffer.
/// @param len    Capacity of @a s in bytes, including the terminator. At most
///               len - 1 characters are copied; longer ranges are truncated.
static void getRange(Sci_PositionU start,
                     Sci_PositionU end,
                     Accessor &styler,
                     char *s,
                     Sci_PositionU len) {
    if (len == 0) {
        // No room even for the terminator; "len - 1" below would wrap around.
        return;
    }
    Sci_PositionU i = 0;
    while ((i < end - start + 1) && (i < len - 1)) {
        // tolower() requires a value representable as unsigned char; a plain
        // char holding a byte >= 0x80 may be negative.
        const unsigned char raw = static_cast<unsigned char>(styler[start + i]);
        s[i] = static_cast<char>(tolower(raw));
        i++;
    }
    s[i] = '\0';
}
|
|
|
/// Thin file-local wrapper: forwards @a end and @a attr unchanged to
/// Accessor::ColourTo() on @a styler.
static void ColourTo(Accessor &styler, Sci_PositionU end, unsigned int attr)
{
    styler.ColourTo(end, attr);
}
|
|
|
|
|
/// Classifies the word in the inclusive range [@a start, @a end], colours it,
/// and reports any change of containment it causes.
///
/// The word is lower-cased and truncated to 99 characters before it is
/// examined. It is styled as a number when it starts with a digit, '.' or
/// 'v' and contains no other word characters than digits and 'v'. Otherwise
/// it is looked up in keyword lists 0, 1 and 2 in that order.
///
/// @param start        Position of the first character of the word.
/// @param end          Position of the last character of the word.
/// @param keywordlists Keyword lists; entries 0..2 are dereferenced.
/// @param styler       Accessor used to read and colour the document.
/// @param nContainment Containment flags in effect before this word.
/// @param bAarea       In/out flag consulted for the containment keywords;
///                     cleared once "division", "declaratives" or "section"
///                     has been seen.
/// @return 0 when @a *bAarea is false; otherwise the new containment flags.
static int classifyWordCOBOL(Sci_PositionU start, Sci_PositionU end, /*WordList &keywords*/WordList *keywordlists[], Accessor &styler, int nContainment, bool *bAarea) {
    int ret = 0;

    char s[100] = "";
    getRange(start, end, styler, s, sizeof(s));

    // The <ctype.h> classifiers are only defined for values representable as
    // unsigned char, and the segment may contain bytes >= 0x80.
    const auto isDigitChar = [](char c) {
        return isdigit(static_cast<unsigned char>(c)) != 0;
    };

    int chAttr = SCE_C_IDENTIFIER;
    if (isDigitChar(s[0]) || (s[0] == '.') || (s[0] == 'v')) {
        chAttr = SCE_C_NUMBER;
        // Any further word character that is not a digit or 'v' turns the
        // candidate number back into an identifier.
        for (const char *p = s + 1; *p; ++p) {
            if ((!isDigitChar(*p) && (*p) != 'v') && isCOBOLwordchar(*p)) {
                chAttr = SCE_C_IDENTIFIER;
                break;
            }
        }
    }
    if (chAttr == SCE_C_IDENTIFIER) {
        WordList &a_keywords = *keywordlists[0];
        WordList &b_keywords = *keywordlists[1];
        WordList &c_keywords = *keywordlists[2];

        if (a_keywords.InList(s)) {
            chAttr = SCE_C_WORD;
        } else if (b_keywords.InList(s)) {
            chAttr = SCE_C_WORD2;
        } else if (c_keywords.InList(s)) {
            chAttr = SCE_C_UUID;
        }
    }
    if (*bAarea) {
        if (strcmp(s, "division") == 0) {
            ret = IN_DIVISION;
            // we've determined the containment, anything else is just ignored for those purposes
            *bAarea = false;
        } else if (strcmp(s, "declaratives") == 0) {
            ret = IN_DIVISION | IN_DECLARATIVES;
            if (nContainment & IN_DECLARATIVES)
                ret |= NOT_HEADER | IN_SECTION;
            // we've determined the containment, anything else is just ignored for those purposes
            *bAarea = false;
        } else if (strcmp(s, "section") == 0) {
            ret = (nContainment & ~IN_PARAGRAPH) | IN_SECTION;
            // we've determined the containment, anything else is just ignored for those purposes
            *bAarea = false;
        } else if (strcmp(s, "end") == 0 && (nContainment & IN_DECLARATIVES)) {
            // *bAarea stays set so a following "declaratives" is still examined.
            ret = IN_DIVISION | IN_DECLARATIVES | IN_SECTION | NOT_HEADER;
        } else {
            ret = nContainment | IN_PARAGRAPH;
        }
    }
    ColourTo(styler, end, chAttr);
    return ret;
}
|
|
|
/// Colours the range [@a startPos, @a startPos + @a length) of a COBOL
/// document and records the containment flags of every line that ends
/// inside the range as that line's state.
///
/// @param startPos     First position to style.
/// @param length       Number of characters to style.
/// @param initStyle    Style in effect at @a startPos.
/// @param keywordlists Keyword lists handed on to classifyWordCOBOL().
/// @param styler       Accessor used to read and style the document.
static void ColouriseCOBOLDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[],
                              Accessor &styler) {

    styler.StartAt(startPos);

    // A character literal does not leak onto the next line.
    int state = (initStyle == SCE_C_CHARACTER) ? SCE_C_DEFAULT : initStyle;
    char chPrev = ' ';
    char chNext = styler[startPos];
    const Sci_PositionU lengthDoc = startPos + length;

    // Seed the containment from the previous line's state, if there is one.
    Sci_Position currentLine = styler.GetLine(startPos);
    int nContainment = 0;
    if (currentLine > 0) {
        styler.SetLineState(currentLine, styler.GetLineState(currentLine - 1));
        nContainment = styler.GetLineState(currentLine) & ~NOT_HEADER;
    } else {
        styler.SetLineState(currentLine, 0);
    }

    styler.StartSegment(startPos);
    bool atLineStart = true;
    bool bAarea = !isspacechar(chNext);
    int column = 0;
    for (Sci_PositionU i = startPos; i < lengthDoc; i++) {
        const char ch = chNext;
        chNext = styler.SafeGetCharAt(i + 1);

        column = atLineStart ? 0 : column + 1;
        if (!bAarea && column <= 1) {
            bAarea = !isspacechar(ch);
        }

        // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
        // Avoid triggering two times on Dos/Win
        const bool lineEnds = (ch == '\n') || (ch == '\r' && chNext != '\n');
        if (lineEnds) {
            if (state == SCE_C_CHARACTER) {
                ColourTo(styler, i, state);
                state = SCE_C_DEFAULT;
            }
            styler.SetLineState(currentLine, nContainment);
            currentLine++;
            if (nContainment & NOT_HEADER) {
                nContainment &= ~(NOT_HEADER | IN_DECLARATIVES | IN_SECTION);
            }
        }

        if (styler.IsLeadByte(ch)) {
            // Step over the second byte of a double-byte character.
            chNext = styler.SafeGetCharAt(i + 2);
            chPrev = ' ';
            i += 1;
            continue;
        }

        const bool isBreakChar = (ch == '\r' || ch == '\n');

        switch (state) {
        case SCE_C_DEFAULT: {
            // Work out which state, if any, this character opens.
            // SCE_C_DEFAULT means "none".
            int opened = SCE_C_DEFAULT;
            if (isCOBOLwordstart(ch) || (ch == '$' && IsASCII(chNext) && isalpha(chNext))) {
                opened = SCE_C_IDENTIFIER;
            } else if (column == 6 && (ch == '*' || ch == '/')) {
                // Cobol comment line: asterisk in column 7.
                opened = SCE_C_COMMENTLINE;
            } else if (ch == '*' && chNext == '>') {
                // Cobol inline comment: asterisk, followed by greater than.
                opened = SCE_C_COMMENTLINE;
            } else if (column == 0 && (ch == '*' || ch == '/')) {
                // In the first column a following '*' selects the doc-comment style.
                opened = (chNext == '*') ? SCE_C_COMMENTDOC : SCE_C_COMMENTLINE;
            } else if (ch == '"') {
                opened = SCE_C_STRING;
            } else if (ch == '\'') {
                opened = SCE_C_CHARACTER;
            } else if (ch == '?' && column == 0) {
                opened = SCE_C_PREPROCESSOR;
            }

            if (opened != SCE_C_DEFAULT) {
                ColourTo(styler, i - 1, state);
                state = opened;
            } else if (isCOBOLoperator(ch)) {
                ColourTo(styler, i - 1, state);
                ColourTo(styler, i, SCE_C_OPERATOR);
            }
            break;
        }

        case SCE_C_IDENTIFIER:
            if (!isCOBOLwordchar(ch)) {
                // The word ended just before this character: classify and colour it.
                const int lStateChange = classifyWordCOBOL(styler.GetStartSegment(), i - 1, keywordlists, styler, nContainment, &bAarea);
                if (lStateChange != 0) {
                    styler.SetLineState(currentLine, lStateChange);
                    nContainment = lStateChange;
                }

                state = SCE_C_DEFAULT;
                chNext = styler.SafeGetCharAt(i + 1);
                // The terminating character may itself open a new state.
                if (column == 6 && (ch == '*' || ch == '/')) {
                    state = SCE_C_COMMENTLINE;
                } else if (ch == '"') {
                    state = SCE_C_STRING;
                } else if (ch == '\'') {
                    state = SCE_C_CHARACTER;
                } else if (isCOBOLoperator(ch)) {
                    ColourTo(styler, i, SCE_C_OPERATOR);
                }
            }
            break;

        case SCE_C_PREPROCESSOR:
            if (isBreakChar && chPrev != '\\' && chPrev != '\r') {
                ColourTo(styler, i - 1, state);
                state = SCE_C_DEFAULT;
            }
            break;

        case SCE_C_COMMENT:
        case SCE_C_COMMENTLINE:
            if (isBreakChar) {
                ColourTo(styler, i - 1, state);
                state = SCE_C_DEFAULT;
            }
            break;

        case SCE_C_COMMENTDOC:
            if (isBreakChar) {
                const bool pastOpener = i > styler.GetStartSegment() + 2;
                if (pastOpener ||
                    ((initStyle == SCE_C_COMMENTDOC) &&
                     (styler.GetStartSegment() == static_cast<Sci_PositionU>(startPos)))) {
                    ColourTo(styler, i - 1, state);
                    state = SCE_C_DEFAULT;
                }
            }
            break;

        case SCE_C_STRING:
            if (ch == '"') {
                // The closing quote belongs to the string.
                ColourTo(styler, i, state);
                state = SCE_C_DEFAULT;
            } else if (isBreakChar) {
                ColourTo(styler, i - 1, state);
                state = SCE_C_DEFAULT;
            }
            break;

        case SCE_C_CHARACTER:
            if (ch == '\'') {
                ColourTo(styler, i, state);
                state = SCE_C_DEFAULT;
            }
            break;

        default:
            break;
        }

        chPrev = ch;
        atLineStart = lineEnds;
        if (atLineStart) {
            bAarea = false;
        }
    }
    ColourTo(styler, lengthDoc - 1, state);
}
|
|
|
/// Computes fold levels for the range [@a startPos, @a startPos + @a length).
///
/// For every line that ends inside the range, the level is the number of
/// containment bits (IN_FLAGS) set in that line's state, on top of
/// SC_FOLDLEVELBASE. A non-comment line whose first or second character is
/// not white space is placed one level higher up and, unless its line state
/// carries NOT_HEADER, becomes a fold header. A header is demoted again when
/// the following line is not nested deeper.
///
/// @param startPos First position to fold.
/// @param length   Number of characters to fold.
/// @param styler   Accessor used to read line states and set fold levels.
static void FoldCOBOLDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[],
                         Accessor &styler) {
    bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
    Sci_PositionU endPos = startPos + length;
    int visibleChars = 0;
    Sci_Position lineCurrent = styler.GetLine(startPos);
    // With no previous line, 0xFFF is used so the "not deeper than previous"
    // comparison below always holds for the first line.
    int levelPrev = lineCurrent > 0 ? styler.LevelAt(lineCurrent - 1) & SC_FOLDLEVELNUMBERMASK : 0xFFF;
    char chNext = styler[startPos];

    bool bNewLine = true;
    bool bAarea = !isspacechar(chNext);
    int column = 0;
    bool bComment = false;
    for (Sci_PositionU i = startPos; i < endPos; i++) {
        char ch = chNext;
        chNext = styler.SafeGetCharAt(i + 1);
        ++column;

        if (bNewLine) {
            column = 0;
            // The first character of the line decides whether it is a
            // comment or directive line.
            bComment = (ch == '*' || ch == '/' || ch == '?');
        }
        if (column <= 1 && !bAarea) {
            bAarea = !isspacechar(ch);
        }
        bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
        if (atEOL) {
            int nContainment = styler.GetLineState(lineCurrent);
            int lev = CountBits(nContainment & IN_FLAGS) | SC_FOLDLEVELBASE;
            if (bAarea && !bComment)
                --lev;
            if (visibleChars == 0 && foldCompact)
                lev |= SC_FOLDLEVELWHITEFLAG;
            if ((bAarea) && (visibleChars > 0) && !(nContainment & NOT_HEADER) && !bComment)
                lev |= SC_FOLDLEVELHEADERFLAG;
            if (lev != styler.LevelAt(lineCurrent)) {
                styler.SetLevel(lineCurrent, lev);
            }
            // Only touch the previous line when one exists: on line 0 the
            // comparison is always true and would address line -1.
            if ((lineCurrent > 0) &&
                ((lev & SC_FOLDLEVELNUMBERMASK) <= (levelPrev & SC_FOLDLEVELNUMBERMASK))) {
                // this level is at the same level or less than the previous line
                // therefore there is nothing for the previous header to collapse, so remove the header
                styler.SetLevel(lineCurrent - 1, levelPrev & ~SC_FOLDLEVELHEADERFLAG);
            }
            levelPrev = lev;
            visibleChars = 0;
            bAarea = false;
            bNewLine = true;
            lineCurrent++;
        } else {
            bNewLine = false;
        }

        if (!isspacechar(ch))
            visibleChars++;
    }

    // Fill in the real level of the next line, keeping the current flags as they will be filled in later
    // NOTE(review): levelPrev may still carry the previous line's flag bits here - confirm that is intended.
    int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
    styler.SetLevel(lineCurrent, levelPrev | flagsNext);
}
|
|
|
// Descriptions of the keyword lists, in the order classifyWordCOBOL() reads
// them (indices 0, 1 and 2). The array ends with a null pointer.
static const char * const COBOLWordListDesc[] = {
    "A Keywords",
    "B Keywords",
    "Extended Keywords",
    nullptr
};
|
|
|
// Lexer module object for COBOL: ties the SCLEX_COBOL identifier and the
// name "COBOL" to the colouring function, the folding function and the
// keyword list descriptions defined in this file.
LexerModule lmCOBOL(SCLEX_COBOL, ColouriseCOBOLDoc, "COBOL", FoldCOBOLDoc, COBOLWordListDesc);
|
|
|