You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
405 lines
9.9 KiB
405 lines
9.9 KiB
// Scintilla Lexer for EDIFACT
// @file LexEDIFACT.cxx
// Written by Iain Clarke, IMCSoft & Inobiz AB.
// EDIFACT documented here: https://www.unece.org/cefact/edifact/welcome.html
// and more readably here: https://en.wikipedia.org/wiki/EDIFACT
// This code is subject to the same license terms as the rest of the scintilla project:
// The License.txt file describes the conditions under which this software may be distributed.
//
|
|
|
// Header order must match order in scripts/HeaderOrder.txt
|
#include <cstdlib> |
|
#include <cassert> |
|
#include <cstring> |
|
#include <cctype> |
|
|
|
#include <string> |
|
#include <string_view> |
|
|
|
#include "ILexer.h" |
|
#include "Scintilla.h" |
|
#include "SciLexer.h" |
|
|
|
#include "LexAccessor.h" |
|
#include "LexerModule.h" |
|
#include "DefaultLexer.h" |
|
|
|
using namespace Scintilla; |
|
using namespace Lexilla; |
|
|
|
class LexerEDIFACT : public DefaultLexer |
|
{ |
|
public: |
|
LexerEDIFACT(); |
|
virtual ~LexerEDIFACT() {} // virtual destructor, as we inherit from ILexer |
|
|
|
static ILexer5 *Factory() { |
|
return new LexerEDIFACT; |
|
} |
|
|
|
int SCI_METHOD Version() const override |
|
{ |
|
return lvRelease5; |
|
} |
|
void SCI_METHOD Release() override |
|
{ |
|
delete this; |
|
} |
|
|
|
const char * SCI_METHOD PropertyNames() override |
|
{ |
|
return "fold\nlexer.edifact.highlight.un.all"; |
|
} |
|
int SCI_METHOD PropertyType(const char *) override |
|
{ |
|
return SC_TYPE_BOOLEAN; // Only one property! |
|
} |
|
const char * SCI_METHOD DescribeProperty(const char *name) override |
|
{ |
|
if (!strcmp(name, "fold")) |
|
return "Whether to apply folding to document or not"; |
|
if (!strcmp(name, "lexer.edifact.highlight.un.all")) |
|
return "Whether to apply UN* highlighting to all UN segments, or just to UNH"; |
|
return NULL; |
|
} |
|
|
|
Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override |
|
{ |
|
if (!strcmp(key, "fold")) |
|
{ |
|
m_bFold = strcmp(val, "0") ? true : false; |
|
return 0; |
|
} |
|
if (!strcmp(key, "lexer.edifact.highlight.un.all")) // GetProperty |
|
{ |
|
m_bHighlightAllUN = strcmp(val, "0") ? true : false; |
|
return 0; |
|
} |
|
return -1; |
|
} |
|
|
|
const char * SCI_METHOD PropertyGet(const char *key) override |
|
{ |
|
m_lastPropertyValue = ""; |
|
if (!strcmp(key, "fold")) |
|
{ |
|
m_lastPropertyValue = m_bFold ? "1" : "0"; |
|
} |
|
if (!strcmp(key, "lexer.edifact.highlight.un.all")) // GetProperty |
|
{ |
|
m_lastPropertyValue = m_bHighlightAllUN ? "1" : "0"; |
|
} |
|
return m_lastPropertyValue.c_str(); |
|
} |
|
|
|
const char * SCI_METHOD DescribeWordListSets() override |
|
{ |
|
return NULL; |
|
} |
|
Sci_Position SCI_METHOD WordListSet(int, const char *) override |
|
{ |
|
return -1; |
|
} |
|
void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; |
|
void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; |
|
void * SCI_METHOD PrivateCall(int, void *) override |
|
{ |
|
return NULL; |
|
} |
|
|
|
protected: |
|
Sci_Position InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength); |
|
Sci_Position FindPreviousEnd(IDocument *pAccess, Sci_Position startPos) const; |
|
Sci_Position ForwardPastWhitespace(IDocument *pAccess, Sci_Position startPos, Sci_Position MaxLength) const; |
|
int DetectSegmentHeader(char SegmentHeader[3]) const; |
|
|
|
bool m_bFold; |
|
|
|
// property lexer.edifact.highlight.un.all |
|
// Set to 0 to highlight only UNA segments, or 1 to highlight all UNx segments. |
|
bool m_bHighlightAllUN; |
|
|
|
char m_chComponent; |
|
char m_chData; |
|
char m_chDecimal; |
|
char m_chRelease; |
|
char m_chSegment; |
|
|
|
std::string m_lastPropertyValue; |
|
}; |
|
|
|
// Module descriptor associating the SCLEX_EDIFACT identifier and the name "edifact"
// with LexerEDIFACT::Factory.
LexerModule lmEDIFACT(SCLEX_EDIFACT, LexerEDIFACT::Factory, "edifact");
|
|
|
/////////////////////////////////////////////////////////////////////////////// |
|
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////////////////// |
|
|
|
// Starts with folding and UN* highlighting switched off, and with the same
// separator characters that InitialiseFromUNA falls back to when no UNA is found.
LexerEDIFACT::LexerEDIFACT() :
	DefaultLexer("edifact", SCLEX_EDIFACT),
	m_bFold(false),
	m_bHighlightAllUN(false),
	m_chComponent(':'),
	m_chData('+'),
	m_chDecimal('.'),
	m_chRelease('?'),
	m_chSegment('\'')
{
}
|
|
|
// Styles the range [startPos, startPos + length).
// Styling actually restarts just after the nearest segment terminator before
// startPos (or at the document beginning), so a segment is always lexed whole.
// Each segment gets its tag coloured, then separators and data up to the
// terminator. A segment still open at the end of the range is marked
// SCE_EDI_BADSEGMENT as a whole; lexing stops at a tag that is not three
// upper-case letters.
void LexerEDIFACT::Lex(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess)
{
	const Sci_PositionU posFinish = startPos + length;
	InitialiseFromUNA(pAccess, posFinish);

	// Look backwards for a segment terminator or the document beginning
	Sci_PositionU posCurrent = FindPreviousEnd(pAccess, startPos);
	// And jump past the terminator if this was not the beginning of the document
	if (posCurrent != 0)
		posCurrent++;

	// Style buffer, so we're not issuing loads of notifications
	LexAccessor styler(pAccess);
	pAccess->StartStyling(posCurrent);
	styler.StartSegment(posCurrent);

	// Start of the segment currently being coloured, or -1 when between segments.
	Sci_Position posSegmentStart = -1;

	while ((posCurrent < posFinish) && (posSegmentStart == -1))
	{
		posCurrent = ForwardPastWhitespace(pAccess, posCurrent, posFinish);
		// Mark whitespace as default
		styler.ColourTo(posCurrent - 1, SCE_EDI_DEFAULT);
		if (posCurrent >= posFinish)
			break;

		// Does it start with 3 characters? ie, UNH
		// Zero-filled so a short read near the document end yields a bad tag.
		char SegmentHeader[4] = { 0 };
		pAccess->GetCharRange(SegmentHeader, posCurrent, 3);

		const int SegmentStyle = DetectSegmentHeader(SegmentHeader);
		if (SegmentStyle == SCE_EDI_BADSEGMENT)
			break;
		if (SegmentStyle == SCE_EDI_UNA)
		{
			// UNA is a fixed 9 characters, but never colour past the requested
			// range (e.g. while the user is still typing "UNA").
			posCurrent += 9;
			if (posCurrent > posFinish)
				posCurrent = posFinish;
			styler.ColourTo(posCurrent - 1, SCE_EDI_UNA); // UNA
			continue;
		}

		posSegmentStart = posCurrent;
		posCurrent += 3;
		styler.ColourTo(posCurrent - 1, SegmentStyle); // UNH etc

		// Colour in the rest of the segment
		for (; posCurrent < posFinish; posCurrent++)
		{
			char c = '\0';
			pAccess->GetCharRange(&c, posCurrent, 1);

			if (c == m_chRelease) // ? escape character, check first, in case of ?'
			{
				// Skip the escaped character and colour the pair as plain data,
				// so it does not pick up the style of whatever follows it.
				posCurrent++;
				styler.ColourTo((posCurrent < posFinish) ? posCurrent : posFinish - 1, SCE_EDI_DEFAULT);
			}
			else if (c == m_chSegment) // '
			{
				// Make sure the whole segment is on one line. styler won't let us go back in time,
				// so we'll settle for marking the ' as bad.
				const Sci_Position lineSegmentStart = pAccess->LineFromPosition(posSegmentStart);
				const Sci_Position lineSegmentEnd = pAccess->LineFromPosition(posCurrent);
				if (lineSegmentStart == lineSegmentEnd)
					styler.ColourTo(posCurrent, SCE_EDI_SEGMENTEND);
				else
					styler.ColourTo(posCurrent, SCE_EDI_BADSEGMENT);
				posSegmentStart = -1;
				posCurrent++;
				break;
			}
			else if (c == m_chComponent) // :
				styler.ColourTo(posCurrent, SCE_EDI_SEP_COMPOSITE);
			else if (c == m_chData) // +
				styler.ColourTo(posCurrent, SCE_EDI_SEP_ELEMENT);
			else
				styler.ColourTo(posCurrent, SCE_EDI_DEFAULT);
		}
	}
	styler.Flush();

	if (posSegmentStart == -1)
		return;

	// The range ended inside a segment: restyle that whole segment as bad.
	pAccess->StartStyling(posSegmentStart);
	pAccess->SetStyleFor(posFinish - posSegmentStart, SCE_EDI_BADSEGMENT);
}
|
|
|
// Assigns fold levels to the lines in [startPos, startPos + length); does nothing
// unless the "fold" property is on.
// Scanning restarts at the nearest segment terminator before startPos (or at the
// document beginning). UNH and UNG open a fold level and mark their line as a fold
// header; UNT and UNE close one. A line's level is written when its '\n' is
// reached, so a final line without '\n' is not written.
void LexerEDIFACT::Fold(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess)
{
	if (!m_bFold)
		return;

	const Sci_PositionU endPos = startPos + length;
	startPos = FindPreviousEnd(pAccess, startPos);
	char SegmentHeader[4] = { 0 };

	// True between a segment terminator (or the scan start) and the first
	// character of the next segment's tag.
	bool AwaitingSegment = true;
	Sci_PositionU currLine = pAccess->LineFromPosition(startPos);
	int levelCurrentStyle = SC_FOLDLEVELBASE;
	// NOTE(review): the first line written below inherits the previous line's raw
	// level value, flags included - confirm this is intended for partial refolds.
	if (currLine > 0)
		levelCurrentStyle = pAccess->GetLevel(currLine - 1); // bottom 12 bits are level
	int indentCurrent = levelCurrentStyle & SC_FOLDLEVELNUMBERMASK;
	int indentNext = indentCurrent;

	while (startPos < endPos)
	{
		char c = '\0';
		pAccess->GetCharRange(&c, startPos, 1);
		switch (c)
		{
		case '\t':
		case '\r':
		case ' ':
			startPos++;
			continue;
		case '\n':
			// End of line: commit this line's level, then carry the pending level forward.
			currLine = pAccess->LineFromPosition(startPos);
			pAccess->SetLevel(currLine, levelCurrentStyle | indentCurrent);
			startPos++;
			levelCurrentStyle = SC_FOLDLEVELBASE;
			indentCurrent = indentNext;
			continue;
		}

		if (c == m_chRelease)
		{
			// Escaped character: skip it together with the release character.
			startPos += 2;
			continue;
		}
		if (c == m_chSegment)
		{
			AwaitingSegment = true;
			startPos++;
			continue;
		}

		if (!AwaitingSegment)
		{
			startPos++;
			continue;
		}

		// Segment! Whatever the tag turns out to be, the rest of this segment is
		// data, so stop looking for tags until the next terminator. Otherwise data
		// containing e.g. "UNH" inside a non-UN segment would alter the fold levels.
		AwaitingSegment = false;
		pAccess->GetCharRange(SegmentHeader, startPos, 3);
		if (SegmentHeader[0] != 'U' || SegmentHeader[1] != 'N')
		{
			startPos++;
			continue;
		}

		switch (SegmentHeader[2])
		{
		case 'H':
		case 'G':
			indentNext++;
			levelCurrentStyle = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
			break;

		case 'T':
		case 'E':
			if (indentNext > 0)
				indentNext--;
			break;
		}

		startPos += 3;
	}
}
|
|
|
// Reads the separator characters from a UNA segment at the start of the document.
// Leading whitespace is skipped; the UNA must lie wholly within the first
// MaxLength characters.
// Returns 0 when a UNA was found and applied, or -1 after resetting all five
// separator members to their defaults.
Sci_Position LexerEDIFACT::InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength)
{
	constexpr Sci_PositionU lengthUNA = 9; // room needed for UNA:+.? '

	// Guard first: subtracting from a shorter unsigned length would wrap around.
	if (MaxLength >= lengthUNA)
	{
		// Last position at which a complete UNA still fits (inclusive).
		const Sci_PositionU lastStart = MaxLength - lengthUNA;
		// Never returns more than lastStart when started from 0.
		const Sci_PositionU startPos = ForwardPastWhitespace(pAccess, 0, lastStart);

		char bufUNA[lengthUNA] = { 0 };
		pAccess->GetCharRange(bufUNA, startPos, lengthUNA);

		// Check it's UNA segment
		if (!memcmp(bufUNA, "UNA", 3))
		{
			m_chComponent = bufUNA[3];
			m_chData = bufUNA[4];
			m_chDecimal = bufUNA[5];
			m_chRelease = bufUNA[6];
			// bufUNA [7] should be space - reserved.
			m_chSegment = bufUNA[8];

			return 0; // success!
		}
	}

	// We failed to find a UNA, so drop to defaults
	m_chComponent = ':';
	m_chData = '+';
	m_chDecimal = '.';
	m_chRelease = '?';
	m_chSegment = '\'';

	return -1;
}
|
|
|
// Returns the position of the first character at or after startPos that is not a
// tab, carriage return, line feed or space. Returns MaxLength when no such
// character exists before MaxLength.
Sci_Position LexerEDIFACT::ForwardPastWhitespace(IDocument *pAccess, Sci_Position startPos, Sci_Position MaxLength) const
{
	char ch;

	for (Sci_Position pos = startPos; pos < MaxLength; pos++)
	{
		pAccess->GetCharRange(&ch, pos, 1);

		const bool isBlank = (ch == '\t') || (ch == '\r') || (ch == '\n') || (ch == ' ');
		if (!isBlank)
			return pos;
	}

	return MaxLength;
}
|
|
|
int LexerEDIFACT::DetectSegmentHeader(char SegmentHeader[3]) const |
|
{ |
|
if ( |
|
SegmentHeader[0] < 'A' || SegmentHeader[0] > 'Z' || |
|
SegmentHeader[1] < 'A' || SegmentHeader[1] > 'Z' || |
|
SegmentHeader[2] < 'A' || SegmentHeader[2] > 'Z') |
|
return SCE_EDI_BADSEGMENT; |
|
|
|
if (!memcmp(SegmentHeader, "UNA", 3)) |
|
return SCE_EDI_UNA; |
|
|
|
if (m_bHighlightAllUN && !memcmp(SegmentHeader, "UN", 2)) |
|
return SCE_EDI_UNH; |
|
else if (!memcmp(SegmentHeader, "UNH", 3)) |
|
return SCE_EDI_UNH; |
|
else if (!memcmp(SegmentHeader, "UNG", 3)) |
|
return SCE_EDI_UNH; |
|
|
|
return SCE_EDI_SEGMENTSTART; |
|
} |
|
|
|
// Look backwards for a ' or a document beginning |
|
// Searches backwards from startPos for a segment terminator.
// Returns its position, or 0 when none is found; position 0 itself is never examined.
// A terminator preceded by an odd number of release characters is escaped data
// (e.g. ?'), matching how Lex and Fold treat the release character, and is skipped.
Sci_Position LexerEDIFACT::FindPreviousEnd(IDocument *pAccess, Sci_Position startPos) const
{
	for (; startPos > 0; startPos--)
	{
		char c = '\0';
		pAccess->GetCharRange(&c, startPos, 1);
		if (c != m_chSegment)
			continue;

		// Count the run of release characters directly in front of the terminator.
		// Each pair is an escaped release character, so only an odd count escapes
		// the terminator itself.
		Sci_Position releaseCount = 0;
		for (Sci_Position pos = startPos - 1; pos >= 0; pos--)
		{
			char prev = '\0';
			pAccess->GetCharRange(&prev, pos, 1);
			if (prev != m_chRelease)
				break;
			releaseCount++;
		}

		if ((releaseCount % 2) == 0)
			return startPos;
	}
	// We didn't find a terminator, so just go with the beginning
	return 0;
}
|
|
|
|
|
|