#!/usr/bin/env python3
# LexillaData.py - implemented 2013 by Neil Hodgson neilh@scintilla.org
# Released to the public domain.

# Requires FileGenerator from Scintilla so scintilla must be a peer directory of lexilla.

"""
Common code used by Lexilla and SciTE for source file regeneration.
"""

# The LexillaData object exposes information about Lexilla as properties:
# Version properties
#     version
#     versionDotted
#     versionCommad
#
# Date last modified
#     dateModified
#     yearModified
#     mdyModified
#     dmyModified
#     myModified
#
# Information about lexers and properties defined in lexers
#     lexFiles
#         sorted list of lexer file stems like LexAbaqus
#     lexerModules
#         sorted list of module names like lmAbaqus
#     lexerProperties
#         sorted list of lexer properties like lexer.bash.command.substitution
#     propertyDocuments
#         dictionary of property documentation { name: document string }
#         like lexer.bash.special.parameter: Set shell (default is Bash) special parameters.
#     sclexFromName
#         dictionary of SCLEX_* IDs { name: SCLEX_ID } like ave: SCLEX_AVE
#     fileFromSclex
#         dictionary of file names { SCLEX_ID: file name } like SCLEX_AU3: LexAU3.cxx
#     lexersXcode
#         dictionary of project file UUIDs { file name: [build UUID, file UUID] }
#         like LexTCL: [28BA733B24E34D9700272C2D,28BA72C924E34D9100272C2D]
#     credits
#         list of names of contributors like Atsuo Ishimoto

# This file can be run to see the data it provides.

# Requires Python 3.6 or later

import datetime, pathlib, sys, textwrap

neutralEncoding = "iso-8859-1"  # Each byte value is valid in iso-8859-1

def ReadFileAsList(path):
    """Read all the lines in the file and return as a list of strings without line ends.
    """
    with path.open(encoding="utf-8") as f:
        return [line.rstrip('\n') for line in f]

def FindModules(lexFile):
    """ Return a list of modules found within a lexer implementation file. """
    modules = []
    partLine = ""
    with lexFile.open(encoding=neutralEncoding) as f:
        lineNum = 0
        for line in f.readlines():
            lineNum += 1
            line = line.rstrip()
            if partLine or line.startswith("LexerModule"):
                if ")" in line:
                    line = partLine + line
                    original = line
                    line = line.replace("(", " ")
                    line = line.replace(")", " ")
                    line = line.replace(",", " ")
                    parts = line.split()
                    lexerName = parts[4]
                    if not (lexerName.startswith('"') and lexerName.endswith('"')):
                        print(f"{lexFile}:{lineNum}: Bad LexerModule statement:\n{original}")
                        sys.exit(1)
                    lexerName = lexerName.strip('"')
                    modules.append([parts[1], parts[2], lexerName])
                    partLine = ""
                else:
                    partLine = partLine + line
    return modules

def FindSectionInList(lines, markers):
    """Find a section defined by an initial start marker, an optional secondary marker
    and an end marker. The section is between the secondary/initial start and the end.
    Report as a slice object so the section can be extracted or replaced.
    Raises an exception if the markers can't be found.
    Currently only used for Xcode project files.
    """
    start = -1
    end = -1
    state = 0
    for i, line in enumerate(lines):
        if markers[0] in line:
            if markers[1]:
                state = 1
            else:
                start = i+1
                state = 2
        elif state == 1:
            if markers[1] in line:
                start = i+1
                state = 2
        elif state == 2:
            if markers[2] in line:
                end = i
                state = 3
    # Check that section was found
    if start == -1:
        raise ValueError("Could not find start marker(s) |" + markers[0] + "|" + markers[1] + "|")
    if end == -1:
        raise ValueError("Could not find end marker " + markers[2])
    return slice(start, end)
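
# Illustrative sketch, not part of the original module: because FindSectionInList
# reports the marked section as a slice, callers can both extract the section and
# splice in replacement lines with slice assignment. ReplaceSection and its
# parameter names are hypothetical, added only to show that usage.
def ReplaceSection(lines, markers, newLines):
    """Return a copy of lines with the marked section replaced by newLines."""
    section = FindSectionInList(lines, markers)
    replaced = list(lines)
    replaced[section] = newLines    # slice assignment splices in the new section
    return replaced
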
""" lines = ReadFileAsList(xCodeProject) # PBXBuildFile section is a list of all buildable files in the project so extract the file # basename and its build and file IDs uidsOfBuild = {} markersPBXBuildFile = ["Begin PBXBuildFile section", "", "End PBXBuildFile section"] for buildLine in lines[FindSectionInList(lines, markersPBXBuildFile)]: # Occurs for each file in the build. Find the UIDs used for the file. #\t\t[0-9A-F]+ /* [a-zA-Z]+.cxx in sources */ = {isa = PBXBuildFile; fileRef = [0-9A-F]+ /* [a-zA-Z]+ */; }; pieces = buildLine.split() uid1 = pieces[0] filename = pieces[2].split(".")[0] uid2 = pieces[12] uidsOfBuild[filename] = [uid1, uid2] # PBXGroup section contains the folders (Lexilla, Lexers, LexLib, ...) so is used to find the lexers lexers = {} markersLexers = ["/* Lexers */ =", "children", ");"] for lexerLine in lines[FindSectionInList(lines, markersLexers)]: #\t\t\t\t[0-9A-F]+ /* [a-zA-Z]+.cxx */, uid, _, rest = lexerLine.partition("/* ") uid = uid.strip() lexer, _, _ = rest.partition(".") lexers[lexer] = uidsOfBuild[lexer] return lexers # Properties that start with lexer. or fold. are automatically found but there are some # older properties that don't follow this pattern so must be explicitly listed. knownIrregularProperties = [ "fold", "styling.within.preprocessor", "tab.timmy.whinge.level", "asp.default.language", "html.tags.case.sensitive", "ps.level", "ps.tokenize", "sql.backslash.escapes", "nsis.uservars", "nsis.ignorecase" ] def FindProperties(lexFile): """ Return a set of property names in a lexer implementation file. """ properties = set() with open(lexFile, encoding=neutralEncoding) as f: for s in f.readlines(): if ("GetProperty" in s or "DefineProperty" in s) and "\"" in s: s = s.strip() if not s.startswith("//"): # Drop comments propertyName = s.split("\"")[1] if propertyName.lower() == propertyName: # Only allow lower case property names if propertyName in knownIrregularProperties or \ propertyName.startswith("fold.") or \ propertyName.startswith("lexer."): properties.add(propertyName) return properties def FindPropertyDocumentation(lexFile): """ Return a dictionary { name: document string } of property documentation in a lexer. """ documents = {} with lexFile.open(encoding=neutralEncoding) as f: name = "" for line in f.readlines(): line = line.strip() if "// property " in line: propertyName = line.split()[2] if propertyName.lower() == propertyName: # Only allow lower case property names name = propertyName documents[name] = "" elif "DefineProperty" in line and "\"" in line: propertyName = line.split("\"")[1] if propertyName.lower() == propertyName: # Only allow lower case property names name = propertyName documents[name] = "" elif name: if line.startswith("//"): if documents[name]: documents[name] += " " documents[name] += line[2:].strip() elif line.startswith("\""): line = line[1:].strip() if line.endswith(";"): line = line[:-1].strip() if line.endswith(")"): line = line[:-1].strip() if line.endswith("\""): line = line[:-1] # Fix escaped double quotes line = line.replace("\\\"", "\"") documents[name] += line else: name = "" for name in list(documents.keys()): if documents[name] == "": del documents[name] return documents def FindCredits(historyFile): """ Return a list of contributors in a history file. """ creditList = [] stage = 0 with historyFile.open(encoding="utf-8") as f: for line in f.readlines(): line = line.strip() if stage == 0 and line == "