notepad-plus-plus/scintilla/scripts/CheckMentioned.py

#!/usr/bin/env python3
# CheckMentioned.py
# Find all the symbols in scintilla/include/Scintilla.h and check if they
# are mentioned in scintilla/doc/ScintillaDoc.html.
# Requires Python 2.7 or later

import re, string, sys

# Root of the source tree. It is a relative path, so every file name derived
# from it is resolved against the current working directory.
srcRoot = "../.."

# Extend the module search path so the Face and ScintillaData imports resolve.
sys.path.append("/".join([srcRoot, "scintilla", "scripts"]))

import Face
import ScintillaData

# Defines that are skipped when undocumented symbols are reported.
_skippedDefines = (
	"SCINTILLA_H",
	"SCI_START",
	"SCI_LEXER_START",
	"SCI_OPTIONAL_START",
)

# These archaic names are #defined to the Sci_ prefixed modern equivalents.
# They are not documented so they are not used in new code.
_archaicNames = (
	"CharacterRange",
	"TextRange",
	"TextToFind",
	"RangeToFormat",
	"NotifyHeader",
)

uninteresting = set(_skippedDefines + _archaicNames)

# The header whose #define names are collected, and the HTML documentation
# that is searched for them; both are located relative to srcRoot.
incFileName = "%s/scintilla/include/Scintilla.h" % srcRoot
docFileName = "%s/scintilla/doc/ScintillaDoc.html" % srcRoot
# Characters that may appear in an identifier. string.letters does not exist
# on Python 3, where the AttributeError selects string.ascii_letters instead.
try:	# Old Python
	identCharacters = "_" + string.letters + string.digits
except AttributeError:	# Python 3.x
	identCharacters = "_" + string.ascii_letters + string.digits

def depunctuate(s):
	"""Return s with every non-identifier character replaced by a space.

	Characters found in identCharacters (letters, digits and '_') are kept;
	every other character, including whitespace, becomes a single space, so
	the result always has the same length as s.
	"""
	# Build the result with one join instead of repeated concatenation,
	# which copies the growing string for every character.
	return "".join([ch if ch in identCharacters else " " for ch in s])

# Map each name introduced by a "#define" line of the header to 0; the
# documentation scan later changes the value to 1 for names it finds.
symbols = {}
with open(incFileName, "rt") as incFile:
	for headerLine in incFile:
		if not headerLine.startswith("#define"):
			continue
		# The defined name is the second whitespace-separated field.
		symbols[headerLine.split()[1]] = 0

# Flag every collected symbol that occurs as a whole word in the documentation.
with open(docFileName, "rt") as docFile:
	# Iterate the file lazily rather than loading all lines with readlines().
	for line in docFile:
		for word in depunctuate(line).split():
			# Test the dict itself: on Python 2 symbols.keys() builds a new
			# list for every word, turning each lookup into a linear search.
			if word in symbols:
				symbols[word] = 1

# Interface types whose spelling in a signature differs from their own name.
# Values end in a space unless they end in '*', so a parameter name can be
# appended directly.
_ifaceTypeToC = {
	"keymod": "int ",
	"string": "const char *",
	"stringresult": "char *",
	"cells": "cell *",
	"textrange": "Sci_TextRange *",
	"findtext": "Sci_TextToFind *",
	"formatrange": "Sci_RangeToFormat *",
}

def convertIFaceTypeToC(t):
	"""Return the type prefix used in signatures for interface type t.

	Types listed in the table above use their fixed spelling, types that
	Face.IsEnumeration accepts become "int ", and anything else is returned
	as t followed by a space.
	"""
	if t in _ifaceTypeToC:
		return _ifaceTypeToC[t]
	if Face.IsEnumeration(t):
		return "int "
	return t + " "

def makeParm(t, n, v):
	"""Return the text of one parameter: the converted type followed by name n.

	Trailing whitespace is removed, so an empty type with an empty name
	gives "". The value argument v is accepted but not used.
	"""
	declaration = convertIFaceTypeToC(t) + n
	return declaration.rstrip()

def makeRet(params):
	"""Return the return-type suffix for the feature described by params.

	The result is " &rarr; " followed by params["ReturnType"], except that
	"void", "string" and "stringresult" give "" and a false value is
	returned unchanged.
	"""
	shown = params["ReturnType"]
	if shown in ("void", "string", "stringresult"):
		return ""
	if not shown:
		return shown
	return " &rarr; " + shown

def makeSig(params):
	"""Return the parameter list and return-type suffix for a feature.

	params is a feature dictionary providing "Param1Type", "Param1Name",
	"Param1Value", the matching "Param2..." keys and "ReturnType".

	The result is "(p1, p2)" followed by " &rarr; type". The return part is
	omitted for "void", "string" and "stringresult", and enumeration return
	types are shown as "int". With no parameters only the return part is
	returned; when just the second parameter exists the first is shown as
	"&lt;unused&gt;".
	"""
	p1 = makeParm(params["Param1Type"], params["Param1Name"], params["Param1Value"])
	p2 = makeParm(params["Param2Type"], params["Param2Name"], params["Param2Value"])

	retType = params["ReturnType"]
	if retType in ["void", "string", "stringresult"]:
		retType = ""
	elif Face.IsEnumeration(retType):
		retType = "int"
	if retType:
		retType = " &rarr; " + retType

	if p1 == "" and p2 == "":
		return retType

	# A lone second parameter still needs a placeholder in the first position.
	if p1 == "":
		p1 = "&lt;unused&gt;"
	joiner = ", " if p2 != "" else ""
	return "(" + p1 + joiner + p2 + ")" + retType

# Interface definition file read by retrieveFeatures.
pathIface = "/".join([srcRoot, "scintilla/include/Scintilla.iface"])

def retrieveFeatures():
	"""Read the interface file and index its features by C define name.

	Returns a tuple (face, sciToFeature, sccToValue):
	face is the Face.Face object after reading pathIface.
	sciToFeature maps "SCI_" + upper-cased name to the feature name for
	features of type "fun", "get" or "set".
	sccToValue maps define names to value strings: upper-cased names of
	"val" features and "SCN_"-prefixed upper-cased names of "evt" features,
	on top of four fixed entries.
	"""
	# NOTE(review): the object built here is never used afterwards. The call
	# is kept in case constructing it has effects not visible in this file.
	sci = ScintillaData.ScintillaData(srcRoot + "/scintilla/")
	iface = Face.Face()
	iface.ReadFromFile(pathIface)
	messageToFeature = {}
	constantToValue = { "true":"1", "false":"0", "EN_SETFOCUS":"256", "EN_KILLFOCUS":"512"}
	for featureName in iface.order:
		feature = iface.features[featureName]
		kind = feature["FeatureType"]
		if kind in ("fun", "get", "set"):
			messageToFeature["SCI_" + featureName.upper()] = featureName
		elif kind == "val":
			constantToValue[featureName.upper()] = feature["Value"]
		elif kind == "evt":
			constantToValue["SCN_" + featureName.upper()] = feature["Value"]
	return (iface, messageToFeature, constantToValue)

def flattenSpaces(s):
	"""Return s on one line with each run of spaces reduced to one space.

	Newlines are first turned into spaces, then every run of two or more
	spaces is collapsed, and finally leading and trailing whitespace is
	stripped. Runs of any length are handled, so a line break followed by
	deep indentation cannot leave a double space behind.
	"""
	singleLine = s.replace("\n", " ")
	return re.sub(" {2,}", " ", singleLine).strip()

def printCtag(ident, path):
	"""Print a ctags-style line locating ident in the file at path.

	The line has three tab-separated fields: ident with surrounding
	whitespace stripped, path, and the search pattern /^ident$/ built from
	ident exactly as passed in.
	"""
	searchPattern = "/^" + ident + "$/"
	print("\t".join([ident.strip(), path, searchPattern]))

# When True, every signature mismatch is followed by a line from printCtag
# that locates the entry in the documentation file.
showCTags = True

def checkDocumentation():
	"""Compare the HTML documentation with the interface file and print differences.

	Three kinds of entry are checked in the text of docFileName:
	- header links of the form <a class="message" href="#API">signature</a>,
	- definitions of the form <b id="API">signature</b>,
	- constants of the form <code>NAME</code> (value).
	Header and definition signatures are compared with the signature built
	by makeSig for the feature; constants are compared with the values from
	retrieveFeatures. Each difference is printed.

	The documentation text is also written unchanged to a file whose name is
	docFileName with "Doc" replaced by "Dox".
	"""
	with open(docFileName, "rt") as docFile:
		docs = docFile.read()

	face, sciToFeature, sccToValue = retrieveFeatures()

	# Only filled in; they are read solely by the disabled reports below.
	headers = {}
	definitions = {}

	# Examine header sections which point to definitions
	#<a class="message" href="#SCI_SETLAYOUTCACHE">SCI_SETLAYOUTCACHE(int cacheMode)</a><br />
	dirPattern = re.compile(r'<a class="message" href="#([A-Z0-9_]+)">([A-Z][A-Za-z0-9_() *&;,\n]+)</a>')
	for api, sig in dirPattern.findall(docs):
		# Raw string: '\W' in a plain literal is an invalid escape sequence.
		sigApi = re.split(r'\W+', sig)[0]
		sigFlat = flattenSpaces(sig)
		# Normalise spellings used in the documentation before comparing.
		sigFlat = sigFlat.replace('alpha ', 'int ')
		sigFlat = sigFlat.replace("document *", "int ")
		sigFlat = sigFlat.rstrip()
		if '(' in sigFlat or api.startswith("SCI_"):
			# NOTE(review): unlike the definitions loop below, an api that is
			# missing from sciToFeature raises KeyError here and stops the run.
			name = sciToFeature[api]
			sigFromFace = api + makeSig(face.features[name])
			if sigFlat != sigFromFace:
				print(sigFlat, "|", sigFromFace)
				if showCTags:
					printCtag(api, docFileName)
				#~ printCtag(" " + name, pathIface)
		# The link target and the name at the start of the link text should agree.
		if api != sigApi:
			print(sigApi, ";;", sig, ";;", api)
		headers[api] = 1
	# Warns for most keyboard commands so not enabled
	#~ for api in sorted(sciToFeature.keys()):
		#~ if api not in headers:
			#~ print("No header for ", api)

	# Examine  definitions
	#<b id="SCI_SETLAYOUTCACHE">SCI_SETLAYOUTCACHE(int cacheMode)</b>
	defPattern = re.compile(r'<b id="([A-Z_0-9]+)">([A-Z][A-Za-z0-9_() *#\"=<>/&;,\n-]+?)</b>')
	for api, sig in defPattern.findall(docs):
		sigFlat = flattenSpaces(sig)
		if '<a' in sigFlat:	# Remove anchors
			sigFlat = re.sub('<a.*>(.+)</a>', r'\1', sigFlat)
		# Normalise spellings used in the documentation before comparing.
		sigFlat = sigFlat.replace('alpha ', 'int ')
		sigFlat = sigFlat.replace("document *", "int ")
		sigFlat = sigFlat.replace(' NUL-terminated', '')
		sigFlat = sigFlat.rstrip()
		sigApi = re.split(r'\W+', sigFlat)[0]
		#~ print(sigFlat, ";;", sig, ";;", api)
		if '(' in sigFlat or api.startswith("SCI_"):
			try:
				name = sciToFeature[api]
				sigFromFace = api + makeSig(face.features[name])
				if sigFlat != sigFromFace:
					print(sigFlat, "|", sigFromFace)
					if showCTags:
						printCtag('="' + api, docFileName)
					#~ printCtag(" " + name, pathIface)
			except KeyError:
				pass		# Feature removed but still has documentation
		# The id attribute and the name at the start of the text should agree.
		if api != sigApi:
			print(sigApi, ";;", sig, ";;", api)
		definitions[api] = 1
	# Warns for most keyboard commands so not enabled
	#~ for api in sorted(sciToFeature.keys()):
		#~ if api not in definitions:
			#~ print("No definition for ", api)

	# Write the text that was read, unmodified, to the "Dox" file.
	outName = docFileName.replace("Doc", "Dox")
	with open(outName, "wt") as docFile:
		docFile.write(docs)

	# Examine  constant definitions
	#<code>SC_CARETSTICKY_WHITESPACE</code> (2)
	constPattern = re.compile(r'<code>(\w+)</code> *\((\w+)\)')
	for name, val in constPattern.findall(docs):
		try:
			valOfName = sccToValue[name]
			if val != valOfName:
				print(val, "<-", name, ";;", valOfName)
		except KeyError:
			# The documented constant is not known to the interface file.
			print("***", val, "<-", name)

# List, in sorted order, each #define that the documentation never mentions,
# leaving out the names that are deliberately ignored.
for identifier in sorted(symbols):
	if symbols[identifier] or identifier in uninteresting:
		continue
	print(identifier)

# Then compare documented signatures and constant values with the interface file.
checkDocumentation()
Upgrade Scintilla from 4.1.4 to 4.2.0 Fix #5822 5 years ago			`#!/usr/bin/env python3`
			`# CheckMentioned.py`
			`# Find all the symbols in scintilla/include/Scintilla.h and check if they`
			`# are mentioned in scintilla/doc/ScintillaDoc.html.`
			`# Requires Python 2.7 or later`

			`import re, string, sys`

			`srcRoot = "../.."`

			`sys.path.append(srcRoot + "/scintilla/scripts")`

			`import Face`
			`import ScintillaData`

			`uninteresting = {`
			`"SCINTILLA_H", "SCI_START", "SCI_LEXER_START", "SCI_OPTIONAL_START",`
			`# These archaic names are #defined to the Sci_ prefixed modern equivalents.`
			`# They are not documented so they are not used in new code.`
			`"CharacterRange", "TextRange", "TextToFind", "RangeToFormat", "NotifyHeader",`
			`}`

			`incFileName = srcRoot + "/scintilla/include/Scintilla.h"`
			`docFileName = srcRoot + "/scintilla/doc/ScintillaDoc.html"`
			`try: # Old Python`
			`identCharacters = "_" + string.letters + string.digits`
			`except AttributeError: # Python 3.x`
			`identCharacters = "_" + string.ascii_letters + string.digits`

			`# Convert all punctuation characters except '_' into spaces.`
			`def depunctuate(s):`
			`d = ""`
			`for ch in s:`
			`if ch in identCharacters:`
			`d = d + ch`
			`else:`
			`d = d + " "`
			`return d`

			`symbols = {}`
			`with open(incFileName, "rt") as incFile:`
			`for line in incFile.readlines():`
			`if line.startswith("#define"):`
			`identifier = line.split()[1]`
			`symbols[identifier] = 0`

			`with open(docFileName, "rt") as docFile:`
			`for line in docFile.readlines():`
			`for word in depunctuate(line).split():`
			`if word in symbols.keys():`
			`symbols[word] = 1`

			`def convertIFaceTypeToC(t):`
			`if t == "keymod":`
			`return "int "`
			`elif t == "string":`
			`return "const char *"`
			`elif t == "stringresult":`
			`return "char *"`
			`elif t == "cells":`
			`return "cell *"`
			`elif t == "textrange":`
			`return "Sci_TextRange *"`
			`elif t == "findtext":`
			`return "Sci_TextToFind *"`
			`elif t == "formatrange":`
			`return "Sci_RangeToFormat *"`
			`elif Face.IsEnumeration(t):`
			`return "int "`
			`return t + " "`

			`def makeParm(t, n, v):`
			`return (convertIFaceTypeToC(t) + n).rstrip()`

			`def makeRet(params):`
			`retType = params["ReturnType"]`
			`if retType in ["void", "string", "stringresult"]:`
			`retType = ""`
			`if retType:`
			`retType = " → " + retType`

			`return retType`

			`def makeSig(params):`
			`p1 = makeParm(params["Param1Type"], params["Param1Name"], params["Param1Value"])`
			`p2 = makeParm(params["Param2Type"], params["Param2Name"], params["Param2Value"])`

			`retType = params["ReturnType"]`
			`if retType in ["void", "string", "stringresult"]:`
			`retType = ""`
			`elif Face.IsEnumeration(retType):`
			`retType = "int"`
			`if retType:`
			`retType = " → " + retType`

			`if p1 == "" and p2 == "":`
			`return retType`

			`ret = ""`
			`if p1 == "":`
			`p1 = "<unused>"`
			`joiner = ""`
			`if p2 != "":`
			`joiner = ", "`
			`return "(" + p1 + joiner + p2 + ")" + retType`

			`pathIface = srcRoot + "/scintilla/include/Scintilla.iface"`

			`def retrieveFeatures():`
			`sci = ScintillaData.ScintillaData(srcRoot + "/scintilla/")`
			`face = Face.Face()`
			`face.ReadFromFile(pathIface)`
			`sciToFeature = {}`
			`sccToValue = { "true":"1", "false":"0", "EN_SETFOCUS":"256", "EN_KILLFOCUS":"512"}`
			`for name in face.order:`
			`v = face.features[name]`
			`if v["FeatureType"] in ["fun", "get", "set"]:`
			`featureDefineName = "SCI_" + name.upper()`
			`sciToFeature[featureDefineName] = name`
			`elif v["FeatureType"] in ["val"]:`
			`featureDefineName = name.upper()`
			`sccToValue[featureDefineName] = v["Value"]`
			`elif v["FeatureType"] in ["evt"]:`
			`featureDefineName = "SCN_" + name.upper()`
			`sccToValue[featureDefineName] = v["Value"]`
			`return (face, sciToFeature, sccToValue)`

			`def flattenSpaces(s):`
			`return s.replace("\n", " ").replace(" ", " ").replace(" ", " ").replace(" ", " ").strip()`

			`def printCtag(ident, path):`
			`print(ident.strip() + "\t" + path + "\t" + "/^" + ident + "$/")`

			`showCTags = True`

			`def checkDocumentation():`
			`with open(docFileName, "rt") as docFile:`
			`docs = docFile.read()`

			`face, sciToFeature, sccToValue = retrieveFeatures()`

			`headers = {}`
			`definitions = {}`

			`# Examine header sections which point to definitions`
			`#<a class="message" href="#SCI_SETLAYOUTCACHE">SCI_SETLAYOUTCACHE(int cacheMode)</a><br />`
			`dirPattern = re.compile(r'<a class="message" href="#([A-Z0-9_]+)">([A-Z][A-Za-z0-9_() *&;,\n]+)</a>')`
			`firstWord = re.compile(r'[A-Z0-9_]+')`
			`for api, sig in re.findall(dirPattern, docs):`
			`sigApi = re.split('\W+', sig)[0]`
			`sigFlat = flattenSpaces(sig)`
			`sigFlat = sigFlat.replace('alpha ', 'int ')`
			`sigFlat = sigFlat.replace("document *", "int ")`
			`sigFlat = sigFlat.rstrip()`
			`if '(' in sigFlat or api.startswith("SCI_"):`
			`name = sciToFeature[api]`
			`sigFromFace = api + makeSig(face.features[name])`
			`if sigFlat != sigFromFace:`
			`print(sigFlat, "\|", sigFromFace)`
			`if showCTags:`
			`printCtag(api, docFileName)`
			`#~ printCtag(" " + name, pathIface)`
			`if api != sigApi:`
			`print(sigApi, ";;", sig, ";;", api)`
			`headers[api] = 1`
			`# Warns for most keyboard commands so not enabled`
			`#~ for api in sorted(sciToFeature.keys()):`
			`#~ if api not in headers:`
			`#~ print("No header for ", api)`

			`# Examine definitions`
			`#<b id="SCI_SETLAYOUTCACHE">SCI_SETLAYOUTCACHE(int cacheMode)</b>`
			`defPattern = re.compile(r'<b id="([A-Z_0-9]+)">([A-Z][A-Za-z0-9_() *#\"=<>/&;,\n-]+?)</b>')`
			`for api, sig in re.findall(defPattern, docs):`
			`sigFlat = flattenSpaces(sig)`
			`if '<a' in sigFlat : # Remove anchors`
			`sigFlat = re.sub('<a.*>(.+)</a>', '\\1', sigFlat)`
			`sigFlat = sigFlat.replace('alpha ', 'int ')`
			`sigFlat = sigFlat.replace("document *", "int ")`
			`sigFlat = sigFlat.replace(' NUL-terminated', '')`
			`sigFlat = sigFlat.rstrip()`
			`#~ sigFlat = sigFlat.replace(' NUL-terminated', '')`
			`sigApi = re.split('\W+', sigFlat)[0]`
			`#~ print(sigFlat, ";;", sig, ";;", api)`
			`if '(' in sigFlat or api.startswith("SCI_"):`
			`try:`
			`name = sciToFeature[api]`
			`sigFromFace = api + makeSig(face.features[name])`
			`if sigFlat != sigFromFace:`
			`print(sigFlat, "\|", sigFromFace)`
			`if showCTags:`
			`printCtag('="' + api, docFileName)`
			`#~ printCtag(" " + name, pathIface)`
			`except KeyError:`
			`pass # Feature removed but still has documentation`
			`if api != sigApi:`
			`print(sigApi, ";;", sig, ";;", api)`
			`definitions[api] = 1`
			`# Warns for most keyboard commands so not enabled`
			`#~ for api in sorted(sciToFeature.keys()):`
			`#~ if api not in definitions:`
			`#~ print("No definition for ", api)`

			`outName = docFileName.replace("Doc", "Dox")`
			`with open(outName, "wt") as docFile:`
			`docFile.write(docs)`

			`# Examine constant definitions`
			`#<code>SC_CARETSTICKY_WHITESPACE</code> (2)`
			`constPattern = re.compile(r'<code>(\w+)</code> *\((\w+)\)')`
			`for name, val in re.findall(constPattern, docs):`
			`try:`
			`valOfName = sccToValue[name]`
			`if val != valOfName:`
			`print(val, "<-", name, ";;", valOfName)`
			`except KeyError:`
			`print("***", val, "<-", name)`

			`for identifier in sorted(symbols.keys()):`
			`if not symbols[identifier] and identifier not in uninteresting:`
			`print(identifier)`

			`checkDocumentation()`