notepad-plus-plus/lexilla/lexlib/CharacterSet.h

// Scintilla source code edit control
/** @file CharacterSet.h
 ** Encapsulates a set of characters. Used to test if a character is within a set.
 **/
// Copyright 2007 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.

#ifndef CHARACTERSET_H
#define CHARACTERSET_H

namespace Lexilla {

/**
 * Set of character values in the range [0, N), stored as one bit per value.
 * Values at or above N are not stored individually: Contains reports the
 * single valueAfter flag, chosen at construction, for all of them.
 */
template<int N>
class CharacterSetArray {
	// Bit array covering values 0..N-1, rounded up to whole bytes.
	unsigned char bset[(N-1)/8 + 1] = {};
	// Result returned by Contains for any value >= N.
	bool valueAfter = false;
public:
	/// Flags selecting predefined ASCII ranges to add at construction; may be combined.
	enum setBase {
		setNone=0,
		setLower=1,
		setUpper=2,
		setDigits=4,
		setAlpha=setLower|setUpper,
		setAlphaNum=setAlpha|setDigits
	};
	/**
	 * Build a set from the predefined ranges selected by base plus every
	 * character of the NUL-terminated initialSet (a null pointer adds nothing).
	 * valueAfter_ is the answer Contains gives for values >= N.
	 */
	CharacterSetArray(setBase base=setNone, const char *initialSet="", bool valueAfter_=false) noexcept {
		valueAfter = valueAfter_;
		AddString(initialSet);
		if (base & setLower)
			AddString("abcdefghijklmnopqrstuvwxyz");
		if (base & setUpper)
			AddString("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
		if (base & setDigits)
			AddString("0123456789");
	}
	/// Build a set holding exactly the characters of initialSet (no predefined ranges).
	CharacterSetArray(const char *initialSet, bool valueAfter_=false) noexcept :
		CharacterSetArray(setNone, initialSet, valueAfter_) {
	}
	// For compatibility with previous version but should not be used in new code.
	// size_ is only checked against N by an assert; it has no other effect.
	CharacterSetArray(setBase base, const char *initialSet, [[maybe_unused]]int size_, bool valueAfter_=false) noexcept :
		CharacterSetArray(base, initialSet, valueAfter_) {
		assert(size_ == N);
	}
	/// Add the single value val, which must satisfy 0 <= val < N.
	void Add(int val) noexcept {
		assert(val >= 0);
		assert(val < N);
		// The asserts disappear when NDEBUG is defined; ignore an out-of-range
		// value there rather than writing outside bset.
		if (val < 0 || val >= N)
			return;
		bset[val >> 3] |= 1 << (val & 7);
	}
	/// Add every character of the NUL-terminated string setToAdd; a null pointer adds nothing.
	void AddString(const char *setToAdd) noexcept {
		if (!setToAdd)
			return;
		for (const char *cp=setToAdd; *cp; cp++) {
			// Go through unsigned char so bytes >= 0x80 do not become negative.
			const unsigned char uch = *cp;
			assert(uch < N);
			Add(uch);
		}
	}
	/// Test membership of val: false when negative, valueAfter when >= N, otherwise the stored bit.
	bool Contains(int val) const noexcept {
		assert(val >= 0);
		if (val < 0) return false;
		if (val >= N) return valueAfter;
		return (bset[val >> 3] & (1 << (val & 7))) != 0;
	}
	/// Test membership of a char, treating it as an unsigned byte.
	bool Contains(char ch) const noexcept {
		// Overload char as char may be signed
		const unsigned char uch = ch;
		return Contains(uch);
	}
};

using CharacterSet = CharacterSetArray<0x80>;

// Functions for classifying characters

/**
 * Report whether t compares equal to at least one of args.
 * With no args the fold is empty and the result is false.
 */
template <typename T, typename... Args>
constexpr bool AnyOf(T t, Args... args) noexcept {
#if defined(__clang__)
	// Compiler builtin check that T is an integral type (clang only).
	static_assert(__is_integral(T));
#endif
	const bool matched = (... || (t == args));
	return matched;
}

// prevent pointer without <type_traits>
// These overloads take a pointer as the first argument, do nothing and return
// void, so a call such as AnyOf(ptr, ...) yields no value that could be used as
// a bool. They are intended to catch pointer arguments passed by mistake.
template <typename T, typename... Args>
constexpr void AnyOf([[maybe_unused]] T *t, [[maybe_unused]] Args... args) noexcept {}
template <typename T, typename... Args>
constexpr void AnyOf([[maybe_unused]] const T *t, [[maybe_unused]] Args... args) noexcept {}

/// True for ' ' and for the control characters 0x09 through 0x0d inclusive.
constexpr bool IsASpace(int ch) noexcept {
	const bool isBlank = (ch == ' ');
	const bool isControlSpace = !((ch < 0x09) || (ch > 0x0d));
	return isBlank || isControlSpace;
}

/// True only for the space character and the horizontal tab.
constexpr bool IsASpaceOrTab(int ch) noexcept {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}

/// True when ch is one of the decimal digits '0'..'9'.
constexpr bool IsADigit(int ch) noexcept {
	const bool outsideRange = (ch < '0') || (ch > '9');
	return !outsideRange;
}

/// True when ch is a hexadecimal digit: '0'..'9', 'A'..'F' or 'a'..'f'.
constexpr bool IsAHeXDigit(int ch) noexcept {
	if (ch >= '0' && ch <= '9')
		return true;
	if (ch >= 'A' && ch <= 'F')
		return true;
	return ch >= 'a' && ch <= 'f';
}

/// True when ch is one of the octal digits '0'..'7'.
constexpr bool IsAnOctalDigit(int ch) noexcept {
	if (ch < '0')
		return false;
	return ch <= '7';
}

/**
 * True when ch is a digit in the given base.
 * For bases up to 10 only '0' and the following base-1 characters qualify.
 * For larger bases '0'..'9' qualify together with the first base-10 letters
 * counted from 'A' or from 'a'.
 */
constexpr bool IsADigit(int ch, int base) noexcept {
	if (base > 10) {
		if ((ch >= '0') && (ch <= '9'))
			return true;
		if ((ch >= 'A') && (ch < 'A' + base - 10))
			return true;
		return (ch >= 'a') && (ch < 'a' + base - 10);
	}
	return (ch >= '0') && (ch < '0' + base);
}

/// True when ch lies in the 7-bit range 0..0x7f.
constexpr bool IsASCII(int ch) noexcept {
	if (ch < 0)
		return false;
	return ch < 0x80;
}

/// True when ch is one of the ASCII letters 'a'..'z'.
constexpr bool IsLowerCase(int ch) noexcept {
	const bool outsideRange = (ch < 'a') || (ch > 'z');
	return !outsideRange;
}

/// True when ch is one of the ASCII letters 'A'..'Z'.
constexpr bool IsUpperCase(int ch) noexcept {
	const bool outsideRange = (ch < 'A') || (ch > 'Z');
	return !outsideRange;
}

/// True when ch is an ASCII letter of either case.
constexpr bool IsUpperOrLowerCase(int ch) noexcept {
	if (IsUpperCase(ch))
		return true;
	return IsLowerCase(ch);
}

/// True when ch is an ASCII decimal digit or an ASCII letter of either case.
constexpr bool IsAlphaNumeric(int ch) noexcept {
	if ((ch >= '0') && (ch <= '9'))
		return true;
	if ((ch >= 'a') && (ch <= 'z'))
		return true;
	return (ch >= 'A') && (ch <= 'Z');
}

/**
 * Test for white space: the space character or any of 0x09..0x0d.
 * Only ASCII values match, so values of 0x80 and above simply yield false.
 */
constexpr bool isspacechar(int ch) noexcept {
	if (ch == ' ')
		return true;
	return (ch >= 0x09) && (ch <= 0x0d);
}

/// True for ASCII letters and digits and for '.' and '_'.
constexpr bool iswordchar(int ch) noexcept {
	if (ch == '.' || ch == '_')
		return true;
	return IsAlphaNumeric(ch);
}

/// True for ASCII letters and digits and for '_'.
constexpr bool iswordstart(int ch) noexcept {
	if (ch == '_')
		return true;
	return IsAlphaNumeric(ch);
}

/// True when ch is one of the punctuation characters listed below.
constexpr bool isoperator(int ch) noexcept {
	// Letters and digits are rejected up front.
	if (IsAlphaNumeric(ch))
		return false;
	switch (ch) {
	case '%': case '^': case '&': case '*':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '<': case '>': case ',': case '/':
	case '?': case '!': case '.': case '~':
		return true;
	default:
		return false;
	}
}

// Simple case functions for ASCII supersets.

/// Map 'a'..'z' to 'A'..'Z'; every other value is returned unchanged.
template <typename T>
constexpr T MakeUpperCase(T ch) noexcept {
	const bool notLower = (ch < 'a') || (ch > 'z');
	if (!notLower)
		return ch - 'a' + 'A';
	return ch;
}

/// Map 'A'..'Z' to 'a'..'z'; every other value is returned unchanged.
template <typename T>
constexpr T MakeLowerCase(T ch) noexcept {
	const bool notUpper = (ch < 'A') || (ch > 'Z');
	if (!notUpper)
		return ch - 'A' + 'a';
	return ch;
}

// Case-insensitive comparison of two C strings; only declared in this header.
// NOTE(review): presumably these return a negative, zero or positive value in
// the manner of strcmp/strncmp, with len limiting how many characters are
// compared - confirm against the definitions.
// NOTE(review): this header includes nothing itself, so size_t must already be
// declared by whatever includes it.
int CompareCaseInsensitive(const char *a, const char *b) noexcept;
int CompareNCaseInsensitive(const char *a, const char *b, size_t len) noexcept;

}

#endif
Update Scintilla from v4.4.6 to v5.2.1 and add Lexilla v5.1.5 Update with https://www.scintilla.org/scintilla521.zip https://www.scintilla.org/lexilla515.zip - fix setting to bring Scintilla::PositionCR from ScintillaStructures.h inline with Sci_Position.h Sci_PositionCR - add workaround to enable lexer for searchResult commented out SCI_SETILEXER call on searchResult to get one result which is correctly handled by the lexer, added comment about the current problem with property @MarkingsStruct which seems to disappear after call to SCI_SETILEXER or CreateLexer - corrected usage of ObjC lexer - removed unnecessary filter stuff - use own sections for scintilla and lexilla build targets and allow parallel builds - as libscilex is no longer existing, changed to libscintilla - adapt makefiles and cmake - use VS2019 - started simple changes for createlexer adaptations, nullpointercheck missing on return of lexer name from deprecated LexerNameFromID -> undefined behaviour - movement from id -> lexer name, mostly done via LexerNameFromID + switching off corresponding compiler warning - changed to SCI_SETILEXER from SCI_SETLEXER, SCI_SETLEXERLANGUAGE needs to be corrected, see Scintilla5Migration.html - just commented out: SCI_LOADLEXERLIBRARY Fix #10504, close #11419 3 years ago			`// Scintilla source code edit control`
			`/** @file CharacterSet.h`
			`** Encapsulates a set of characters. Used to test if a character is within a set.`
			`**/`
			`// Copyright 2007 by Neil Hodgson <neilh@scintilla.org>`
			`// The License.txt file describes the conditions under which this software may be distributed.`

			`#ifndef CHARACTERSET_H`
			`#define CHARACTERSET_H`

			`namespace Lexilla {`

			`template<int N>`
			`class CharacterSetArray {`
			`unsigned char bset[(N-1)/8 + 1] = {};`
			`bool valueAfter = false;`
			`public:`
			`enum setBase {`
			`setNone=0,`
			`setLower=1,`
			`setUpper=2,`
			`setDigits=4,`
			`setAlpha=setLower\|setUpper,`
			`setAlphaNum=setAlpha\|setDigits`
			`};`
			`CharacterSetArray(setBase base=setNone, const char *initialSet="", bool valueAfter_=false) noexcept {`
			`valueAfter = valueAfter_;`
			`AddString(initialSet);`
			`if (base & setLower)`
			`AddString("abcdefghijklmnopqrstuvwxyz");`
			`if (base & setUpper)`
			`AddString("ABCDEFGHIJKLMNOPQRSTUVWXYZ");`
			`if (base & setDigits)`
			`AddString("0123456789");`
			`}`
Update: Scintilla 5.3.5 Lexilla 5.2.5 update to Scinitlla Release 5.3.5 (https://www.scintilla.org/scintilla535.zip) Released 31 May 2023. On Win32, implement IME context sensitivity with IMR_DOCUMENTFEED. Feature #1310. On Win32 remove dependence on MSIMG32.DLL by replacing AlphaBlend by GdiAlphaBlend. Bug #1923. On Qt, stop movement of IME candidate box. On Qt, report correct caret position within paragraph for IME retrieve surrounding text. On Qt for Cocoa, fix crash in entry of multi-character strings with IME. and Lexilla Release 5.2.5 (https://www.scintilla.org/lexilla525.zip) Released 31 May 2023. Add CharacterSetArray constructor without setBase initial argument for common case where this is setNone and the initialSet argument completely defines the characters. This shortens and clarifies use of CharacterSetArray. Bash: implement highlighting inside quoted elements and here-docs. Controlled with properties lexer.bash.styling.inside.string, lexer.bash.styling.inside.backticks, lexer.bash.styling.inside.parameter, and lexer.bash.styling.inside.heredoc. Issue #154, Issue #153, Feature #1033. Bash: add property lexer.bash.command.substitution to choose how to style command substitutions. 0 → SCE_SH_BACKTICKS; 1 → surrounding "$(" and ")" as operators and contents styled as bash code; 2 → use distinct styles (base style + 64) for contents. Choice (2) is a provisional feature and details may change before it is finalized. Issue #153. Bash: fix nesting of parameters (SCE_SH_PARAM) like ${var/$sub/"${rep}}"}. Issue #154. Bash: fix single character special parameters like $? by limiting style. Issue #154. Bash: treat "$$" as special parameter and end scalars before "$". Issue #154. Bash: treat "<<" in arithmetic contexts as left bitwise shift operator instead of here-doc. Issue #137. Batch: style SCE_BAT_AFTER_LABEL used for rest of line after label which is not executed. Issue #148. F#: Lex interpolated verbatim strings as verbatim. Issue #156. 
VB: allow multiline strings when lexer.vb.strings.multiline set. Issue #151. Close #13729 2 years ago			`CharacterSetArray(const char *initialSet, bool valueAfter_=false) noexcept :`
			`CharacterSetArray(setNone, initialSet, valueAfter_) {`
			`}`
Update Scintilla from v4.4.6 to v5.2.1 and add Lexilla v5.1.5 Update with https://www.scintilla.org/scintilla521.zip https://www.scintilla.org/lexilla515.zip - fix setting to bring Scintilla::PositionCR from ScintillaStructures.h inline with Sci_Position.h Sci_PositionCR - add workaround to enable lexer for searchResult commented out SCI_SETILEXER call on searchResult to get one result which is correctly handled by the lexer, added comment about the current problem with property @MarkingsStruct which seems to disappear after call to SCI_SETILEXER or CreateLexer - corrected usage of ObjC lexer - removed unnecessary filter stuff - use own sections for scintilla and lexilla build targets and allow parallel builds - as libscilex is no longer existing, changed to libscintilla - adapt makefiles and cmake - use VS2019 - started simple changes for createlexer adaptations, nullpointercheck missing on return of lexer name from deprecated LexerNameFromID -> undefined behaviour - movement from id -> lexer name, mostly done via LexerNameFromID + switching off corresponding compiler warning - changed to SCI_SETILEXER from SCI_SETLEXER, SCI_SETLEXERLANGUAGE needs to be corrected, see Scintilla5Migration.html - just commented out: SCI_LOADLEXERLIBRARY Fix #10504, close #11419 3 years ago			`// For compatibility with previous version but should not be used in new code.`
			`CharacterSetArray(setBase base, const char *initialSet, [[maybe_unused]]int size_, bool valueAfter_=false) noexcept :`
			`CharacterSetArray(base, initialSet, valueAfter_) {`
			`assert(size_ == N);`
			`}`
			`void Add(int val) noexcept {`
			`assert(val >= 0);`
			`assert(val < N);`
			`bset[val >> 3] \|= 1 << (val & 7);`
			`}`
			`void AddString(const char *setToAdd) noexcept {`
			`for (const char *cp=setToAdd; *cp; cp++) {`
			`const unsigned char uch = *cp;`
			`assert(uch < N);`
			`Add(uch);`
			`}`
			`}`
			`bool Contains(int val) const noexcept {`
			`assert(val >= 0);`
			`if (val < 0) return false;`
			`if (val >= N) return valueAfter;`
			`return bset[val >> 3] & (1 << (val & 7));`
			`}`
			`bool Contains(char ch) const noexcept {`
			`// Overload char as char may be signed`
			`const unsigned char uch = ch;`
			`return Contains(uch);`
			`}`
			`};`

			`using CharacterSet = CharacterSetArray<0x80>;`

			`// Functions for classifying characters`

			`template <typename T, typename... Args>`
			`constexpr bool AnyOf(T t, Args... args) noexcept {`
			`#if defined(__clang__)`
			`static_assert(__is_integral(T));`
			`#endif`
			`return ((t == args) \|\| ...);`
			`}`

			`// prevent pointer without <type_traits>`
			`template <typename T, typename... Args>`
			`constexpr void AnyOf([[maybe_unused]] T *t, [[maybe_unused]] Args... args) noexcept {}`
			`template <typename T, typename... Args>`
			`constexpr void AnyOf([[maybe_unused]] const T *t, [[maybe_unused]] Args... args) noexcept {}`

			`constexpr bool IsASpace(int ch) noexcept {`
			`return (ch == ' ') \|\| ((ch >= 0x09) && (ch <= 0x0d));`
			`}`

			`constexpr bool IsASpaceOrTab(int ch) noexcept {`
			`return (ch == ' ') \|\| (ch == '\t');`
			`}`

			`constexpr bool IsADigit(int ch) noexcept {`
			`return (ch >= '0') && (ch <= '9');`
			`}`

Update to scintilla 5.3.1 and lexilla 5.2.0 Close #12327 2 years ago			`constexpr bool IsAHeXDigit(int ch) noexcept {`
			`return (ch >= '0' && ch <= '9')`
			`\|\| (ch >= 'A' && ch <= 'F')`
			`\|\| (ch >= 'a' && ch <= 'f');`
			`}`

			`constexpr bool IsAnOctalDigit(int ch) noexcept {`
			`return ch >= '0' && ch <= '7';`
			`}`

Update Scintilla from v4.4.6 to v5.2.1 and add Lexilla v5.1.5 Update with https://www.scintilla.org/scintilla521.zip https://www.scintilla.org/lexilla515.zip - fix setting to bring Scintilla::PositionCR from ScintillaStructures.h inline with Sci_Position.h Sci_PositionCR - add workaround to enable lexer for searchResult commented out SCI_SETILEXER call on searchResult to get one result which is correctly handled by the lexer, added comment about the current problem with property @MarkingsStruct which seems to disappear after call to SCI_SETILEXER or CreateLexer - corrected usage of ObjC lexer - removed unnecessary filter stuff - use own sections for scintilla and lexilla build targets and allow parallel builds - as libscilex is no longer existing, changed to libscintilla - adapt makefiles and cmake - use VS2019 - started simple changes for createlexer adaptations, nullpointercheck missing on return of lexer name from deprecated LexerNameFromID -> undefined behaviour - movement from id -> lexer name, mostly done via LexerNameFromID + switching off corresponding compiler warning - changed to SCI_SETILEXER from SCI_SETLEXER, SCI_SETLEXERLANGUAGE needs to be corrected, see Scintilla5Migration.html - just commented out: SCI_LOADLEXERLIBRARY Fix #10504, close #11419 3 years ago			`constexpr bool IsADigit(int ch, int base) noexcept {`
			`if (base <= 10) {`
			`return (ch >= '0') && (ch < '0' + base);`
			`} else {`
			`return ((ch >= '0') && (ch <= '9')) \|\|`
			`((ch >= 'A') && (ch < 'A' + base - 10)) \|\|`
			`((ch >= 'a') && (ch < 'a' + base - 10));`
			`}`
			`}`

			`constexpr bool IsASCII(int ch) noexcept {`
			`return (ch >= 0) && (ch < 0x80);`
			`}`

			`constexpr bool IsLowerCase(int ch) noexcept {`
			`return (ch >= 'a') && (ch <= 'z');`
			`}`

			`constexpr bool IsUpperCase(int ch) noexcept {`
			`return (ch >= 'A') && (ch <= 'Z');`
			`}`

			`constexpr bool IsUpperOrLowerCase(int ch) noexcept {`
			`return IsUpperCase(ch) \|\| IsLowerCase(ch);`
			`}`

			`constexpr bool IsAlphaNumeric(int ch) noexcept {`
			`return`
			`((ch >= '0') && (ch <= '9')) \|\|`
			`((ch >= 'a') && (ch <= 'z')) \|\|`
			`((ch >= 'A') && (ch <= 'Z'));`
			`}`

			`/**`
			`* Check if a character is a space.`
			`* This is ASCII specific but is safe with chars >= 0x80.`
			`*/`
			`constexpr bool isspacechar(int ch) noexcept {`
			`return (ch == ' ') \|\| ((ch >= 0x09) && (ch <= 0x0d));`
			`}`

			`constexpr bool iswordchar(int ch) noexcept {`
			`return IsAlphaNumeric(ch) \|\| ch == '.' \|\| ch == '_';`
			`}`

			`constexpr bool iswordstart(int ch) noexcept {`
			`return IsAlphaNumeric(ch) \|\| ch == '_';`
			`}`

			`constexpr bool isoperator(int ch) noexcept {`
			`if (IsAlphaNumeric(ch))`
			`return false;`
			`if (ch == '%' \|\| ch == '^' \|\| ch == '&' \|\| ch == '*' \|\|`
			`ch == '(' \|\| ch == ')' \|\| ch == '-' \|\| ch == '+' \|\|`
			`ch == '=' \|\| ch == '\|' \|\| ch == '{' \|\| ch == '}' \|\|`
			`ch == '[' \|\| ch == ']' \|\| ch == ':' \|\| ch == ';' \|\|`
			`ch == '<' \|\| ch == '>' \|\| ch == ',' \|\| ch == '/' \|\|`
			`ch == '?' \|\| ch == '!' \|\| ch == '.' \|\| ch == '~')`
			`return true;`
			`return false;`
			`}`

			`// Simple case functions for ASCII supersets.`

			`template <typename T>`
			`constexpr T MakeUpperCase(T ch) noexcept {`
			`if (ch < 'a' \|\| ch > 'z')`
			`return ch;`
			`else`
			`return ch - 'a' + 'A';`
			`}`

			`template <typename T>`
			`constexpr T MakeLowerCase(T ch) noexcept {`
			`if (ch < 'A' \|\| ch > 'Z')`
			`return ch;`
			`else`
			`return ch - 'A' + 'a';`
			`}`

			`int CompareCaseInsensitive(const char *a, const char *b) noexcept;`
			`int CompareNCaseInsensitive(const char *a, const char *b, size_t len) noexcept;`

			`}`

			`#endif`