// Scintilla source code edit control /** @file CharacterSet.h ** Encapsulates a set of characters. Used to test if a character is within a set. **/ // Copyright 2007 by Neil Hodgson // The License.txt file describes the conditions under which this software may be distributed. #ifndef CHARACTERSET_H #define CHARACTERSET_H namespace Scintilla { class CharacterSet { int size; bool valueAfter; bool *bset; public: enum setBase { setNone=0, setLower=1, setUpper=2, setDigits=4, setAlpha=setLower|setUpper, setAlphaNum=setAlpha|setDigits }; CharacterSet(setBase base=setNone, const char *initialSet="", int size_=0x80, bool valueAfter_=false) { size = size_; valueAfter = valueAfter_; bset = new bool[size]; for (int i=0; i < size; i++) { bset[i] = false; } AddString(initialSet); if (base & setLower) AddString("abcdefghijklmnopqrstuvwxyz"); if (base & setUpper) AddString("ABCDEFGHIJKLMNOPQRSTUVWXYZ"); if (base & setDigits) AddString("0123456789"); } CharacterSet(const CharacterSet &other) { size = other.size; valueAfter = other.valueAfter; bset = new bool[size]; for (int i=0; i < size; i++) { bset[i] = other.bset[i]; } } CharacterSet(CharacterSet &&other) noexcept { size = other.size; valueAfter = other.valueAfter; bset = other.bset; other.size = 0; other.bset = nullptr; } CharacterSet &operator=(const CharacterSet &other) { if (this != &other) { bool *bsetNew = new bool[other.size]; for (int i = 0; i < other.size; i++) { bsetNew[i] = other.bset[i]; } delete[]bset; size = other.size; valueAfter = other.valueAfter; bset = bsetNew; } return *this; } CharacterSet &operator=(CharacterSet &&other) noexcept { if (this != &other) { delete []bset; size = other.size; valueAfter = other.valueAfter; bset = other.bset; other.size = 0; other.bset = nullptr; } return *this; } ~CharacterSet() { delete []bset; bset = nullptr; size = 0; } void Add(int val) { assert(val >= 0); assert(val < size); bset[val] = true; } void AddString(const char *setToAdd) { for (const char *cp=setToAdd; *cp; cp++) { const unsigned char uch = *cp; assert(uch < size); bset[uch] = true; } } bool Contains(int val) const noexcept { assert(val >= 0); if (val < 0) return false; return (val < size) ? bset[val] : valueAfter; } bool Contains(char ch) const noexcept { // Overload char as char may be signed const unsigned char uch = ch; return Contains(uch); } }; // Functions for classifying characters constexpr bool IsASpace(int ch) noexcept { return (ch == ' ') || ((ch >= 0x09) && (ch <= 0x0d)); } constexpr bool IsASpaceOrTab(int ch) noexcept { return (ch == ' ') || (ch == '\t'); } constexpr bool IsADigit(int ch) noexcept { return (ch >= '0') && (ch <= '9'); } constexpr bool IsADigit(int ch, int base) noexcept { if (base <= 10) { return (ch >= '0') && (ch < '0' + base); } else { return ((ch >= '0') && (ch <= '9')) || ((ch >= 'A') && (ch < 'A' + base - 10)) || ((ch >= 'a') && (ch < 'a' + base - 10)); } } constexpr bool IsASCII(int ch) noexcept { return (ch >= 0) && (ch < 0x80); } constexpr bool IsLowerCase(int ch) noexcept { return (ch >= 'a') && (ch <= 'z'); } constexpr bool IsUpperCase(int ch) noexcept { return (ch >= 'A') && (ch <= 'Z'); } constexpr bool IsUpperOrLowerCase(int ch) noexcept { return IsUpperCase(ch) || IsLowerCase(ch); } constexpr bool IsAlphaNumeric(int ch) noexcept { return ((ch >= '0') && (ch <= '9')) || ((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z')); } /** * Check if a character is a space. * This is ASCII specific but is safe with chars >= 0x80. */ constexpr bool isspacechar(int ch) noexcept { return (ch == ' ') || ((ch >= 0x09) && (ch <= 0x0d)); } constexpr bool iswordchar(int ch) noexcept { return IsAlphaNumeric(ch) || ch == '.' || ch == '_'; } constexpr bool iswordstart(int ch) noexcept { return IsAlphaNumeric(ch) || ch == '_'; } constexpr bool isoperator(int ch) noexcept { if (IsAlphaNumeric(ch)) return false; if (ch == '%' || ch == '^' || ch == '&' || ch == '*' || ch == '(' || ch == ')' || ch == '-' || ch == '+' || ch == '=' || ch == '|' || ch == '{' || ch == '}' || ch == '[' || ch == ']' || ch == ':' || ch == ';' || ch == '<' || ch == '>' || ch == ',' || ch == '/' || ch == '?' || ch == '!' || ch == '.' || ch == '~') return true; return false; } // Simple case functions for ASCII supersets. template constexpr T MakeUpperCase(T ch) noexcept { if (ch < 'a' || ch > 'z') return ch; else return ch - 'a' + 'A'; } template constexpr T MakeLowerCase(T ch) noexcept { if (ch < 'A' || ch > 'Z') return ch; else return ch - 'A' + 'a'; } int CompareCaseInsensitive(const char *a, const char *b) noexcept; int CompareNCaseInsensitive(const char *a, const char *b, size_t len) noexcept; } #endif