2009-04-24 23:35:41 +00:00
|
|
|
// Scintilla source code edit control
|
|
|
|
/** @file UniConversion.h
|
2010-07-12 22:19:51 +00:00
|
|
|
** Functions to handle UTF-8 and UTF-16 strings.
|
2009-04-24 23:35:41 +00:00
|
|
|
**/
|
|
|
|
// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
|
|
|
|
// The License.txt file describes the conditions under which this software may be distributed.
|
|
|
|
|
2015-06-07 21:19:26 +00:00
|
|
|
#ifndef UNICONVERSION_H
|
|
|
|
#define UNICONVERSION_H
|
|
|
|
|
|
|
|
#ifdef SCI_NAMESPACE
|
|
|
|
namespace Scintilla {
|
|
|
|
#endif
|
|
|
|
|
2013-08-28 00:44:27 +00:00
|
|
|
// Upper bound on the number of bytes in one UTF-8 encoded character
// (UTF-8 sequences are at most 4 bytes long).
const int UTF8MaxBytes = 4;
|
|
|
|
|
2015-06-07 21:19:26 +00:00
|
|
|
// U+FFFD, the Unicode REPLACEMENT CHARACTER.
// NOTE(review): where it is substituted is decided by the conversion code,
// which is not visible in this header.
const int unicodeReplacementChar = 0xFFFD;
|
|
|
|
|
2009-04-24 23:35:41 +00:00
|
|
|
// Presumably returns the number of UTF-8 bytes needed to encode the tlen
// wide characters starting at uptr - implementation is elsewhere; confirm.
unsigned int UTF8Length(const wchar_t *uptr, unsigned int tlen);
|
|
|
|
// Presumably converts the tlen wide characters at uptr to UTF-8, writing
// into putf, with len giving the size of the output - confirm.
// NOTE(review): whether the output is NUL terminated cannot be told from here.
void UTF8FromUTF16(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len);
|
2010-07-12 22:19:51 +00:00
|
|
|
// Presumably returns the length in bytes of the UTF-8 sequence introduced by
// the byte ch - confirm against the implementation.
unsigned int UTF8CharLength(unsigned char ch);
|
2015-06-07 21:19:26 +00:00
|
|
|
// Presumably returns the number of UTF-16 code units needed to represent the
// len bytes of UTF-8 starting at s - implementation is elsewhere; confirm.
size_t UTF16Length(const char *s, size_t len);
|
|
|
|
// Presumably converts the len bytes of UTF-8 at s into UTF-16 in tbuf, a
// buffer with room for tlen code units, and returns the count written - confirm.
size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen);
|
|
|
|
// Presumably converts the len bytes of UTF-8 at s into UTF-32 values in tbuf,
// a buffer with room for tlen elements, and returns the count written - confirm.
unsigned int UTF32FromUTF8(const char *s, unsigned int len, unsigned int *tbuf, unsigned int tlen);
|
|
|
|
// Presumably encodes the single character val as UTF-16 into tbuf and returns
// the number of code units written - confirm.
// NOTE(review): the required capacity of tbuf is not stated in this header.
unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf);
|
2009-04-24 23:35:41 +00:00
|
|
|
|
2013-08-28 00:44:27 +00:00
|
|
|
// Table with one entry per possible byte value (256 entries); defined elsewhere.
// Presumably holds the byte count of the UTF-8 sequence started by each
// lead byte - confirm against the definition.
extern int UTF8BytesOfLead[256];
|
|
|
|
// Presumably fills in the UTF8BytesOfLead table.
// NOTE(review): confirm whether this must be called before the table is read.
void UTF8BytesOfLeadInitialise();
|
|
|
|
|
|
|
|
// True when ch lies in 0x80..0xBF inclusive, the range occupied by UTF-8
// trail (continuation) bytes. Any value outside that range, including
// negative values, yields false.
inline bool UTF8IsTrailByte(int ch) {
	if (ch < 0x80) {
		return false;
	}
	return ch <= 0xbf;
}
|
|
|
|
|
|
|
|
// True when ch is below 0x80, i.e. it is not a byte with the high bit set.
// Only the upper bound is checked, so negative values also return true.
inline bool UTF8IsAscii(int ch) {
	const int firstNonAscii = 0x80;
	return firstNonAscii > ch;
}
|
|
|
|
|
|
|
|
// Bit masks: UTF8MaskWidth selects the low three bits, UTF8MaskInvalid is bit 3.
// Presumably applied to the value returned by UTF8Classify to extract the
// sequence width and the invalid flag - confirm against the implementation.
enum { UTF8MaskWidth=0x7, UTF8MaskInvalid=0x8 };
|
|
|
|
// Classifies the UTF-8 sequence starting at us; len is presumably the number
// of bytes available there. The result presumably combines a width
// (UTF8MaskWidth) with an invalid flag (UTF8MaskInvalid) - confirm.
int UTF8Classify(const unsigned char *us, int len);
|
|
|
|
|
2015-06-07 21:19:26 +00:00
|
|
|
// Similar to UTF8Classify but returns a length of 1 for invalid bytes
|
|
|
|
// instead of setting the invalid flag
|
|
|
|
// us points at the bytes to examine; len is presumably the number of bytes
// available there - confirm against the implementation.
int UTF8DrawBytes(const unsigned char *us, int len);
|
|
|
|
|
2013-08-28 00:44:27 +00:00
|
|
|
// Line separator is U+2028 \xe2\x80\xa8
|
|
|
|
// Paragraph separator is U+2029 \xe2\x80\xa9
|
|
|
|
// Number of bytes in the UTF-8 encoding of either separator (U+2028 or U+2029).
const int UTF8SeparatorLength = 3;
|
|
|
|
// True when the bytes at us are E2 80 A8 (U+2028, line separator) or
// E2 80 A9 (U+2029, paragraph separator).
// Bytes are read left to right and a mismatch stops the check, so us[1] is
// only read when us[0] matches and us[2] only when us[0] and us[1] match.
inline bool UTF8IsSeparator(const unsigned char *us) {
	if (us[0] != 0xe2) {
		return false;
	}
	if (us[1] != 0x80) {
		return false;
	}
	const unsigned char last = us[2];
	return (last == 0xa8) || (last == 0xa9);
}
|
|
|
|
|
|
|
|
// NEL is U+0085 \xc2\x85
|
|
|
|
// Number of bytes in the UTF-8 encoding of NEL (U+0085).
const int UTF8NELLength = 2;
|
|
|
|
// True when the bytes at us are C2 85, the UTF-8 encoding of NEL (U+0085).
// us[1] is only read when us[0] is 0xC2.
inline bool UTF8IsNEL(const unsigned char *us) {
	if (us[0] != 0xc2) {
		return false;
	}
	return us[1] == 0x85;
}
|
2015-06-07 21:19:26 +00:00
|
|
|
|
|
|
|
// Bounds (inclusive) of the UTF-16 lead surrogate range.
enum { SURROGATE_LEAD_FIRST = 0xD800 };
enum { SURROGATE_LEAD_LAST = 0xDBFF };

// Number of UTF-16 code units in the character that starts with uch:
// 2 when uch is a lead surrogate (SURROGATE_LEAD_FIRST..SURROGATE_LEAD_LAST),
// otherwise 1.
inline unsigned int UTF16CharLength(wchar_t uch) {
	if (uch < SURROGATE_LEAD_FIRST) {
		return 1;
	}
	if (uch > SURROGATE_LEAD_LAST) {
		return 1;
	}
	return 2;
}
|
|
|
|
|
|
|
|
#ifdef SCI_NAMESPACE
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#endif
|