856 lines
18 KiB
C++
856 lines
18 KiB
C++
#include <ex/ex_platform.h>
|
|
#include <ex/ex_str.h>
|
|
#include <ex/ex_util.h>
|
|
|
|
/*
 * Copy the NUL-terminated string `source` into `target`.
 *
 * target: destination buffer.
 * size:   capacity of `target` in chars, terminator included.
 * source: string to copy.
 *
 * Returns `target` when the whole string (with its terminator) fit, NULL
 * otherwise. When the string does not fit, the first size-1 chars are still
 * copied and the result is always NUL-terminated. A zero-sized buffer is
 * rejected without touching `target`. If `target` and `source` are the same
 * pointer nothing is copied and `target` is returned.
 */
char* ex_strcpy(char* target, size_t size, const char* source)
{
    if (target == source)
        return target;

#ifdef EX_OS_WIN32
    if (SUCCEEDED(StringCchCopyA(target, size, source)))
        return target;
    else
        return NULL;
#else
    size_t len;

    /* Not even room for the terminator; also keeps `size - 1` from wrapping. */
    if (0 == size)
        return NULL;

    len = strlen(source);
    if (size > len)
    {
        /* memmove rather than strcpy: the two buffers are allowed to overlap. */
        memmove(target, source, len + 1);
        return target;
    }

    /* Truncated copy: keep what fits and terminate it explicitly. */
    memmove(target, source, size - 1);
    target[size - 1] = '\0';
    return NULL;
#endif
}
|
|
|
|
/*
 * Wide-character counterpart of ex_strcpy.
 *
 * target: destination buffer.
 * size:   capacity of `target` in wchar_t units, terminator included.
 * source: wide string to copy.
 *
 * Returns `target` when the whole string (with its terminator) fit, NULL
 * otherwise. When the string does not fit, the first size-1 characters are
 * still copied and the result is always terminated with L'\0'. A zero-sized
 * buffer is rejected without touching `target`. If `target` and `source` are
 * the same pointer nothing is copied and `target` is returned.
 */
wchar_t* ex_wcscpy(wchar_t* target, size_t size, const wchar_t* source)
{
    if (target == source)
        return target;

#ifdef EX_OS_WIN32
    if (SUCCEEDED(StringCchCopyW(target, size, source)))
        return target;
    else
        return NULL;
#else
    size_t len;

    /* Not even room for the terminator; also keeps `size - 1` from wrapping. */
    if (0 == size)
        return NULL;

    len = wcslen(source);
    if (size > len)
    {
        /* memmove rather than wcscpy: the two buffers are allowed to overlap. */
        memmove(target, source, (len + 1) * sizeof(wchar_t));
        return target;
    }

    /* Truncated copy: keep what fits and terminate it explicitly. */
    memmove(target, source, (size - 1) * sizeof(wchar_t));
    target[size - 1] = L'\0';
    return NULL;
#endif
}
|
|
|
|
/*
 * Return a newly allocated copy of the NUL-terminated string `src`.
 *
 * Returns NULL when `src` is NULL or when the allocation fails. The copy is
 * obtained from calloc(), so the caller releases it with free().
 */
char* ex_strdup(const char* src)
{
    size_t len;
    char* ret;

    if (NULL == src)
        return NULL;

    len = strlen(src) + 1; /* terminator included */
    ret = (char*)calloc(1, len);
    if (NULL == ret)
        return NULL; /* out of memory: do not copy into a NULL buffer */

    memcpy(ret, src, len);
    return ret;
}
|
|
|
|
/*
 * Return a newly allocated copy of the wide string `src`.
 *
 * Returns NULL when `src` is NULL or when the allocation fails. The copy is
 * obtained from calloc(), so the caller releases it with free().
 */
wchar_t* ex_wcsdup(const wchar_t* src)
{
    size_t len;
    wchar_t* ret;

    if (NULL == src)
        return NULL;

    len = wcslen(src) + 1; /* terminator included */
    ret = (wchar_t*)calloc(sizeof(wchar_t), len);
    if (NULL == ret)
        return NULL; /* out of memory: do not copy into a NULL buffer */

    memcpy(ret, src, sizeof(wchar_t) * len);
    return ret;
}
|
|
|
|
/*
 * Convert the multibyte string `in_buffer` to a newly allocated wide string.
 *
 * in_buffer: NUL-terminated input; NULL yields NULL.
 * code_page: EX_CODEPAGE_ACP or EX_CODEPAGE_UTF8. Only the Windows branch
 *            looks at it; any other value leaves the code page at 0. The
 *            non-Windows branch converts with mbstowcs().
 *
 * Returns a calloc()-allocated, NUL-terminated wide string that the caller
 * must free(), or NULL on invalid input, conversion failure or allocation
 * failure. On the non-Windows branch an empty input also yields NULL.
 */
wchar_t* ex_str2wcs_alloc(const char* in_buffer, int code_page)
{
    wchar_t* out_buffer = NULL;

    if (NULL == in_buffer)
        return NULL;

#ifdef EX_OS_WIN32
    int wlen = 0;
    UINT _cp = 0;
    if (code_page == EX_CODEPAGE_ACP)
        _cp = CP_ACP;
    else if (code_page == EX_CODEPAGE_UTF8)
        _cp = CP_UTF8;

    /* First pass: ask for the required length. */
    wlen = MultiByteToWideChar(_cp, 0, in_buffer, -1, NULL, 0);
    if (0 == wlen)
        return NULL;

    out_buffer = (wchar_t*)calloc(wlen + 1, sizeof(wchar_t));
    if (NULL == out_buffer)
        return NULL;

    /* Second pass: convert into the buffer. */
    wlen = MultiByteToWideChar(_cp, 0, in_buffer, -1, out_buffer, wlen);
    if (0 == wlen)
    {
        free(out_buffer);
        return NULL;
    }

#else
    size_t wlen = 0;

    /*
     * mbstowcs() reports an invalid sequence as (size_t)-1. Because size_t is
     * unsigned that value is never "<= 0", so it has to be tested explicitly;
     * otherwise wlen + 1 wraps to 0 and the second call overruns the buffer.
     */
    wlen = mbstowcs(NULL, in_buffer, 0);
    if ((size_t)-1 == wlen || 0 == wlen)
        return NULL;

    /* calloc zero-fills, so the extra element is the terminator. */
    out_buffer = (wchar_t*)calloc(wlen + 1, sizeof(wchar_t));
    if (NULL == out_buffer)
        return NULL;

    wlen = mbstowcs(out_buffer, in_buffer, wlen);
    if ((size_t)-1 == wlen || 0 == wlen)
    {
        free(out_buffer);
        return NULL;
    }

#endif

    return out_buffer;
}
|
|
|
|
|
|
/*
 * Convert the wide string `in_buffer` to a newly allocated multibyte string.
 *
 * in_buffer: NUL-terminated wide input; NULL yields NULL.
 * code_page: EX_CODEPAGE_ACP or EX_CODEPAGE_UTF8. Only the Windows branch
 *            looks at it; any other value leaves the code page at 0. The
 *            non-Windows branch converts with wcstombs().
 *
 * Returns a calloc()-allocated, NUL-terminated string that the caller must
 * free(), or NULL on invalid input, conversion failure or allocation
 * failure. On the non-Windows branch an empty input also yields NULL.
 */
char* ex_wcs2str_alloc(const wchar_t* in_buffer, int code_page)
{
    char* out_buffer = NULL;

    if (NULL == in_buffer)
        return NULL;

#ifdef EX_OS_WIN32
    int len = 0;
    UINT _cp = 0;
    if (code_page == EX_CODEPAGE_ACP)
        _cp = CP_ACP;
    else if (code_page == EX_CODEPAGE_UTF8)
        _cp = CP_UTF8;

    /* First pass: ask for the required length. */
    len = WideCharToMultiByte(_cp, 0, in_buffer, -1, NULL, 0, NULL, NULL);
    if (0 == len)
        return NULL;

    out_buffer = (char*)calloc(len + 1, sizeof(char));
    if (NULL == out_buffer)
        return NULL;

    /* Second pass: convert into the buffer. */
    len = WideCharToMultiByte(_cp, 0, in_buffer, -1, out_buffer, len, NULL, NULL);
    if (0 == len)
    {
        free(out_buffer);
        return NULL;
    }

#else
    size_t len = 0;

    /*
     * wcstombs() reports an unconvertible character as (size_t)-1. Because
     * size_t is unsigned that value is never "<= 0", so it has to be tested
     * explicitly; otherwise len + 1 wraps to 0 and the second call overruns
     * the buffer.
     */
    len = wcstombs(NULL, in_buffer, 0);
    if ((size_t)-1 == len || 0 == len)
        return NULL;

    /* calloc zero-fills, so the extra byte is the terminator. */
    out_buffer = (char*)calloc(len + 1, sizeof(char));
    if (NULL == out_buffer)
        return NULL;

    len = wcstombs(out_buffer, in_buffer, len);
    if ((size_t)-1 == len || 0 == len)
    {
        free(out_buffer);
        return NULL;
    }

#endif

    return out_buffer;
}
|
|
|
|
wchar_t** ex_make_wargv(int argc, char** argv)
|
|
{
|
|
int i = 0;
|
|
wchar_t** ret = NULL;
|
|
|
|
ret = (wchar_t**)calloc(argc + 1, sizeof(wchar_t*));
|
|
if (!ret)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
for (i = 0; i < argc; ++i)
|
|
{
|
|
ret[i] = ex_str2wcs_alloc(argv[i], EX_CODEPAGE_DEFAULT);
|
|
if (NULL == ret[i])
|
|
goto err;
|
|
}
|
|
|
|
return ret;
|
|
|
|
err:
|
|
ex_free_wargv(argc, ret);
|
|
return NULL;
|
|
}
|
|
|
|
/*
 * Release an array of `argc` heap-allocated wide strings and the array
 * itself. NULL entries are fine (free(NULL) is a no-op), and a NULL `argv`
 * is ignored in the same spirit.
 */
void ex_free_wargv(int argc, wchar_t** argv)
{
    int i = 0;

    if (NULL == argv)
        return;

    for (i = 0; i < argc; ++i)
        free(argv[i]);

    free(argv);
}
|
|
|
|
/*
 * EX_BOOL adapter around ex_only_white_space(): returns EX_TRUE when that
 * call reports true for `src`, EX_FALSE otherwise.
 * NOTE(review): despite the "str" in its name this function takes a wide
 * string, while ex_wcs_only_white_space takes a narrow one - confirm that
 * this pairing is intentional.
 */
EX_BOOL ex_str_only_white_space(const wchar_t* src)
{
    return ex_only_white_space(src) ? EX_TRUE : EX_FALSE;
}
|
|
|
|
/*
 * EX_BOOL adapter around ex_only_white_space(): returns EX_TRUE when that
 * call reports true for `src`, EX_FALSE otherwise.
 * NOTE(review): despite the "wcs" in its name this function takes a narrow
 * string, while ex_str_only_white_space takes a wide one - confirm that
 * this pairing is intentional.
 */
EX_BOOL ex_wcs_only_white_space(const char* src)
{
    return ex_only_white_space(src) ? EX_TRUE : EX_FALSE;
}
|
|
|
|
/*
 * printf-style formatting into a caller-supplied buffer.
 *
 * out_buf:  destination buffer.
 * buf_size: capacity of `out_buf` in chars, terminator included.
 * fmt, ...: printf format string and its arguments.
 *
 * The output is bounded by `buf_size` on every platform, so an over-long
 * result is truncated instead of overrunning `out_buf`. Returns what
 * vsnprintf() returns: the length the complete output would have had (so a
 * value >= buf_size signals truncation), or a negative value on error.
 */
int ex_strformat(char* out_buf, size_t buf_size, const char* fmt, ...)
{
    int ret = 0;
    va_list valist;

    va_start(valist, fmt);
    ret = vsnprintf(out_buf, buf_size, fmt, valist);
    va_end(valist);

    return ret;
}
|
|
|
|
/*
 * wprintf-style formatting into a caller-supplied wide buffer.
 *
 * out_buf:  destination buffer.
 * buf_size: capacity of `out_buf` in wchar_t units, terminator included.
 * fmt, ...: wide format string and its arguments.
 *
 * Returns the value of the underlying call: _vsnwprintf_s() on Windows,
 * vswprintf() elsewhere. Both report output that does not fit with a
 * negative value.
 */
int ex_wcsformat(wchar_t* out_buf, size_t buf_size, const wchar_t* fmt, ...)
{
    int ret = 0;
    va_list valist;

    va_start(valist, fmt);
#ifdef EX_OS_WIN32
    /*
     * _TRUNCATE makes an over-long result get cut to fit (returning -1).
     * Passing buf_size as the count instead would hand the overflow to the
     * CRT invalid-parameter handler.
     */
    ret = _vsnwprintf_s(out_buf, buf_size, _TRUNCATE, fmt, valist);
#else
    ret = vswprintf(out_buf, buf_size, fmt, valist);
#endif
    va_end(valist);

    return ret;
}
|
|
|
|
|
|
#ifdef __cplusplus
|
|
bool ex_wstr2astr(const ex_wstr& in_str, ex_astr& out_str, int code_page/* = EX_CODEPAGE_DEFAULT*/)
|
|
{
|
|
return ex_wstr2astr(in_str.c_str(), out_str, code_page);
|
|
}
|
|
|
|
bool ex_wstr2astr(const wchar_t* in_str, ex_astr& out_str, int code_page/* = EX_CODEPAGE_DEFAULT*/)
|
|
{
|
|
char* astr = ex_wcs2str_alloc(in_str, code_page);
|
|
if (NULL == astr)
|
|
return false;
|
|
|
|
out_str = astr;
|
|
ex_free(astr);
|
|
return true;
|
|
}
|
|
|
|
bool ex_astr2wstr(const ex_astr& in_str, ex_wstr& out_str, int code_page/* = EX_CODEPAGE_DEFAULT*/)
|
|
{
|
|
return ex_astr2wstr(in_str.c_str(), out_str, code_page);
|
|
}
|
|
|
|
bool ex_astr2wstr(const char* in_str, ex_wstr& out_str, int code_page/* = EX_CODEPAGE_DEFAULT*/)
|
|
{
|
|
wchar_t* wstr = ex_str2wcs_alloc(in_str, code_page);
|
|
if (NULL == wstr)
|
|
return false;
|
|
|
|
out_str = wstr;
|
|
ex_free(wstr);
|
|
return true;
|
|
}
|
|
|
|
bool ex_only_white_space(const ex_astr& str_check)
|
|
{
|
|
ex_astr::size_type pos = 0;
|
|
ex_astr strFilter(" \t\r\n");
|
|
pos = str_check.find_first_not_of(strFilter);
|
|
if (ex_astr::npos == pos)
|
|
return true;
|
|
else
|
|
return false;
|
|
}
|
|
|
|
bool ex_only_white_space(const ex_wstr& str_check)
|
|
{
|
|
ex_wstr::size_type pos = 0;
|
|
ex_wstr strFilter(L" \t\r\n");
|
|
pos = str_check.find_first_not_of(strFilter);
|
|
if (ex_wstr::npos == pos)
|
|
return true;
|
|
else
|
|
return false;
|
|
}
|
|
|
|
// Trim space, tab, CR and LF from `str_fix` in place.
//
// ulFlag selects the side(s): EX_RSC_BEGIN strips leading whitespace,
// EX_RSC_END strips trailing whitespace; both bits may be set.
// A string made up entirely of whitespace becomes empty for either flag
// (previously it was left unchanged, which is what the FIXME markers were
// pointing at).
void ex_remove_white_space(ex_astr& str_fix, int ulFlag /*= EX_RSC_ALL*/)
{
    const ex_astr strFilter(" \t\r\n");

    if (ulFlag & EX_RSC_BEGIN)
    {
        const ex_astr::size_type first = str_fix.find_first_not_of(strFilter);
        if (ex_astr::npos == first)
            str_fix.erase(0, ex_astr::npos);   // nothing but whitespace: drop it all
        else
            str_fix.erase(0, first);
    }

    if (ulFlag & EX_RSC_END)
    {
        const ex_astr::size_type last = str_fix.find_last_not_of(strFilter);
        if (ex_astr::npos == last)
            str_fix.erase(0, ex_astr::npos);   // nothing but whitespace: drop it all
        else
            str_fix.erase(last + 1);
    }
}
|
|
|
|
// Trim space, tab, CR and LF from the wide string `str_fix` in place.
//
// ulFlag selects the side(s): EX_RSC_BEGIN strips leading whitespace,
// EX_RSC_END strips trailing whitespace; both bits may be set.
// A string made up entirely of whitespace becomes empty for either flag
// (previously it was left unchanged, which is what the FIXME markers were
// pointing at).
void ex_remove_white_space(ex_wstr& str_fix, int ulFlag /*= EX_RSC_ALL*/)
{
    const ex_wstr strFilter(L" \t\r\n");

    if (ulFlag & EX_RSC_BEGIN)
    {
        const ex_wstr::size_type first = str_fix.find_first_not_of(strFilter);
        if (ex_wstr::npos == first)
            str_fix.erase(0, ex_wstr::npos);   // nothing but whitespace: drop it all
        else
            str_fix.erase(0, first);
    }

    if (ulFlag & EX_RSC_END)
    {
        const ex_wstr::size_type last = str_fix.find_last_not_of(strFilter);
        if (ex_wstr::npos == last)
            str_fix.erase(0, ex_wstr::npos);   // nothing but whitespace: drop it all
        else
            str_fix.erase(last + 1);
    }
}
|
|
|
|
// Replace every occurrence of `old_value` in `str` with `new_value`, in place,
// and return `str`.
//
// The search resumes just past each inserted `new_value`, so text introduced
// by a replacement is never matched again. An empty `old_value` matches at
// every position and would never terminate, so it is treated as "nothing to
// replace".
ex_astr& ex_replace_all(ex_astr& str, const ex_astr& old_value, const ex_astr& new_value)
{
    if (0 == old_value.length())
        return str;

    ex_astr::size_type pos = 0;
    while ((pos = str.find(old_value, pos)) != ex_astr::npos)
    {
        str.replace(pos, old_value.length(), new_value);
        pos += new_value.length();
    }

    return str;
}
|
|
|
|
// Replace every occurrence of `old_value` in the wide string `str` with
// `new_value`, in place, and return `str`.
//
// The search resumes just past each inserted `new_value`, so text introduced
// by a replacement is never matched again. An empty `old_value` matches at
// every position and would never terminate, so it is treated as "nothing to
// replace".
ex_wstr& ex_replace_all(ex_wstr& str, const ex_wstr& old_value, const ex_wstr& new_value)
{
    if (0 == old_value.length())
        return str;

    ex_wstr::size_type pos = 0;
    while ((pos = str.find(old_value, pos)) != ex_wstr::npos)
    {
        str.replace(pos, old_value.length(), new_value);
        pos += new_value.length();
    }

    return str;
}
|
|
|
|
|
|
|
|
#ifndef EX_OS_WIN32
|
|
|
|
/*
 * Stand-ins for the Win32 type and constant names used by the UTF-8/UTF-16
 * helpers below; this section is only compiled when EX_OS_WIN32 is not
 * defined.
 * NOTE(review): WCHAR maps to ex_i16, whose name suggests a signed 16-bit
 * type - confirm; code units >= 0x8000 would then be negative.
 */
#define BYTE    ex_u8
#define DWORD   ex_u32
#define WCHAR   ex_i16
#define LPWSTR  WCHAR*
#define BOOL    int
#define TRUE    1
#define FALSE   0
#define UINT    unsigned int
#define LPCSTR  const char*
#define CP_UTF8 1
|
|
|
|
/* Outcome reported by the UTF conversion routines. */
typedef enum
{
    conversionOK,       /* the whole input was converted */
    sourceExhausted,    /* input ended in the middle of a character */
    targetExhausted,    /* output buffer too small for the conversion */
    sourceIllegal       /* input sequence is illegal/malformed */
} ConversionResult;

/* How the conversion routines treat code points that cannot be represented. */
typedef enum
{
    strictConversion = 0,   /* stop and report sourceIllegal */
    lenientConversion       /* carry on where the routine has a lenient path */
} ConversionFlags;
|
|
|
|
static const char trailingBytesForUTF8[256] =
|
|
{
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
|
|
};
|
|
|
|
static const DWORD offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 0x03C82080UL, 0xFA082080UL, 0x82082080UL
|
|
};
|
|
|
|
static const BYTE firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
|
|
|
|
static const int halfShift = 10; /* used for shifting by 10 bits */
|
|
|
|
static const DWORD halfBase = 0x0010000UL;
|
|
static const DWORD halfMask = 0x3FFUL;
|
|
|
|
/* Bounds of the UTF-16 high and low surrogate ranges. */
#define UNI_SUR_HIGH_START   (DWORD)0xD800
#define UNI_SUR_HIGH_END     (DWORD)0xDBFF
#define UNI_SUR_LOW_START    (DWORD)0xDC00
#define UNI_SUR_LOW_END      (DWORD)0xDFFF

/* Substitute character and upper limits used by the conversion routines. */
#define UNI_REPLACEMENT_CHAR (DWORD)0x0000FFFD
#define UNI_MAX_BMP          (DWORD)0x0000FFFF
#define UNI_MAX_UTF16        (DWORD)0x0010FFFF
#define UNI_MAX_UTF32        (DWORD)0x7FFFFFFF
#define UNI_MAX_LEGAL_UTF32  (DWORD)0x0010FFFF
|
|
|
|
|
|
/*
 * Convert UTF-16 code units in [*sourceStart, sourceEnd) to UTF-8.
 *
 * sourceStart: in/out; on return points at the first unconsumed code unit.
 * sourceEnd:   one past the last input code unit.
 * targetStart: in/out; on return points one past the last byte produced.
 * targetEnd:   one past the end of the output buffer. When NULL nothing is
 *              written and *targetStart is only advanced, so the caller can
 *              measure the required size.
 * flags:       strictConversion rejects unpaired surrogates with
 *              sourceIllegal; otherwise they are encoded as they are.
 *
 * Returns conversionOK, sourceExhausted (input ends after a high surrogate),
 * targetExhausted (output buffer full) or sourceIllegal. In the failure
 * cases *sourceStart is left on the offending code unit.
 */
static ConversionResult ConvertUTF16toUTF8(const WCHAR** sourceStart, const WCHAR* sourceEnd, BYTE** targetStart, BYTE* targetEnd, ConversionFlags flags)
{
    const DWORD byteMask = 0xBF;
    const DWORD byteMark = 0x80;
    const BOOL computeLength = (!targetEnd) ? TRUE : FALSE;
    const WCHAR* source = *sourceStart;
    BYTE* target = *targetStart;
    ConversionResult result = conversionOK;

    while (source < sourceEnd)
    {
        unsigned short bytesToWrite = 0;
        const WCHAR* oldSource = source; /* In case we have to back up because of target overflow. */

        /*
         * Keep only the low 16 bits: if WCHAR is a signed 16-bit type, code
         * units >= 0x8000 would otherwise be sign-extended to 0xFFFFxxxx and
         * neither the surrogate tests nor the range tests below would match.
         */
        DWORD ch = (DWORD)(*source++) & 0xFFFFUL;

        /* If we have a surrogate pair, convert to UTF32 first. */
        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
        {
            /* If the 16 bits following the high surrogate are in the source buffer... */
            if (source < sourceEnd)
            {
                DWORD ch2 = (DWORD)(*source) & 0xFFFFUL;

                /* If it's a low surrogate, convert to UTF32. */
                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
                {
                    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
                        + (ch2 - UNI_SUR_LOW_START) + halfBase;
                    ++source;
                }
                else if (flags == strictConversion)
                {
                    /* it's an unpaired high surrogate */
                    --source; /* return to the illegal value itself */
                    result = sourceIllegal;
                    break;
                }
            }
            else
            {
                /* We don't have the 16 bits following the high surrogate. */
                --source; /* return to the high surrogate */
                result = sourceExhausted;
                break;
            }
        }
        else if (flags == strictConversion)
        {
            /* A low surrogate that was not preceded by a high one is illegal. */
            if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
            {
                --source; /* return to the illegal value itself */
                result = sourceIllegal;
                break;
            }
        }

        /* Figure out how many bytes the result will require */
        if (ch < (DWORD)0x80)
        {
            bytesToWrite = 1;
        }
        else if (ch < (DWORD)0x800)
        {
            bytesToWrite = 2;
        }
        else if (ch < (DWORD)0x10000)
        {
            bytesToWrite = 3;
        }
        else if (ch < (DWORD)0x110000)
        {
            bytesToWrite = 4;
        }
        else
        {
            bytesToWrite = 3;
            ch = UNI_REPLACEMENT_CHAR;
        }

        target += bytesToWrite;

        if ((target > targetEnd) && (!computeLength))
        {
            source = oldSource; /* Back up source pointer! */
            target -= bytesToWrite;
            result = targetExhausted;
            break;
        }

        if (!computeLength)
        {
            /* The bytes are written back to front; every case falls through. */
            switch (bytesToWrite)
            {
            case 4:
                *--target = (BYTE)((ch | byteMark) & byteMask);
                ch >>= 6;
                /* fall through */
            case 3:
                *--target = (BYTE)((ch | byteMark) & byteMask);
                ch >>= 6;
                /* fall through */
            case 2:
                *--target = (BYTE)((ch | byteMark) & byteMask);
                ch >>= 6;
                /* fall through */
            case 1:
                *--target = (BYTE)(ch | firstByteMark[bytesToWrite]);
            }
        }
        else
        {
            /* Measuring only: step back over the bytes that would have been written. */
            target -= bytesToWrite;
        }

        target += bytesToWrite;
    }

    *sourceStart = source;
    *targetStart = target;
    return result;
}
|
|
|
|
|
|
static BOOL isLegalUTF8(const BYTE* source, int length)
|
|
{
|
|
BYTE a;
|
|
const BYTE* srcptr = source + length;
|
|
|
|
switch (length)
|
|
{
|
|
default:
|
|
return FALSE;
|
|
|
|
/* Everything else falls through when "TRUE"... */
|
|
case 4:
|
|
if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return FALSE;
|
|
|
|
case 3:
|
|
if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return FALSE;
|
|
|
|
case 2:
|
|
if ((a = (*--srcptr)) > 0xBF) return FALSE;
|
|
|
|
switch (*source)
|
|
{
|
|
/* no fall-through in this inner switch */
|
|
case 0xE0:
|
|
if (a < 0xA0) return FALSE;
|
|
|
|
break;
|
|
|
|
case 0xED:
|
|
if (a > 0x9F) return FALSE;
|
|
|
|
break;
|
|
|
|
case 0xF0:
|
|
if (a < 0x90) return FALSE;
|
|
|
|
break;
|
|
|
|
case 0xF4:
|
|
if (a > 0x8F) return FALSE;
|
|
|
|
break;
|
|
|
|
default:
|
|
if (a < 0x80) return FALSE;
|
|
}
|
|
|
|
case 1:
|
|
if (*source >= 0x80 && *source < 0xC2) return FALSE;
|
|
}
|
|
|
|
if (*source > 0xF4)
|
|
return FALSE;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
/*
 * Convert UTF-8 bytes in [*sourceStart, sourceEnd) to UTF-16 code units.
 *
 * sourceStart: in/out; on return points at the first unconsumed byte.
 * sourceEnd:   one past the last input byte.
 * targetStart: in/out; on return points one past the last unit produced.
 * targetEnd:   one past the end of the output buffer. When NULL nothing is
 *              written and *targetStart is only advanced, so the caller can
 *              measure the required size.
 * flags:       with strictConversion, surrogate code points and values above
 *              UNI_MAX_UTF16 stop the conversion with sourceIllegal;
 *              otherwise UNI_REPLACEMENT_CHAR is emitted for them.
 *
 * Returns conversionOK, sourceExhausted (a sequence runs up to or past
 * sourceEnd), targetExhausted (output buffer full) or sourceIllegal
 * (malformed sequence). In the failure cases *sourceStart is left at the
 * start of the sequence that could not be handled.
 */
static ConversionResult _ConvertUTF8toUTF16(const BYTE** sourceStart, const BYTE* sourceEnd, WCHAR** targetStart, WCHAR* targetEnd, ConversionFlags flags)
{
    const BOOL countOnly = (!targetEnd) ? TRUE : FALSE;
    ConversionResult status = conversionOK;
    const BYTE* in = *sourceStart;
    WCHAR* out = *targetStart;

    while (in < sourceEnd)
    {
        DWORD cp = 0;
        unsigned short trail = trailingBytesForUTF8[*in];
        unsigned short remaining;

        if ((in + trail) >= sourceEnd)
        {
            status = sourceExhausted;
            break;
        }

        /* Do this check whether lenient or strict */
        if (!isLegalUTF8(in, trail + 1))
        {
            status = sourceIllegal;
            break;
        }

        /* Accumulate the raw bytes, shifting by 6 bits after each one except the last. */
        for (remaining = trail; remaining > 0; --remaining)
        {
            cp += *in++;
            cp <<= 6;
        }
        cp += *in++;

        /* Remove the lead/continuation marker bits that were summed in. */
        cp -= offsetsFromUTF8[trail];

        if ((out >= targetEnd) && (!countOnly))
        {
            in -= (trail + 1); /* Back up source pointer! */
            status = targetExhausted;
            break;
        }

        if (cp <= UNI_MAX_BMP)
        {
            /* Target is a character <= 0xFFFF */
            if (cp >= UNI_SUR_HIGH_START && cp <= UNI_SUR_LOW_END)
            {
                /* Surrogate values are not legal code points. */
                if (flags == strictConversion)
                {
                    in -= (trail + 1); /* return to the illegal value itself */
                    status = sourceIllegal;
                    break;
                }

                if (!countOnly)
                    *out = UNI_REPLACEMENT_CHAR;
                ++out;
            }
            else
            {
                if (!countOnly)
                    *out = (WCHAR)cp; /* normal case */
                ++out;
            }
        }
        else if (cp > UNI_MAX_UTF16)
        {
            if (flags == strictConversion)
            {
                status = sourceIllegal;
                in -= (trail + 1); /* return to the start */
                break; /* Bail out; shouldn't continue */
            }

            if (!countOnly)
                *out = UNI_REPLACEMENT_CHAR;
            ++out;
        }
        else
        {
            /* Code point above 0xFFFF: needs a surrogate pair, i.e. two output units. */
            if ((out + 1 >= targetEnd) && (!countOnly))
            {
                in -= (trail + 1); /* Back up source pointer! */
                status = targetExhausted;
                break;
            }

            cp -= halfBase;

            if (!countOnly)
            {
                out[0] = (WCHAR)((cp >> halfShift) + UNI_SUR_HIGH_START);
                out[1] = (WCHAR)((cp & halfMask) + UNI_SUR_LOW_START);
            }
            out += 2;
        }
    }

    *sourceStart = in;
    *targetStart = out;
    return status;
}
|
|
|
|
/*
 * Non-Windows stand-in for the Win32 MultiByteToWideChar() call. The input
 * is always treated as UTF-8; CodePage and dwFlags are accepted for
 * signature compatibility and ignored.
 *
 * lpMultiByteStr: input bytes.
 * cbMultiByte:    number of input bytes, or -1 for a NUL-terminated string
 *                 (the terminator is then converted and counted as well).
 * lpWideCharStr:  output buffer; not used when cchWideChar is 0.
 * cchWideChar:    capacity of the output buffer in WCHAR units, or 0 to only
 *                 compute the required size.
 *
 * Returns the number of WCHAR units produced (or required), or 0 on failure:
 * NULL input, empty or negative sizes, malformed UTF-8, or an output buffer
 * that is too small.
 */
static int MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr, int cbMultiByte, LPWSTR lpWideCharStr, int cchWideChar)
{
    const BYTE* sourceStart;
    const BYTE* sourceEnd;
    WCHAR* targetBase;
    WCHAR* targetStart;
    WCHAR* targetEnd;
    ConversionResult result;

    (void)CodePage;
    (void)dwFlags;

    /* If there is no input, or cbMultiByte is 0, the function fails */
    if (NULL == lpMultiByteStr || cbMultiByte == 0)
        return 0;

    /* If cbMultiByte is -1, the string is null-terminated */
    if (cbMultiByte == -1)
        cbMultiByte = (int)strlen((const char*)lpMultiByteStr) + 1;

    if (cbMultiByte < 0 || cchWideChar < 0)
        return 0;

    sourceStart = (const BYTE*)lpMultiByteStr;
    sourceEnd = sourceStart + cbMultiByte;

    if (cchWideChar == 0)
    {
        /*
         * Size query: a NULL targetEnd puts the converter into counting
         * mode, where it advances the target pointer without writing.
         */
        targetBase = (WCHAR*)NULL;
        targetEnd = (WCHAR*)NULL;
    }
    else
    {
        if (NULL == lpWideCharStr)
            return 0;

        targetBase = lpWideCharStr;
        targetEnd = lpWideCharStr + cchWideChar;
    }

    targetStart = targetBase;
    result = _ConvertUTF8toUTF16(&sourceStart, sourceEnd, &targetStart, targetEnd, strictConversion);

    /* Report a failed conversion as 0 instead of passing off the partial count as success. */
    if (result != conversionOK)
        return 0;

    return (int)(targetStart - targetBase);
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
bool ex_utf8_to_utf16le(const std::string& from, ex_str_utf16le& to)
|
|
{
|
|
int iSize = MultiByteToWideChar(CP_UTF8, 0, from.c_str(), -1, NULL, 0);
|
|
if (iSize <= 0)
|
|
return false;
|
|
|
|
//++iSize;
|
|
to.resize(iSize);
|
|
memset(&to[0], 0, sizeof(ex_utf16));
|
|
|
|
MultiByteToWideChar(CP_UTF8, 0, from.c_str(), -1, &to[0], iSize);
|
|
|
|
return true;
|
|
}
|
|
|
|
#endif
|