diff --git a/PowerEditor/src/uchardet/LangBulgarianModel.cpp b/PowerEditor/src/uchardet/LangBulgarianModel.cpp
index 0f73282b8..776866073 100644
--- a/PowerEditor/src/uchardet/LangBulgarianModel.cpp
+++ b/PowerEditor/src/uchardet/LangBulgarianModel.cpp
@@ -226,20 +226,18 @@ static const PRUint8 BulgarianLangModel[] =
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
};
-const SequenceModel Latin5BulgarianModel =
-{
+const SequenceModel Latin5BulgarianModel(
Latin5_BulgarianCharToOrderMap,
BulgarianLangModel,
(float)0.969392,
PR_FALSE,
"ISO-8859-5"
-};
+);
-const SequenceModel Win1251BulgarianModel =
-{
+const SequenceModel Win1251BulgarianModel(
win1251BulgarianCharToOrderMap,
BulgarianLangModel,
(float)0.969392,
PR_FALSE,
"windows-1251"
-};
+);
diff --git a/PowerEditor/src/uchardet/LangCyrillicModel.cpp b/PowerEditor/src/uchardet/LangCyrillicModel.cpp
index d8e73e8a9..42f28876b 100644
--- a/PowerEditor/src/uchardet/LangCyrillicModel.cpp
+++ b/PowerEditor/src/uchardet/LangCyrillicModel.cpp
@@ -300,56 +300,50 @@ static const PRUint8 RussianLangModel[] =
};
-const SequenceModel Koi8rModel =
-{
+const SequenceModel Koi8rModel(
KOI8R_CharToOrderMap,
RussianLangModel,
(float)0.976601,
PR_FALSE,
"KOI8-R"
-};
+);
-const SequenceModel Win1251Model =
-{
+const SequenceModel Win1251Model(
win1251_CharToOrderMap,
RussianLangModel,
(float)0.976601,
PR_FALSE,
"windows-1251"
-};
+);
-const SequenceModel Latin5Model =
-{
+const SequenceModel Latin5Model(
latin5_CharToOrderMap,
RussianLangModel,
(float)0.976601,
PR_FALSE,
"ISO-8859-5"
-};
+);
-const SequenceModel MacCyrillicModel =
-{
+const SequenceModel MacCyrillicModel(
macCyrillic_CharToOrderMap,
RussianLangModel,
(float)0.976601,
PR_FALSE,
"x-mac-cyrillic"
-};
+);
-const SequenceModel Ibm866Model =
-{
+const SequenceModel Ibm866Model(
IBM866_CharToOrderMap,
RussianLangModel,
(float)0.976601,
PR_FALSE,
"IBM866"
-};
+);
-const SequenceModel Ibm855Model =
-{
+const SequenceModel Ibm855Model(
IBM855_CharToOrderMap,
RussianLangModel,
(float)0.976601,
PR_FALSE,
"IBM855"
-};
+);
diff --git a/PowerEditor/src/uchardet/LangGreekModel.cpp b/PowerEditor/src/uchardet/LangGreekModel.cpp
index 30c65dc7f..d90ced9d3 100644
--- a/PowerEditor/src/uchardet/LangGreekModel.cpp
+++ b/PowerEditor/src/uchardet/LangGreekModel.cpp
@@ -225,20 +225,18 @@ static const PRUint8 GreekLangModel[] =
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
-const SequenceModel Latin7Model =
-{
+const SequenceModel Latin7Model (
Latin7_CharToOrderMap,
GreekLangModel,
(float)0.982851,
PR_FALSE,
"ISO-8859-7"
-};
+);
-const SequenceModel Win1253Model =
-{
+const SequenceModel Win1253Model(
win1253_CharToOrderMap,
GreekLangModel,
(float)0.982851,
PR_FALSE,
"windows-1253"
-};
+);
diff --git a/PowerEditor/src/uchardet/LangHebrewModel.cpp b/PowerEditor/src/uchardet/LangHebrewModel.cpp
index a4e10addb..99a36e724 100644
--- a/PowerEditor/src/uchardet/LangHebrewModel.cpp
+++ b/PowerEditor/src/uchardet/LangHebrewModel.cpp
@@ -208,12 +208,10 @@ static const PRUint8 HebrewLangModel[] =
0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,
};
-const SequenceModel Win1255Model =
-{
+const SequenceModel Win1255Model(
win1255_CharToOrderMap,
HebrewLangModel,
(float)0.984004,
PR_FALSE,
- "windows-1255"
-};
+ "windows-1255");
diff --git a/PowerEditor/src/uchardet/LangHungarianModel.cpp b/PowerEditor/src/uchardet/LangHungarianModel.cpp
index 3af2f5882..856644afe 100644
--- a/PowerEditor/src/uchardet/LangHungarianModel.cpp
+++ b/PowerEditor/src/uchardet/LangHungarianModel.cpp
@@ -223,20 +223,16 @@ static const PRUint8 HungarianLangModel[] =
0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
};
-const SequenceModel Latin2HungarianModel =
-{
+const SequenceModel Latin2HungarianModel(
Latin2_HungarianCharToOrderMap,
HungarianLangModel,
(float)0.947368,
PR_TRUE,
- "ISO-8859-2"
-};
+ "ISO-8859-2");
-const SequenceModel Win1250HungarianModel =
-{
+const SequenceModel Win1250HungarianModel(
win1250HungarianCharToOrderMap,
HungarianLangModel,
(float)0.947368,
PR_TRUE,
- "windows-1250"
-};
+ "windows-1250");
diff --git a/PowerEditor/src/uchardet/LangThaiModel.cpp b/PowerEditor/src/uchardet/LangThaiModel.cpp
index 8145ffa1b..4c66699e9 100644
--- a/PowerEditor/src/uchardet/LangThaiModel.cpp
+++ b/PowerEditor/src/uchardet/LangThaiModel.cpp
@@ -208,14 +208,13 @@ static const PRUint8 ThaiLangModel[] =
0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-};
-
-
-const SequenceModel TIS620ThaiModel =
-{
- TIS620CharToOrderMap,
- ThaiLangModel,
- (float)0.926386,
- PR_FALSE,
- "TIS-620"
-};
+};
+
+
+const SequenceModel TIS620ThaiModel(
+ TIS620CharToOrderMap,
+ ThaiLangModel,
+ (float)0.926386,
+ PR_FALSE,
+ "TIS-620"
+);
diff --git a/PowerEditor/src/uchardet/nsCodingStateMachine.h b/PowerEditor/src/uchardet/nsCodingStateMachine.h
index 819f9ab07..07eadee9d 100644
--- a/PowerEditor/src/uchardet/nsCodingStateMachine.h
+++ b/PowerEditor/src/uchardet/nsCodingStateMachine.h
@@ -1,104 +1,107 @@
-/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * The Original Code is mozilla.org code.
- *
- * The Initial Developer of the Original Code is
- * Netscape Communications Corporation.
- * Portions created by the Initial Developer are Copyright (C) 1998
- * the Initial Developer. All Rights Reserved.
- *
- * Contributor(s):
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-#ifndef nsCodingStateMachine_h__
-#define nsCodingStateMachine_h__
-
-#include "nsPkgInt.h"
-
-typedef enum {
- eStart = 0,
- eError = 1,
- eItsMe = 2
-} nsSMState;
-
-#define GETCLASS(c) GETFROMPCK(((unsigned char)(c)), mModel->classTable)
-
-//state machine model
-typedef struct
-{
- nsPkgInt classTable;
- PRUint32 classFactor;
- nsPkgInt stateTable;
- const PRUint32* charLenTable;
- const char* name;
-} SMModel;
-
-class nsCodingStateMachine {
-public:
- nsCodingStateMachine(const SMModel* sm) : mModel(sm) { mCurrentState = eStart; }
- nsSMState NextState(char c){
- //for each byte we get its class , if it is first byte, we also get byte length
- PRUint32 byteCls = GETCLASS(c);
- if (mCurrentState == eStart)
- {
- mCurrentBytePos = 0;
- mCurrentCharLen = mModel->charLenTable[byteCls];
- }
- //from byte's class and stateTable, we get its next state
- mCurrentState=(nsSMState)GETFROMPCK(mCurrentState*(mModel->classFactor)+byteCls,
- mModel->stateTable);
- mCurrentBytePos++;
- return mCurrentState;
- }
- PRUint32 GetCurrentCharLen(void) {return mCurrentCharLen;}
- void Reset(void) {mCurrentState = eStart;}
- const char * GetCodingStateMachine() {return mModel->name;}
-
-protected:
- nsSMState mCurrentState;
- PRUint32 mCurrentCharLen;
- PRUint32 mCurrentBytePos;
-
- const SMModel *mModel;
-};
-
-extern const SMModel UTF8SMModel;
-extern const SMModel Big5SMModel;
-extern const SMModel EUCJPSMModel;
-extern const SMModel EUCKRSMModel;
-extern const SMModel EUCTWSMModel;
-extern const SMModel GB18030SMModel;
-extern const SMModel SJISSMModel;
-
-
-extern const SMModel HZSMModel;
-extern const SMModel ISO2022CNSMModel;
-extern const SMModel ISO2022JPSMModel;
-extern const SMModel ISO2022KRSMModel;
-
-#endif /* nsCodingStateMachine_h__ */
-
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is mozilla.org code.
+ *
+ * The Initial Developer of the Original Code is
+ * Netscape Communications Corporation.
+ * Portions created by the Initial Developer are Copyright (C) 1998
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+#ifndef nsCodingStateMachine_h__
+#define nsCodingStateMachine_h__
+
+#include "nsPkgInt.h"
+
+typedef enum {
+ eStart = 0,
+ eError = 1,
+ eItsMe = 2
+} nsSMState;
+
+#define GETCLASS(c) GETFROMPCK(((unsigned char)(c)), mModel->classTable)
+
+// State-machine model: packed class/state tables, per-class length table and charset name.
+struct SMModel
+{
+  nsPkgInt classTable;           // packed table read through GETCLASS to map a byte to its class
+  PRUint32 classFactor;          // multiplied with the current state when indexing stateTable
+  nsPkgInt stateTable;           // packed table indexed by state * classFactor + byte class
+  const PRUint32* charLenTable;  // indexed by byte class whenever the machine is in eStart
+  const char* name;              // string handed back by GetCodingStateMachine()
+  SMModel() : classFactor(0), charLenTable(0), name(0) {}  // scalars zeroed, not left indeterminate
+  SMModel(nsPkgInt cls, PRUint32 factor, nsPkgInt st, const PRUint32* lenTable, const char* charsetName)
+    : classTable(cls), classFactor(factor), stateTable(st), charLenTable(lenTable), name(charsetName) {}
+};
+
+class nsCodingStateMachine {  // steps through one SMModel, one input byte per NextState() call
+public:
+  nsCodingStateMachine(const SMModel* sm) : mCurrentState(eStart), mCurrentCharLen(0), mCurrentBytePos(0), mModel(sm) {}
+  nsSMState NextState(char c){
+    // Look up the class of this byte; when starting a new character, also record its byte length.
+    PRUint32 byteCls = GETCLASS(c);
+    if (mCurrentState == eStart)
+    {
+      mCurrentBytePos = 0;
+      mCurrentCharLen = mModel->charLenTable[byteCls];
+    }
+    // The next state is read from stateTable at (current state * classFactor + byte class).
+    mCurrentState=(nsSMState)GETFROMPCK(mCurrentState*(mModel->classFactor)+byteCls,
+                                        mModel->stateTable);
+    mCurrentBytePos++;
+    return mCurrentState;
+  }
+  PRUint32 GetCurrentCharLen(void) {return mCurrentCharLen;}  // 0 until NextState() has run from eStart
+  void Reset(void) {mCurrentState = eStart;}                  // only the state is rewound
+  const char * GetCodingStateMachine() {return mModel->name;}
+
+protected:
+  nsSMState mCurrentState;
+  PRUint32 mCurrentCharLen;   // value taken from charLenTable at the start of the current character
+  PRUint32 mCurrentBytePos;   // zeroed at eStart, incremented once per NextState() call
+
+  const SMModel *mModel;      // model being walked; this class never frees it
+};
+
+extern const SMModel UTF8SMModel;
+extern const SMModel Big5SMModel;
+extern const SMModel EUCJPSMModel;
+extern const SMModel EUCKRSMModel;
+extern const SMModel EUCTWSMModel;
+extern const SMModel GB18030SMModel;
+extern const SMModel SJISSMModel;
+
+
+extern const SMModel HZSMModel;
+extern const SMModel ISO2022CNSMModel;
+extern const SMModel ISO2022JPSMModel;
+extern const SMModel ISO2022KRSMModel;
+
+#endif /* nsCodingStateMachine_h__ */
+
diff --git a/PowerEditor/src/uchardet/nsEscSM.cpp b/PowerEditor/src/uchardet/nsEscSM.cpp
index eed1b7cf8..fdef171c2 100644
--- a/PowerEditor/src/uchardet/nsEscSM.cpp
+++ b/PowerEditor/src/uchardet/nsEscSM.cpp
@@ -83,13 +83,12 @@ PCK4BITS( 4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f
static const PRUint32 HZCharLenTable[] = {0, 0, 0, 0, 0, 0};
-const SMModel HZSMModel = {
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_cls },
+const SMModel HZSMModel(
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_cls),
6,
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_st },
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_st),
HZCharLenTable,
- "HZ-GB-2312",
-};
+ "HZ-GB-2312");
static const PRUint32 ISO2022CN_cls [ 256 / 8 ] = {
@@ -141,13 +140,12 @@ PCK4BITS(eError,eError,eError,eError,eError,eItsMe,eError,eStart) //38-3f
static const PRUint32 ISO2022CNCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
-const SMModel ISO2022CNSMModel = {
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_cls },
+const SMModel ISO2022CNSMModel(
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_cls),
9,
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_st },
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_st),
ISO2022CNCharLenTable,
- "ISO-2022-CN",
-};
+ "ISO-2022-CN");
static const PRUint32 ISO2022JP_cls [ 256 / 8 ] = {
PCK4BITS(2,0,0,0,0,0,0,0), // 00 - 07
@@ -199,13 +197,12 @@ PCK4BITS(eError,eError,eError,eError,eItsMe,eError,eStart,eStart) //40-47
static const PRUint32 ISO2022JPCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0};
-const SMModel ISO2022JPSMModel = {
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_cls },
+const SMModel ISO2022JPSMModel(
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_cls),
10,
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_st },
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_st),
ISO2022JPCharLenTable,
- "ISO-2022-JP",
-};
+ "ISO-2022-JP");
static const PRUint32 ISO2022KR_cls [ 256 / 8 ] = {
PCK4BITS(2,0,0,0,0,0,0,0), // 00 - 07
@@ -253,11 +250,10 @@ PCK4BITS(eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart) //20-27
static const PRUint32 ISO2022KRCharLenTable[] = {0, 0, 0, 0, 0, 0};
-const SMModel ISO2022KRSMModel = {
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_cls },
+const SMModel ISO2022KRSMModel(
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_cls),
6,
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_st },
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_st),
ISO2022KRCharLenTable,
- "ISO-2022-KR",
-};
+ "ISO-2022-KR");
diff --git a/PowerEditor/src/uchardet/nsMBCSSM.cpp b/PowerEditor/src/uchardet/nsMBCSSM.cpp
index 584e93182..e18c7cc83 100644
--- a/PowerEditor/src/uchardet/nsMBCSSM.cpp
+++ b/PowerEditor/src/uchardet/nsMBCSSM.cpp
@@ -1,513 +1,506 @@
-/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * The Original Code is mozilla.org code.
- *
- * The Initial Developer of the Original Code is
- * Netscape Communications Corporation.
- * Portions created by the Initial Developer are Copyright (C) 1998
- * the Initial Developer. All Rights Reserved.
- *
- * Contributor(s):
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-#include "nsCodingStateMachine.h"
-
-/*
-Modification from frank tang's original work:
-. 0x00 is allowed as a legal character. Since some web pages contains this char in
- text stream.
-*/
-
-// BIG5
-
-static const PRUint32 BIG5_cls [ 256 / 8 ] = {
-//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
-PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07 //allow 0x00 as legal value
-PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
-PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
-PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
-PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
-PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
-PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37
-PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f
-PCK4BITS(2,2,2,2,2,2,2,2), // 40 - 47
-PCK4BITS(2,2,2,2,2,2,2,2), // 48 - 4f
-PCK4BITS(2,2,2,2,2,2,2,2), // 50 - 57
-PCK4BITS(2,2,2,2,2,2,2,2), // 58 - 5f
-PCK4BITS(2,2,2,2,2,2,2,2), // 60 - 67
-PCK4BITS(2,2,2,2,2,2,2,2), // 68 - 6f
-PCK4BITS(2,2,2,2,2,2,2,2), // 70 - 77
-PCK4BITS(2,2,2,2,2,2,2,1), // 78 - 7f
-PCK4BITS(4,4,4,4,4,4,4,4), // 80 - 87
-PCK4BITS(4,4,4,4,4,4,4,4), // 88 - 8f
-PCK4BITS(4,4,4,4,4,4,4,4), // 90 - 97
-PCK4BITS(4,4,4,4,4,4,4,4), // 98 - 9f
-PCK4BITS(4,3,3,3,3,3,3,3), // a0 - a7
-PCK4BITS(3,3,3,3,3,3,3,3), // a8 - af
-PCK4BITS(3,3,3,3,3,3,3,3), // b0 - b7
-PCK4BITS(3,3,3,3,3,3,3,3), // b8 - bf
-PCK4BITS(3,3,3,3,3,3,3,3), // c0 - c7
-PCK4BITS(3,3,3,3,3,3,3,3), // c8 - cf
-PCK4BITS(3,3,3,3,3,3,3,3), // d0 - d7
-PCK4BITS(3,3,3,3,3,3,3,3), // d8 - df
-PCK4BITS(3,3,3,3,3,3,3,3), // e0 - e7
-PCK4BITS(3,3,3,3,3,3,3,3), // e8 - ef
-PCK4BITS(3,3,3,3,3,3,3,3), // f0 - f7
-PCK4BITS(3,3,3,3,3,3,3,0) // f8 - ff
-};
-
-
-static const PRUint32 BIG5_st [ 3] = {
-PCK4BITS(eError,eStart,eStart, 3,eError,eError,eError,eError),//00-07
-PCK4BITS(eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError),//08-0f
-PCK4BITS(eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart) //10-17
-};
-
-static const PRUint32 Big5CharLenTable[] = {0, 1, 1, 2, 0};
-
-SMModel const Big5SMModel = {
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_cls },
- 5,
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_st },
- Big5CharLenTable,
- "Big5",
-};
-
-static const PRUint32 EUCJP_cls [ 256 / 8 ] = {
-//PCK4BITS(5,4,4,4,4,4,4,4), // 00 - 07
-PCK4BITS(4,4,4,4,4,4,4,4), // 00 - 07
-PCK4BITS(4,4,4,4,4,4,5,5), // 08 - 0f
-PCK4BITS(4,4,4,4,4,4,4,4), // 10 - 17
-PCK4BITS(4,4,4,5,4,4,4,4), // 18 - 1f
-PCK4BITS(4,4,4,4,4,4,4,4), // 20 - 27
-PCK4BITS(4,4,4,4,4,4,4,4), // 28 - 2f
-PCK4BITS(4,4,4,4,4,4,4,4), // 30 - 37
-PCK4BITS(4,4,4,4,4,4,4,4), // 38 - 3f
-PCK4BITS(4,4,4,4,4,4,4,4), // 40 - 47
-PCK4BITS(4,4,4,4,4,4,4,4), // 48 - 4f
-PCK4BITS(4,4,4,4,4,4,4,4), // 50 - 57
-PCK4BITS(4,4,4,4,4,4,4,4), // 58 - 5f
-PCK4BITS(4,4,4,4,4,4,4,4), // 60 - 67
-PCK4BITS(4,4,4,4,4,4,4,4), // 68 - 6f
-PCK4BITS(4,4,4,4,4,4,4,4), // 70 - 77
-PCK4BITS(4,4,4,4,4,4,4,4), // 78 - 7f
-PCK4BITS(5,5,5,5,5,5,5,5), // 80 - 87
-PCK4BITS(5,5,5,5,5,5,1,3), // 88 - 8f
-PCK4BITS(5,5,5,5,5,5,5,5), // 90 - 97
-PCK4BITS(5,5,5,5,5,5,5,5), // 98 - 9f
-PCK4BITS(5,2,2,2,2,2,2,2), // a0 - a7
-PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af
-PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
-PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
-PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
-PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
-PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
-PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
-PCK4BITS(0,0,0,0,0,0,0,0), // e0 - e7
-PCK4BITS(0,0,0,0,0,0,0,0), // e8 - ef
-PCK4BITS(0,0,0,0,0,0,0,0), // f0 - f7
-PCK4BITS(0,0,0,0,0,0,0,5) // f8 - ff
-};
-
-
-static const PRUint32 EUCJP_st [ 5] = {
-PCK4BITS( 3, 4, 3, 5,eStart,eError,eError,eError),//00-07
-PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f
-PCK4BITS(eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError),//10-17
-PCK4BITS(eError,eError,eStart,eError,eError,eError, 3,eError),//18-1f
-PCK4BITS( 3,eError,eError,eError,eStart,eStart,eStart,eStart) //20-27
-};
-
-static const PRUint32 EUCJPCharLenTable[] = {2, 2, 2, 3, 1, 0};
-
-const SMModel EUCJPSMModel = {
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_cls },
- 6,
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_st },
- EUCJPCharLenTable,
- "EUC-JP",
-};
-
-static const PRUint32 EUCKR_cls [ 256 / 8 ] = {
-//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
-PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
-PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
-PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
-PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
-PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
-PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
-PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37
-PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f
-PCK4BITS(1,1,1,1,1,1,1,1), // 40 - 47
-PCK4BITS(1,1,1,1,1,1,1,1), // 48 - 4f
-PCK4BITS(1,1,1,1,1,1,1,1), // 50 - 57
-PCK4BITS(1,1,1,1,1,1,1,1), // 58 - 5f
-PCK4BITS(1,1,1,1,1,1,1,1), // 60 - 67
-PCK4BITS(1,1,1,1,1,1,1,1), // 68 - 6f
-PCK4BITS(1,1,1,1,1,1,1,1), // 70 - 77
-PCK4BITS(1,1,1,1,1,1,1,1), // 78 - 7f
-PCK4BITS(0,0,0,0,0,0,0,0), // 80 - 87
-PCK4BITS(0,0,0,0,0,0,0,0), // 88 - 8f
-PCK4BITS(0,0,0,0,0,0,0,0), // 90 - 97
-PCK4BITS(0,0,0,0,0,0,0,0), // 98 - 9f
-PCK4BITS(0,2,2,2,2,2,2,2), // a0 - a7
-PCK4BITS(2,2,2,2,2,3,3,3), // a8 - af
-PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
-PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
-PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
-PCK4BITS(2,3,2,2,2,2,2,2), // c8 - cf
-PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
-PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
-PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7
-PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef
-PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7
-PCK4BITS(2,2,2,2,2,2,2,0) // f8 - ff
-};
-
-
-static const PRUint32 EUCKR_st [ 2] = {
-PCK4BITS(eError,eStart, 3,eError,eError,eError,eError,eError),//00-07
-PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart) //08-0f
-};
-
-static const PRUint32 EUCKRCharLenTable[] = {0, 1, 2, 0};
-
-const SMModel EUCKRSMModel = {
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_cls },
- 4,
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_st },
- EUCKRCharLenTable,
- "EUC-KR",
-};
-
-static const PRUint32 EUCTW_cls [ 256 / 8 ] = {
-//PCK4BITS(0,2,2,2,2,2,2,2), // 00 - 07
-PCK4BITS(2,2,2,2,2,2,2,2), // 00 - 07
-PCK4BITS(2,2,2,2,2,2,0,0), // 08 - 0f
-PCK4BITS(2,2,2,2,2,2,2,2), // 10 - 17
-PCK4BITS(2,2,2,0,2,2,2,2), // 18 - 1f
-PCK4BITS(2,2,2,2,2,2,2,2), // 20 - 27
-PCK4BITS(2,2,2,2,2,2,2,2), // 28 - 2f
-PCK4BITS(2,2,2,2,2,2,2,2), // 30 - 37
-PCK4BITS(2,2,2,2,2,2,2,2), // 38 - 3f
-PCK4BITS(2,2,2,2,2,2,2,2), // 40 - 47
-PCK4BITS(2,2,2,2,2,2,2,2), // 48 - 4f
-PCK4BITS(2,2,2,2,2,2,2,2), // 50 - 57
-PCK4BITS(2,2,2,2,2,2,2,2), // 58 - 5f
-PCK4BITS(2,2,2,2,2,2,2,2), // 60 - 67
-PCK4BITS(2,2,2,2,2,2,2,2), // 68 - 6f
-PCK4BITS(2,2,2,2,2,2,2,2), // 70 - 77
-PCK4BITS(2,2,2,2,2,2,2,2), // 78 - 7f
-PCK4BITS(0,0,0,0,0,0,0,0), // 80 - 87
-PCK4BITS(0,0,0,0,0,0,6,0), // 88 - 8f
-PCK4BITS(0,0,0,0,0,0,0,0), // 90 - 97
-PCK4BITS(0,0,0,0,0,0,0,0), // 98 - 9f
-PCK4BITS(0,3,4,4,4,4,4,4), // a0 - a7
-PCK4BITS(5,5,1,1,1,1,1,1), // a8 - af
-PCK4BITS(1,1,1,1,1,1,1,1), // b0 - b7
-PCK4BITS(1,1,1,1,1,1,1,1), // b8 - bf
-PCK4BITS(1,1,3,1,3,3,3,3), // c0 - c7
-PCK4BITS(3,3,3,3,3,3,3,3), // c8 - cf
-PCK4BITS(3,3,3,3,3,3,3,3), // d0 - d7
-PCK4BITS(3,3,3,3,3,3,3,3), // d8 - df
-PCK4BITS(3,3,3,3,3,3,3,3), // e0 - e7
-PCK4BITS(3,3,3,3,3,3,3,3), // e8 - ef
-PCK4BITS(3,3,3,3,3,3,3,3), // f0 - f7
-PCK4BITS(3,3,3,3,3,3,3,0) // f8 - ff
-};
-
-
-static const PRUint32 EUCTW_st [ 6] = {
-PCK4BITS(eError,eError,eStart, 3, 3, 3, 4,eError),//00-07
-PCK4BITS(eError,eError,eError,eError,eError,eError,eItsMe,eItsMe),//08-0f
-PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError),//10-17
-PCK4BITS(eStart,eStart,eStart,eError,eError,eError,eError,eError),//18-1f
-PCK4BITS( 5,eError,eError,eError,eStart,eError,eStart,eStart),//20-27
-PCK4BITS(eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f
-};
-
-static const PRUint32 EUCTWCharLenTable[] = {0, 0, 1, 2, 2, 2, 3};
-
-const SMModel EUCTWSMModel = {
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCTW_cls },
- 7,
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCTW_st },
- EUCTWCharLenTable,
- "x-euc-tw",
-};
-
-/* obsolete GB2312 by gb18030
-static PRUint32 GB2312_cls [ 256 / 8 ] = {
-//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
-PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
-PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
-PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
-PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
-PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
-PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
-PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37
-PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f
-PCK4BITS(1,1,1,1,1,1,1,1), // 40 - 47
-PCK4BITS(1,1,1,1,1,1,1,1), // 48 - 4f
-PCK4BITS(1,1,1,1,1,1,1,1), // 50 - 57
-PCK4BITS(1,1,1,1,1,1,1,1), // 58 - 5f
-PCK4BITS(1,1,1,1,1,1,1,1), // 60 - 67
-PCK4BITS(1,1,1,1,1,1,1,1), // 68 - 6f
-PCK4BITS(1,1,1,1,1,1,1,1), // 70 - 77
-PCK4BITS(1,1,1,1,1,1,1,1), // 78 - 7f
-PCK4BITS(1,0,0,0,0,0,0,0), // 80 - 87
-PCK4BITS(0,0,0,0,0,0,0,0), // 88 - 8f
-PCK4BITS(0,0,0,0,0,0,0,0), // 90 - 97
-PCK4BITS(0,0,0,0,0,0,0,0), // 98 - 9f
-PCK4BITS(0,2,2,2,2,2,2,2), // a0 - a7
-PCK4BITS(2,2,3,3,3,3,3,3), // a8 - af
-PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
-PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
-PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
-PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
-PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
-PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
-PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7
-PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef
-PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7
-PCK4BITS(2,2,2,2,2,2,2,0) // f8 - ff
-};
-
-
-static PRUint32 GB2312_st [ 2] = {
-PCK4BITS(eError,eStart, 3,eError,eError,eError,eError,eError),//00-07
-PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart) //08-0f
-};
-
-static const PRUint32 GB2312CharLenTable[] = {0, 1, 2, 0};
-
-SMModel GB2312SMModel = {
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB2312_cls },
- 4,
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB2312_st },
- GB2312CharLenTable,
- "GB2312",
-};
-*/
-
-// the following state machine data was created by perl script in
-// intl/chardet/tools. It should be the same as in PSM detector.
-static const PRUint32 GB18030_cls [ 256 / 8 ] = {
-PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
-PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
-PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
-PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
-PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
-PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
-PCK4BITS(3,3,3,3,3,3,3,3), // 30 - 37
-PCK4BITS(3,3,1,1,1,1,1,1), // 38 - 3f
-PCK4BITS(2,2,2,2,2,2,2,2), // 40 - 47
-PCK4BITS(2,2,2,2,2,2,2,2), // 48 - 4f
-PCK4BITS(2,2,2,2,2,2,2,2), // 50 - 57
-PCK4BITS(2,2,2,2,2,2,2,2), // 58 - 5f
-PCK4BITS(2,2,2,2,2,2,2,2), // 60 - 67
-PCK4BITS(2,2,2,2,2,2,2,2), // 68 - 6f
-PCK4BITS(2,2,2,2,2,2,2,2), // 70 - 77
-PCK4BITS(2,2,2,2,2,2,2,4), // 78 - 7f
-PCK4BITS(5,6,6,6,6,6,6,6), // 80 - 87
-PCK4BITS(6,6,6,6,6,6,6,6), // 88 - 8f
-PCK4BITS(6,6,6,6,6,6,6,6), // 90 - 97
-PCK4BITS(6,6,6,6,6,6,6,6), // 98 - 9f
-PCK4BITS(6,6,6,6,6,6,6,6), // a0 - a7
-PCK4BITS(6,6,6,6,6,6,6,6), // a8 - af
-PCK4BITS(6,6,6,6,6,6,6,6), // b0 - b7
-PCK4BITS(6,6,6,6,6,6,6,6), // b8 - bf
-PCK4BITS(6,6,6,6,6,6,6,6), // c0 - c7
-PCK4BITS(6,6,6,6,6,6,6,6), // c8 - cf
-PCK4BITS(6,6,6,6,6,6,6,6), // d0 - d7
-PCK4BITS(6,6,6,6,6,6,6,6), // d8 - df
-PCK4BITS(6,6,6,6,6,6,6,6), // e0 - e7
-PCK4BITS(6,6,6,6,6,6,6,6), // e8 - ef
-PCK4BITS(6,6,6,6,6,6,6,6), // f0 - f7
-PCK4BITS(6,6,6,6,6,6,6,0) // f8 - ff
-};
-
-
-static const PRUint32 GB18030_st [ 6] = {
-PCK4BITS(eError,eStart,eStart,eStart,eStart,eStart, 3,eError),//00-07
-PCK4BITS(eError,eError,eError,eError,eError,eError,eItsMe,eItsMe),//08-0f
-PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart),//10-17
-PCK4BITS( 4,eError,eStart,eStart,eError,eError,eError,eError),//18-1f
-PCK4BITS(eError,eError, 5,eError,eError,eError,eItsMe,eError),//20-27
-PCK4BITS(eError,eError,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f
-};
-
-// To be accurate, the length of class 6 can be either 2 or 4.
-// But it is not necessary to discriminate between the two since
-// it is used for frequency analysis only, and we are validing
-// each code range there as well. So it is safe to set it to be
-// 2 here.
-static const PRUint32 GB18030CharLenTable[] = {0, 1, 1, 1, 1, 1, 2};
-
-const SMModel GB18030SMModel = {
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_cls },
- 7,
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_st },
- GB18030CharLenTable,
- "GB18030",
-};
-
-// sjis
-
-static const PRUint32 SJIS_cls [ 256 / 8 ] = {
-//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
-PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
-PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
-PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
-PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
-PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
-PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
-PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37
-PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f
-PCK4BITS(2,2,2,2,2,2,2,2), // 40 - 47
-PCK4BITS(2,2,2,2,2,2,2,2), // 48 - 4f
-PCK4BITS(2,2,2,2,2,2,2,2), // 50 - 57
-PCK4BITS(2,2,2,2,2,2,2,2), // 58 - 5f
-PCK4BITS(2,2,2,2,2,2,2,2), // 60 - 67
-PCK4BITS(2,2,2,2,2,2,2,2), // 68 - 6f
-PCK4BITS(2,2,2,2,2,2,2,2), // 70 - 77
-PCK4BITS(2,2,2,2,2,2,2,1), // 78 - 7f
-PCK4BITS(3,3,3,3,3,3,3,3), // 80 - 87
-PCK4BITS(3,3,3,3,3,3,3,3), // 88 - 8f
-PCK4BITS(3,3,3,3,3,3,3,3), // 90 - 97
-PCK4BITS(3,3,3,3,3,3,3,3), // 98 - 9f
-//0xa0 is illegal in sjis encoding, but some pages does
-//contain such byte. We need to be more error forgiven.
-PCK4BITS(2,2,2,2,2,2,2,2), // a0 - a7
-PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af
-PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
-PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
-PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
-PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
-PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
-PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
-PCK4BITS(3,3,3,3,3,3,3,3), // e0 - e7
-PCK4BITS(3,3,3,3,3,4,4,4), // e8 - ef
-PCK4BITS(4,4,4,4,4,4,4,4), // f0 - f7
-PCK4BITS(4,4,4,4,4,0,0,0) // f8 - ff
-};
-
-
-static const PRUint32 SJIS_st [ 3] = {
-PCK4BITS(eError,eStart,eStart, 3,eError,eError,eError,eError),//00-07
-PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f
-PCK4BITS(eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart) //10-17
-};
-
-static const PRUint32 SJISCharLenTable[] = {0, 1, 1, 2, 0, 0};
-
-const SMModel SJISSMModel = {
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_cls },
- 6,
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_st },
- SJISCharLenTable,
- "Shift_JIS",
-};
-
-
-static const PRUint32 UTF8_cls [ 256 / 8 ] = {
-//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
-PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07 //allow 0x00 as a legal value
-PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
-PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
-PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
-PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
-PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
-PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37
-PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f
-PCK4BITS(1,1,1,1,1,1,1,1), // 40 - 47
-PCK4BITS(1,1,1,1,1,1,1,1), // 48 - 4f
-PCK4BITS(1,1,1,1,1,1,1,1), // 50 - 57
-PCK4BITS(1,1,1,1,1,1,1,1), // 58 - 5f
-PCK4BITS(1,1,1,1,1,1,1,1), // 60 - 67
-PCK4BITS(1,1,1,1,1,1,1,1), // 68 - 6f
-PCK4BITS(1,1,1,1,1,1,1,1), // 70 - 77
-PCK4BITS(1,1,1,1,1,1,1,1), // 78 - 7f
-PCK4BITS(2,2,2,2,3,3,3,3), // 80 - 87
-PCK4BITS(4,4,4,4,4,4,4,4), // 88 - 8f
-PCK4BITS(4,4,4,4,4,4,4,4), // 90 - 97
-PCK4BITS(4,4,4,4,4,4,4,4), // 98 - 9f
-PCK4BITS(5,5,5,5,5,5,5,5), // a0 - a7
-PCK4BITS(5,5,5,5,5,5,5,5), // a8 - af
-PCK4BITS(5,5,5,5,5,5,5,5), // b0 - b7
-PCK4BITS(5,5,5,5,5,5,5,5), // b8 - bf
-PCK4BITS(0,0,6,6,6,6,6,6), // c0 - c7
-PCK4BITS(6,6,6,6,6,6,6,6), // c8 - cf
-PCK4BITS(6,6,6,6,6,6,6,6), // d0 - d7
-PCK4BITS(6,6,6,6,6,6,6,6), // d8 - df
-PCK4BITS(7,8,8,8,8,8,8,8), // e0 - e7
-PCK4BITS(8,8,8,8,8,9,8,8), // e8 - ef
-PCK4BITS(10,11,11,11,11,11,11,11), // f0 - f7
-PCK4BITS(12,13,13,13,14,15,0,0) // f8 - ff
-};
-
-
-static const PRUint32 UTF8_st [ 26] = {
-PCK4BITS(eError,eStart,eError,eError,eError,eError, 12, 10),//00-07
-PCK4BITS( 9, 11, 8, 7, 6, 5, 4, 3),//08-0f
-PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//10-17
-PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//18-1f
-PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe),//20-27
-PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe),//28-2f
-PCK4BITS(eError,eError, 5, 5, 5, 5,eError,eError),//30-37
-PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//38-3f
-PCK4BITS(eError,eError,eError, 5, 5, 5,eError,eError),//40-47
-PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//48-4f
-PCK4BITS(eError,eError, 7, 7, 7, 7,eError,eError),//50-57
-PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//58-5f
-PCK4BITS(eError,eError,eError,eError, 7, 7,eError,eError),//60-67
-PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//68-6f
-PCK4BITS(eError,eError, 9, 9, 9, 9,eError,eError),//70-77
-PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//78-7f
-PCK4BITS(eError,eError,eError,eError,eError, 9,eError,eError),//80-87
-PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//88-8f
-PCK4BITS(eError,eError, 12, 12, 12, 12,eError,eError),//90-97
-PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//98-9f
-PCK4BITS(eError,eError,eError,eError,eError, 12,eError,eError),//a0-a7
-PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//a8-af
-PCK4BITS(eError,eError, 12, 12, 12,eError,eError,eError),//b0-b7
-PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//b8-bf
-PCK4BITS(eError,eError,eStart,eStart,eStart,eStart,eError,eError),//c0-c7
-PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError) //c8-cf
-};
-
-static const PRUint32 UTF8CharLenTable[] = {0, 1, 0, 0, 0, 0, 2, 3,
- 3, 3, 4, 4, 5, 5, 6, 6 };
-
-const SMModel UTF8SMModel = {
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_cls },
- 16,
- {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_st },
- UTF8CharLenTable,
- "UTF-8",
-};
-
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is mozilla.org code.
+ *
+ * The Initial Developer of the Original Code is
+ * Netscape Communications Corporation.
+ * Portions created by the Initial Developer are Copyright (C) 1998
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+#include "nsCodingStateMachine.h"
+
+/*
+Modification from Frank Tang's original work:
+. 0x00 is allowed as a legal character, since some web pages contain this char in
+ the text stream.
+*/
+
+// BIG5
+
+// Byte-class table for the Big5 model: one entry per byte value 0x00-0xff,
+// packed eight to a PRUint32 by PCK4BITS. The trailing comment on each row
+// gives the byte range that row covers.
+static const PRUint32 BIG5_cls [ 256 / 8 ] = {
+//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
+PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07 //allow 0x00 as legal value
+PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
+PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
+PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
+PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
+PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
+PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37
+PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f
+PCK4BITS(2,2,2,2,2,2,2,2), // 40 - 47
+PCK4BITS(2,2,2,2,2,2,2,2), // 48 - 4f
+PCK4BITS(2,2,2,2,2,2,2,2), // 50 - 57
+PCK4BITS(2,2,2,2,2,2,2,2), // 58 - 5f
+PCK4BITS(2,2,2,2,2,2,2,2), // 60 - 67
+PCK4BITS(2,2,2,2,2,2,2,2), // 68 - 6f
+PCK4BITS(2,2,2,2,2,2,2,2), // 70 - 77
+PCK4BITS(2,2,2,2,2,2,2,1), // 78 - 7f
+PCK4BITS(4,4,4,4,4,4,4,4), // 80 - 87
+PCK4BITS(4,4,4,4,4,4,4,4), // 88 - 8f
+PCK4BITS(4,4,4,4,4,4,4,4), // 90 - 97
+PCK4BITS(4,4,4,4,4,4,4,4), // 98 - 9f
+PCK4BITS(4,3,3,3,3,3,3,3), // a0 - a7
+PCK4BITS(3,3,3,3,3,3,3,3), // a8 - af
+PCK4BITS(3,3,3,3,3,3,3,3), // b0 - b7
+PCK4BITS(3,3,3,3,3,3,3,3), // b8 - bf
+PCK4BITS(3,3,3,3,3,3,3,3), // c0 - c7
+PCK4BITS(3,3,3,3,3,3,3,3), // c8 - cf
+PCK4BITS(3,3,3,3,3,3,3,3), // d0 - d7
+PCK4BITS(3,3,3,3,3,3,3,3), // d8 - df
+PCK4BITS(3,3,3,3,3,3,3,3), // e0 - e7
+PCK4BITS(3,3,3,3,3,3,3,3), // e8 - ef
+PCK4BITS(3,3,3,3,3,3,3,3), // f0 - f7
+PCK4BITS(3,3,3,3,3,3,3,0) // f8 - ff
+};
+
+
+// Packed state table for the Big5 model: 3 words of eight entries each.
+// An entry is eStart, eError, eItsMe or a bare number (presumably a further
+// state id -- confirm against nsCodingStateMachine.h). The trailing comments
+// give the flat entry indices covered by each row.
+static const PRUint32 BIG5_st [ 3] = {
+PCK4BITS(eError,eStart,eStart, 3,eError,eError,eError,eError),//00-07
+PCK4BITS(eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError),//08-0f
+PCK4BITS(eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart) //10-17
+};
+
+// Per-class table handed to Big5SMModel; its 5 entries line up with the
+// count 5 passed to the model. Presumably the byte length of a character
+// for each byte class -- TODO confirm.
+static const PRUint32 Big5CharLenTable[] = {0, 1, 1, 2, 0};
+
+// Big5 model definition: wraps BIG5_cls and BIG5_st as 4-bit packed nsPkgInt
+// tables, passes the count 5 (the number of entries in Big5CharLenTable),
+// the length table itself, and the charset name "Big5".
+const SMModel Big5SMModel(
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_cls),
+ 5,
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_st),
+ Big5CharLenTable,
+ "Big5");
+
+// Byte-class table for the EUC-JP model: one entry per byte value 0x00-0xff,
+// packed eight to a PRUint32 by PCK4BITS. The trailing comment on each row
+// gives the byte range that row covers.
+static const PRUint32 EUCJP_cls [ 256 / 8 ] = {
+//PCK4BITS(5,4,4,4,4,4,4,4), // 00 - 07
+PCK4BITS(4,4,4,4,4,4,4,4), // 00 - 07
+PCK4BITS(4,4,4,4,4,4,5,5), // 08 - 0f
+PCK4BITS(4,4,4,4,4,4,4,4), // 10 - 17
+PCK4BITS(4,4,4,5,4,4,4,4), // 18 - 1f
+PCK4BITS(4,4,4,4,4,4,4,4), // 20 - 27
+PCK4BITS(4,4,4,4,4,4,4,4), // 28 - 2f
+PCK4BITS(4,4,4,4,4,4,4,4), // 30 - 37
+PCK4BITS(4,4,4,4,4,4,4,4), // 38 - 3f
+PCK4BITS(4,4,4,4,4,4,4,4), // 40 - 47
+PCK4BITS(4,4,4,4,4,4,4,4), // 48 - 4f
+PCK4BITS(4,4,4,4,4,4,4,4), // 50 - 57
+PCK4BITS(4,4,4,4,4,4,4,4), // 58 - 5f
+PCK4BITS(4,4,4,4,4,4,4,4), // 60 - 67
+PCK4BITS(4,4,4,4,4,4,4,4), // 68 - 6f
+PCK4BITS(4,4,4,4,4,4,4,4), // 70 - 77
+PCK4BITS(4,4,4,4,4,4,4,4), // 78 - 7f
+PCK4BITS(5,5,5,5,5,5,5,5), // 80 - 87
+PCK4BITS(5,5,5,5,5,5,1,3), // 88 - 8f
+PCK4BITS(5,5,5,5,5,5,5,5), // 90 - 97
+PCK4BITS(5,5,5,5,5,5,5,5), // 98 - 9f
+PCK4BITS(5,2,2,2,2,2,2,2), // a0 - a7
+PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af
+PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
+PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
+PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
+PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
+PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
+PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
+PCK4BITS(0,0,0,0,0,0,0,0), // e0 - e7
+PCK4BITS(0,0,0,0,0,0,0,0), // e8 - ef
+PCK4BITS(0,0,0,0,0,0,0,0), // f0 - f7
+PCK4BITS(0,0,0,0,0,0,0,5) // f8 - ff
+};
+
+
+// Packed state table for the EUC-JP model: 5 words of eight entries each.
+// An entry is eStart, eError, eItsMe or a bare number (presumably a further
+// state id -- confirm against nsCodingStateMachine.h). The trailing comments
+// give the flat entry indices covered by each row.
+static const PRUint32 EUCJP_st [ 5] = {
+PCK4BITS( 3, 4, 3, 5,eStart,eError,eError,eError),//00-07
+PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f
+PCK4BITS(eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError),//10-17
+PCK4BITS(eError,eError,eStart,eError,eError,eError, 3,eError),//18-1f
+PCK4BITS( 3,eError,eError,eError,eStart,eStart,eStart,eStart) //20-27
+};
+
+// Per-class table handed to EUCJPSMModel; its 6 entries line up with the
+// count 6 passed to the model. Presumably the byte length of a character
+// for each byte class -- TODO confirm.
+static const PRUint32 EUCJPCharLenTable[] = {2, 2, 2, 3, 1, 0};
+
+// EUC-JP model definition: wraps EUCJP_cls and EUCJP_st as 4-bit packed
+// nsPkgInt tables, passes the count 6 (the number of entries in
+// EUCJPCharLenTable), the length table itself, and the name "EUC-JP".
+const SMModel EUCJPSMModel(
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_cls),
+ 6,
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_st),
+ EUCJPCharLenTable,
+ "EUC-JP");
+
+// Byte-class table for the EUC-KR model: one entry per byte value 0x00-0xff,
+// packed eight to a PRUint32 by PCK4BITS. The trailing comment on each row
+// gives the byte range that row covers.
+static const PRUint32 EUCKR_cls [ 256 / 8 ] = {
+//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
+PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
+PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
+PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
+PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
+PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
+PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
+PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37
+PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f
+PCK4BITS(1,1,1,1,1,1,1,1), // 40 - 47
+PCK4BITS(1,1,1,1,1,1,1,1), // 48 - 4f
+PCK4BITS(1,1,1,1,1,1,1,1), // 50 - 57
+PCK4BITS(1,1,1,1,1,1,1,1), // 58 - 5f
+PCK4BITS(1,1,1,1,1,1,1,1), // 60 - 67
+PCK4BITS(1,1,1,1,1,1,1,1), // 68 - 6f
+PCK4BITS(1,1,1,1,1,1,1,1), // 70 - 77
+PCK4BITS(1,1,1,1,1,1,1,1), // 78 - 7f
+PCK4BITS(0,0,0,0,0,0,0,0), // 80 - 87
+PCK4BITS(0,0,0,0,0,0,0,0), // 88 - 8f
+PCK4BITS(0,0,0,0,0,0,0,0), // 90 - 97
+PCK4BITS(0,0,0,0,0,0,0,0), // 98 - 9f
+PCK4BITS(0,2,2,2,2,2,2,2), // a0 - a7
+PCK4BITS(2,2,2,2,2,3,3,3), // a8 - af
+PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
+PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
+PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
+PCK4BITS(2,3,2,2,2,2,2,2), // c8 - cf
+PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
+PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
+PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7
+PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef
+PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7
+PCK4BITS(2,2,2,2,2,2,2,0) // f8 - ff
+};
+
+
+// Packed state table for the EUC-KR model: 2 words of eight entries each.
+// An entry is eStart, eError, eItsMe or a bare number (presumably a further
+// state id -- confirm against nsCodingStateMachine.h). The trailing comments
+// give the flat entry indices covered by each row.
+static const PRUint32 EUCKR_st [ 2] = {
+PCK4BITS(eError,eStart, 3,eError,eError,eError,eError,eError),//00-07
+PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart) //08-0f
+};
+
+// Per-class table handed to EUCKRSMModel; its 4 entries line up with the
+// count 4 passed to the model. Presumably the byte length of a character
+// for each byte class -- TODO confirm.
+static const PRUint32 EUCKRCharLenTable[] = {0, 1, 2, 0};
+
+// EUC-KR model definition: wraps EUCKR_cls and EUCKR_st as 4-bit packed
+// nsPkgInt tables, passes the count 4 (the number of entries in
+// EUCKRCharLenTable), the length table itself, and the name "EUC-KR".
+const SMModel EUCKRSMModel (
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_cls),
+ 4,
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_st),
+ EUCKRCharLenTable,
+ "EUC-KR");
+
+// Byte-class table for the EUC-TW model: one entry per byte value 0x00-0xff,
+// packed eight to a PRUint32 by PCK4BITS. The trailing comment on each row
+// gives the byte range that row covers.
+static const PRUint32 EUCTW_cls [ 256 / 8 ] = {
+//PCK4BITS(0,2,2,2,2,2,2,2), // 00 - 07
+PCK4BITS(2,2,2,2,2,2,2,2), // 00 - 07
+PCK4BITS(2,2,2,2,2,2,0,0), // 08 - 0f
+PCK4BITS(2,2,2,2,2,2,2,2), // 10 - 17
+PCK4BITS(2,2,2,0,2,2,2,2), // 18 - 1f
+PCK4BITS(2,2,2,2,2,2,2,2), // 20 - 27
+PCK4BITS(2,2,2,2,2,2,2,2), // 28 - 2f
+PCK4BITS(2,2,2,2,2,2,2,2), // 30 - 37
+PCK4BITS(2,2,2,2,2,2,2,2), // 38 - 3f
+PCK4BITS(2,2,2,2,2,2,2,2), // 40 - 47
+PCK4BITS(2,2,2,2,2,2,2,2), // 48 - 4f
+PCK4BITS(2,2,2,2,2,2,2,2), // 50 - 57
+PCK4BITS(2,2,2,2,2,2,2,2), // 58 - 5f
+PCK4BITS(2,2,2,2,2,2,2,2), // 60 - 67
+PCK4BITS(2,2,2,2,2,2,2,2), // 68 - 6f
+PCK4BITS(2,2,2,2,2,2,2,2), // 70 - 77
+PCK4BITS(2,2,2,2,2,2,2,2), // 78 - 7f
+PCK4BITS(0,0,0,0,0,0,0,0), // 80 - 87
+PCK4BITS(0,0,0,0,0,0,6,0), // 88 - 8f
+PCK4BITS(0,0,0,0,0,0,0,0), // 90 - 97
+PCK4BITS(0,0,0,0,0,0,0,0), // 98 - 9f
+PCK4BITS(0,3,4,4,4,4,4,4), // a0 - a7
+PCK4BITS(5,5,1,1,1,1,1,1), // a8 - af
+PCK4BITS(1,1,1,1,1,1,1,1), // b0 - b7
+PCK4BITS(1,1,1,1,1,1,1,1), // b8 - bf
+PCK4BITS(1,1,3,1,3,3,3,3), // c0 - c7
+PCK4BITS(3,3,3,3,3,3,3,3), // c8 - cf
+PCK4BITS(3,3,3,3,3,3,3,3), // d0 - d7
+PCK4BITS(3,3,3,3,3,3,3,3), // d8 - df
+PCK4BITS(3,3,3,3,3,3,3,3), // e0 - e7
+PCK4BITS(3,3,3,3,3,3,3,3), // e8 - ef
+PCK4BITS(3,3,3,3,3,3,3,3), // f0 - f7
+PCK4BITS(3,3,3,3,3,3,3,0) // f8 - ff
+};
+
+
+// Packed state table for the EUC-TW model: 6 words of eight entries each.
+// An entry is eStart, eError, eItsMe or a bare number (presumably a further
+// state id -- confirm against nsCodingStateMachine.h). The trailing comments
+// give the flat entry indices covered by each row.
+static const PRUint32 EUCTW_st [ 6] = {
+PCK4BITS(eError,eError,eStart, 3, 3, 3, 4,eError),//00-07
+PCK4BITS(eError,eError,eError,eError,eError,eError,eItsMe,eItsMe),//08-0f
+PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError),//10-17
+PCK4BITS(eStart,eStart,eStart,eError,eError,eError,eError,eError),//18-1f
+PCK4BITS( 5,eError,eError,eError,eStart,eError,eStart,eStart),//20-27
+PCK4BITS(eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f
+};
+
+// Per-class table handed to EUCTWSMModel; its 7 entries line up with the
+// count 7 passed to the model. Presumably the byte length of a character
+// for each byte class -- TODO confirm.
+static const PRUint32 EUCTWCharLenTable[] = {0, 0, 1, 2, 2, 2, 3};
+
+// EUC-TW model definition: wraps EUCTW_cls and EUCTW_st as 4-bit packed
+// nsPkgInt tables, passes the count 7 (the number of entries in
+// EUCTWCharLenTable), the length table itself, and the name "x-euc-tw".
+const SMModel EUCTWSMModel(
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCTW_cls),
+ 7,
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCTW_st),
+ EUCTWCharLenTable,
+ "x-euc-tw");
+
+/* GB2312 is superseded by GB18030; the GB2312 model below is disabled.
+static PRUint32 GB2312_cls [ 256 / 8 ] = {
+//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
+PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
+PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
+PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
+PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
+PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
+PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
+PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37
+PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f
+PCK4BITS(1,1,1,1,1,1,1,1), // 40 - 47
+PCK4BITS(1,1,1,1,1,1,1,1), // 48 - 4f
+PCK4BITS(1,1,1,1,1,1,1,1), // 50 - 57
+PCK4BITS(1,1,1,1,1,1,1,1), // 58 - 5f
+PCK4BITS(1,1,1,1,1,1,1,1), // 60 - 67
+PCK4BITS(1,1,1,1,1,1,1,1), // 68 - 6f
+PCK4BITS(1,1,1,1,1,1,1,1), // 70 - 77
+PCK4BITS(1,1,1,1,1,1,1,1), // 78 - 7f
+PCK4BITS(1,0,0,0,0,0,0,0), // 80 - 87
+PCK4BITS(0,0,0,0,0,0,0,0), // 88 - 8f
+PCK4BITS(0,0,0,0,0,0,0,0), // 90 - 97
+PCK4BITS(0,0,0,0,0,0,0,0), // 98 - 9f
+PCK4BITS(0,2,2,2,2,2,2,2), // a0 - a7
+PCK4BITS(2,2,3,3,3,3,3,3), // a8 - af
+PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
+PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
+PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
+PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
+PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
+PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
+PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7
+PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef
+PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7
+PCK4BITS(2,2,2,2,2,2,2,0) // f8 - ff
+};
+
+
+static PRUint32 GB2312_st [ 2] = {
+PCK4BITS(eError,eStart, 3,eError,eError,eError,eError,eError),//00-07
+PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart) //08-0f
+};
+
+static const PRUint32 GB2312CharLenTable[] = {0, 1, 2, 0};
+
+SMModel GB2312SMModel = {
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB2312_cls },
+ 4,
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB2312_st },
+ GB2312CharLenTable,
+ "GB2312",
+};
+*/
+
+// the following state machine data was created by perl script in
+// intl/chardet/tools. It should be the same as in PSM detector.
+// Byte-class table for the GB18030 model: one entry per byte value
+// 0x00-0xff, packed eight to a PRUint32 by PCK4BITS. The trailing comment
+// on each row gives the byte range that row covers.
+static const PRUint32 GB18030_cls [ 256 / 8 ] = {
+PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
+PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
+PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
+PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
+PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
+PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
+PCK4BITS(3,3,3,3,3,3,3,3), // 30 - 37
+PCK4BITS(3,3,1,1,1,1,1,1), // 38 - 3f
+PCK4BITS(2,2,2,2,2,2,2,2), // 40 - 47
+PCK4BITS(2,2,2,2,2,2,2,2), // 48 - 4f
+PCK4BITS(2,2,2,2,2,2,2,2), // 50 - 57
+PCK4BITS(2,2,2,2,2,2,2,2), // 58 - 5f
+PCK4BITS(2,2,2,2,2,2,2,2), // 60 - 67
+PCK4BITS(2,2,2,2,2,2,2,2), // 68 - 6f
+PCK4BITS(2,2,2,2,2,2,2,2), // 70 - 77
+PCK4BITS(2,2,2,2,2,2,2,4), // 78 - 7f
+PCK4BITS(5,6,6,6,6,6,6,6), // 80 - 87
+PCK4BITS(6,6,6,6,6,6,6,6), // 88 - 8f
+PCK4BITS(6,6,6,6,6,6,6,6), // 90 - 97
+PCK4BITS(6,6,6,6,6,6,6,6), // 98 - 9f
+PCK4BITS(6,6,6,6,6,6,6,6), // a0 - a7
+PCK4BITS(6,6,6,6,6,6,6,6), // a8 - af
+PCK4BITS(6,6,6,6,6,6,6,6), // b0 - b7
+PCK4BITS(6,6,6,6,6,6,6,6), // b8 - bf
+PCK4BITS(6,6,6,6,6,6,6,6), // c0 - c7
+PCK4BITS(6,6,6,6,6,6,6,6), // c8 - cf
+PCK4BITS(6,6,6,6,6,6,6,6), // d0 - d7
+PCK4BITS(6,6,6,6,6,6,6,6), // d8 - df
+PCK4BITS(6,6,6,6,6,6,6,6), // e0 - e7
+PCK4BITS(6,6,6,6,6,6,6,6), // e8 - ef
+PCK4BITS(6,6,6,6,6,6,6,6), // f0 - f7
+PCK4BITS(6,6,6,6,6,6,6,0) // f8 - ff
+};
+
+
+// Packed state table for the GB18030 model: 6 words of eight entries each.
+// An entry is eStart, eError, eItsMe or a bare number (presumably a further
+// state id -- confirm against nsCodingStateMachine.h). The trailing comments
+// give the flat entry indices covered by each row.
+static const PRUint32 GB18030_st [ 6] = {
+PCK4BITS(eError,eStart,eStart,eStart,eStart,eStart, 3,eError),//00-07
+PCK4BITS(eError,eError,eError,eError,eError,eError,eItsMe,eItsMe),//08-0f
+PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart),//10-17
+PCK4BITS( 4,eError,eStart,eStart,eError,eError,eError,eError),//18-1f
+PCK4BITS(eError,eError, 5,eError,eError,eError,eItsMe,eError),//20-27
+PCK4BITS(eError,eError,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f
+};
+
+// Per-class length table handed to GB18030SMModel (7 entries, matching the
+// count 7 passed to the model).
+// To be accurate, the length of class 6 can be either 2 or 4.
+// But it is not necessary to discriminate between the two, since
+// the length is used for frequency analysis only, and we are validating
+// each code range there as well. So it is safe to set it to
+// 2 here.
+static const PRUint32 GB18030CharLenTable[] = {0, 1, 1, 1, 1, 1, 2};
+
+// GB18030 model definition: wraps GB18030_cls and GB18030_st as 4-bit packed
+// nsPkgInt tables, passes the count 7 (the number of entries in
+// GB18030CharLenTable), the length table itself, and the name "GB18030".
+const SMModel GB18030SMModel(
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_cls ),
+ 7,
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_st ),
+ GB18030CharLenTable,
+ "GB18030");
+
+// sjis
+
+// Byte-class table for the Shift_JIS model: one entry per byte value
+// 0x00-0xff, packed eight to a PRUint32 by PCK4BITS. The trailing comment
+// on each row gives the byte range that row covers.
+static const PRUint32 SJIS_cls [ 256 / 8 ] = {
+//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
+PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
+PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
+PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
+PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
+PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
+PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
+PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37
+PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f
+PCK4BITS(2,2,2,2,2,2,2,2), // 40 - 47
+PCK4BITS(2,2,2,2,2,2,2,2), // 48 - 4f
+PCK4BITS(2,2,2,2,2,2,2,2), // 50 - 57
+PCK4BITS(2,2,2,2,2,2,2,2), // 58 - 5f
+PCK4BITS(2,2,2,2,2,2,2,2), // 60 - 67
+PCK4BITS(2,2,2,2,2,2,2,2), // 68 - 6f
+PCK4BITS(2,2,2,2,2,2,2,2), // 70 - 77
+PCK4BITS(2,2,2,2,2,2,2,1), // 78 - 7f
+PCK4BITS(3,3,3,3,3,3,3,3), // 80 - 87
+PCK4BITS(3,3,3,3,3,3,3,3), // 88 - 8f
+PCK4BITS(3,3,3,3,3,3,3,3), // 90 - 97
+PCK4BITS(3,3,3,3,3,3,3,3), // 98 - 9f
+//0xa0 is illegal in sjis encoding, but some pages do
+//contain such a byte. We need to be more error-forgiving.
+PCK4BITS(2,2,2,2,2,2,2,2), // a0 - a7
+PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af
+PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
+PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
+PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
+PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
+PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
+PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
+PCK4BITS(3,3,3,3,3,3,3,3), // e0 - e7
+PCK4BITS(3,3,3,3,3,4,4,4), // e8 - ef
+PCK4BITS(4,4,4,4,4,4,4,4), // f0 - f7
+PCK4BITS(4,4,4,4,4,0,0,0) // f8 - ff
+};
+
+
+// Packed state table for the Shift_JIS model: 3 words of eight entries each.
+// An entry is eStart, eError, eItsMe or a bare number (presumably a further
+// state id -- confirm against nsCodingStateMachine.h). The trailing comments
+// give the flat entry indices covered by each row.
+static const PRUint32 SJIS_st [ 3] = {
+PCK4BITS(eError,eStart,eStart, 3,eError,eError,eError,eError),//00-07
+PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f
+PCK4BITS(eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart) //10-17
+};
+
+// Per-class table handed to SJISSMModel; its 6 entries line up with the
+// count 6 passed to the model. Presumably the byte length of a character
+// for each byte class -- TODO confirm.
+static const PRUint32 SJISCharLenTable[] = {0, 1, 1, 2, 0, 0};
+
+// Shift_JIS model definition: wraps SJIS_cls and SJIS_st as 4-bit packed
+// nsPkgInt tables, passes the count 6 (the number of entries in
+// SJISCharLenTable), the length table itself, and the name "Shift_JIS".
+const SMModel SJISSMModel(
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_cls),
+ 6,
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_st),
+ SJISCharLenTable,
+ "Shift_JIS");
+
+
+// Byte-class table for the UTF-8 model: one entry per byte value 0x00-0xff,
+// packed eight to a PRUint32 by PCK4BITS. The trailing comment on each row
+// gives the byte range that row covers. Class ids here run from 0 to 15.
+static const PRUint32 UTF8_cls [ 256 / 8 ] = {
+//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
+PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07 //allow 0x00 as a legal value
+PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
+PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
+PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
+PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
+PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
+PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37
+PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f
+PCK4BITS(1,1,1,1,1,1,1,1), // 40 - 47
+PCK4BITS(1,1,1,1,1,1,1,1), // 48 - 4f
+PCK4BITS(1,1,1,1,1,1,1,1), // 50 - 57
+PCK4BITS(1,1,1,1,1,1,1,1), // 58 - 5f
+PCK4BITS(1,1,1,1,1,1,1,1), // 60 - 67
+PCK4BITS(1,1,1,1,1,1,1,1), // 68 - 6f
+PCK4BITS(1,1,1,1,1,1,1,1), // 70 - 77
+PCK4BITS(1,1,1,1,1,1,1,1), // 78 - 7f
+PCK4BITS(2,2,2,2,3,3,3,3), // 80 - 87
+PCK4BITS(4,4,4,4,4,4,4,4), // 88 - 8f
+PCK4BITS(4,4,4,4,4,4,4,4), // 90 - 97
+PCK4BITS(4,4,4,4,4,4,4,4), // 98 - 9f
+PCK4BITS(5,5,5,5,5,5,5,5), // a0 - a7
+PCK4BITS(5,5,5,5,5,5,5,5), // a8 - af
+PCK4BITS(5,5,5,5,5,5,5,5), // b0 - b7
+PCK4BITS(5,5,5,5,5,5,5,5), // b8 - bf
+PCK4BITS(0,0,6,6,6,6,6,6), // c0 - c7
+PCK4BITS(6,6,6,6,6,6,6,6), // c8 - cf
+PCK4BITS(6,6,6,6,6,6,6,6), // d0 - d7
+PCK4BITS(6,6,6,6,6,6,6,6), // d8 - df
+PCK4BITS(7,8,8,8,8,8,8,8), // e0 - e7
+PCK4BITS(8,8,8,8,8,9,8,8), // e8 - ef
+PCK4BITS(10,11,11,11,11,11,11,11), // f0 - f7
+PCK4BITS(12,13,13,13,14,15,0,0) // f8 - ff
+};
+
+
+// Packed state table for the UTF-8 model: 26 words of eight entries each.
+// An entry is eStart, eError, eItsMe or a bare number (presumably a further
+// state id -- confirm against nsCodingStateMachine.h). The trailing comments
+// give the flat entry indices covered by each row.
+static const PRUint32 UTF8_st [ 26] = {
+PCK4BITS(eError,eStart,eError,eError,eError,eError, 12, 10),//00-07
+PCK4BITS( 9, 11, 8, 7, 6, 5, 4, 3),//08-0f
+PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//10-17
+PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//18-1f
+PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe),//20-27
+PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe),//28-2f
+PCK4BITS(eError,eError, 5, 5, 5, 5,eError,eError),//30-37
+PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//38-3f
+PCK4BITS(eError,eError,eError, 5, 5, 5,eError,eError),//40-47
+PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//48-4f
+PCK4BITS(eError,eError, 7, 7, 7, 7,eError,eError),//50-57
+PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//58-5f
+PCK4BITS(eError,eError,eError,eError, 7, 7,eError,eError),//60-67
+PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//68-6f
+PCK4BITS(eError,eError, 9, 9, 9, 9,eError,eError),//70-77
+PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//78-7f
+PCK4BITS(eError,eError,eError,eError,eError, 9,eError,eError),//80-87
+PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//88-8f
+PCK4BITS(eError,eError, 12, 12, 12, 12,eError,eError),//90-97
+PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//98-9f
+PCK4BITS(eError,eError,eError,eError,eError, 12,eError,eError),//a0-a7
+PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//a8-af
+PCK4BITS(eError,eError, 12, 12, 12,eError,eError,eError),//b0-b7
+PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//b8-bf
+PCK4BITS(eError,eError,eStart,eStart,eStart,eStart,eError,eError),//c0-c7
+PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError) //c8-cf
+};
+
+// Per-class table handed to UTF8SMModel; its 16 entries line up with the
+// count 16 passed to the model. Presumably the byte length of a character
+// for each byte class -- TODO confirm.
+static const PRUint32 UTF8CharLenTable[] = {0, 1, 0, 0, 0, 0, 2, 3,
+ 3, 3, 4, 4, 5, 5, 6, 6 };
+
+// UTF-8 model definition: wraps UTF8_cls and UTF8_st as 4-bit packed
+// nsPkgInt tables, passes the count 16 (the number of entries in
+// UTF8CharLenTable), the length table itself, and the name "UTF-8".
+const SMModel UTF8SMModel(
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_cls),
+ 16,
+ nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_st),
+ UTF8CharLenTable,
+ "UTF-8");
+
diff --git a/PowerEditor/src/uchardet/nsPkgInt.h b/PowerEditor/src/uchardet/nsPkgInt.h
index 3caa91220..abf186d65 100644
--- a/PowerEditor/src/uchardet/nsPkgInt.h
+++ b/PowerEditor/src/uchardet/nsPkgInt.h
@@ -60,19 +60,23 @@ typedef enum {
typedef enum {
eUnitMsk4bits = 0x0000000FL,
eUnitMsk8bits = 0x000000FFL,
- eUnitMsk16bits = 0x0000FFFFL
-} nsUnitMsk;
-
-typedef struct nsPkgInt {
- nsIdxSft idxsft;
- nsSftMsk sftmsk;
- nsBitSft bitsft;
- nsUnitMsk unitmsk;
- const PRUint32* const data;
-} nsPkgInt;
-
-
-#define PCK16BITS(a,b) ((PRUint32)(((b) << 16) | (a)))
+ eUnitMsk16bits = 0x0000FFFFL
+} nsUnitMsk;
+
+// Descriptor for a table of values packed into PRUint32 words: four
+// shift/mask parameters plus a pointer to the packed words themselves.
+struct nsPkgInt {
+ nsIdxSft idxsft;
+ nsSftMsk sftmsk;
+ nsBitSft bitsft;
+ nsUnitMsk unitmsk;
+ const PRUint32* const data; // const pointer: must be set at construction
+
+ // Builds a descriptor from its five fields, given in declaration order.
+ nsPkgInt(nsIdxSft indexShift,
+          nsSftMsk shiftMask,
+          nsBitSft bitShift,
+          nsUnitMsk unitMask,
+          const PRUint32* const packedWords)
+ : idxsft(indexShift),
+   sftmsk(shiftMask),
+   bitsft(bitShift),
+   unitmsk(unitMask),
+   data(packedWords)
+ {}
+
+ // Declared without an inline body; no definition is visible in this header.
+ nsPkgInt();
+ nsPkgInt operator= (const nsPkgInt&);
+};
+
+
+#define PCK16BITS(a,b) ((PRUint32)(((b) << 16) | (a)))
#define PCK8BITS(a,b,c,d) PCK16BITS( ((PRUint32)(((b) << 8) | (a))), \
((PRUint32)(((d) << 8) | (c))))
diff --git a/PowerEditor/src/uchardet/nsSBCharSetProber.h b/PowerEditor/src/uchardet/nsSBCharSetProber.h
index d7180dcdf..7d199806c 100644
--- a/PowerEditor/src/uchardet/nsSBCharSetProber.h
+++ b/PowerEditor/src/uchardet/nsSBCharSetProber.h
@@ -46,29 +46,33 @@
#define NEGATIVE_SHORTCUT_THRESHOLD (float)0.05
#define SYMBOL_CAT_ORDER 250
#define NUMBER_OF_SEQ_CAT 4
-#define POSITIVE_CAT (NUMBER_OF_SEQ_CAT-1)
-#define NEGATIVE_CAT 0
-
-typedef struct
-{
- const unsigned char* const charToOrderMap; // [256] table use to find a char's order
- const PRUint8* const precedenceMatrix; // [SAMPLE_SIZE][SAMPLE_SIZE]; table to find a 2-char sequence's frequency
- float mTypicalPositiveRatio; // = freqSeqs / totalSeqs
- PRBool keepEnglishLetter; // says if this script contains English characters (not implemented)
- const char* const charsetName;
-} SequenceModel;
-
-
-class nsSingleByteCharSetProber : public nsCharSetProber{
+#define POSITIVE_CAT (NUMBER_OF_SEQ_CAT-1)
+#define NEGATIVE_CAT 0
+
+// Bundle of tables and parameters describing one single-byte charset model.
+struct SequenceModel
+{
+ const unsigned char* const charToOrderMap; // [256] table used to find a char's order
+ const PRUint8* const precedenceMatrix; // [SAMPLE_SIZE][SAMPLE_SIZE]; table to find a 2-char sequence's frequency
+ float mTypicalPositiveRatio; // = freqSeqs / totalSeqs
+ PRBool keepEnglishLetter; // says if this script contains English characters (not implemented)
+ const char* const charsetName;
+
+ // Declared without an inline body; no definition is visible in this header.
+ SequenceModel(void);
+
+ // Builds a model from its five fields, given in declaration order.
+ SequenceModel(const unsigned char* const orderMap,
+               const PRUint8* const matrix,
+               float typicalPositiveRatio,
+               PRBool keepEnglish,
+               const char* const name)
+ : charToOrderMap(orderMap),
+   precedenceMatrix(matrix),
+   mTypicalPositiveRatio(typicalPositiveRatio),
+   keepEnglishLetter(keepEnglish),
+   charsetName(name)
+ {}
+
+ // Declared without an inline body; no definition is visible in this header.
+ SequenceModel& operator=(const SequenceModel&);
+};
+
+
+class nsSingleByteCharSetProber : public nsCharSetProber{
public:
nsSingleByteCharSetProber(const SequenceModel *model)
- :mModel(model), mReversed(PR_FALSE), mNameProber(0) { Reset(); }
- nsSingleByteCharSetProber(const SequenceModel *model, PRBool reversed, nsCharSetProber* nameProber)
- :mModel(model), mReversed(reversed), mNameProber(nameProber) { Reset(); }
-
- virtual const char* GetCharSetName();
- virtual nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
- virtual nsProbingState GetState(void) {return mState;}
+ :mModel(model), mReversed(PR_FALSE), mNameProber(0) { Reset(); }
+ nsSingleByteCharSetProber(const SequenceModel *model, PRBool reversed, nsCharSetProber* nameProber)
+ :mModel(model), mReversed(reversed), mNameProber(nameProber) { Reset(); }
+ // Default constructor: no model, not reversed, no name prober. All three
+ // members set by the sibling constructors are initialised here too, so
+ // mNameProber is never left indeterminate.
+ // NOTE(review): unlike the other constructors this one does not call
+ // Reset() -- confirm that is intended.
+ nsSingleByteCharSetProber(): mModel(0), mReversed(PR_FALSE), mNameProber(0) {}
+ virtual const char* GetCharSetName();
+ virtual nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
+ virtual nsProbingState GetState(void) {return mState;}
virtual void Reset(void);
virtual float GetConfidence(void);
virtual void SetOpion() {}
@@ -77,12 +81,13 @@ public:
// contain this parameter as PR_FALSE. No one is looking at this
// parameter or calling this method.
// Moreover, the nsSBCSGroupProber which calls the HandleData of this
- // prober has a hard-coded call to FilterWithoutEnglishLetters which gets rid
- // of the English letters.
- PRBool KeepEnglishLetters() {return mModel->keepEnglishLetter;} // (not implemented)
-
-#ifdef DEBUG_chardet
- virtual void DumpStatus();
+ // prober has a hard-coded call to FilterWithoutEnglishLetters which gets rid
+ // of the English letters.
+ PRBool KeepEnglishLetters() {return mModel->keepEnglishLetter;} // (not implemented)
+ // Copy assignment that deliberately copies nothing. It returns *this by
+ // reference: the previous empty body fell off the end of a non-void
+ // function, which is undefined behaviour if the operator is ever called.
+ nsSingleByteCharSetProber& operator=(const nsSingleByteCharSetProber&) { return *this; }
+
+#ifdef DEBUG_chardet
+ virtual void DumpStatus();
#endif
protected:
diff --git a/PowerEditor/visual.net/notepadPlus.vcproj b/PowerEditor/visual.net/notepadPlus.vcproj
index 1cb202d83..943c9a886 100644
--- a/PowerEditor/visual.net/notepadPlus.vcproj
+++ b/PowerEditor/visual.net/notepadPlus.vcproj
@@ -358,7 +358,6 @@
@@ -380,7 +378,6 @@
@@ -402,7 +398,6 @@
@@ -424,7 +418,6 @@
@@ -446,7 +438,6 @@
@@ -468,7 +458,6 @@
@@ -490,7 +478,6 @@
@@ -556,7 +542,6 @@
@@ -598,7 +582,6 @@
@@ -620,7 +602,6 @@
@@ -642,7 +622,6 @@
@@ -664,7 +642,6 @@
@@ -686,7 +662,6 @@
@@ -708,7 +682,6 @@
@@ -730,7 +702,6 @@
@@ -752,7 +722,6 @@
@@ -774,7 +742,6 @@
@@ -796,7 +762,6 @@
@@ -818,7 +782,6 @@
@@ -840,7 +802,6 @@
@@ -862,7 +822,6 @@
@@ -884,7 +842,6 @@
@@ -906,7 +862,6 @@
@@ -1068,7 +1022,6 @@