8#include "nsUniversalDetector.h"
10#include "nsEscCharsetProber.h"
11#include "nsLatin1Prober.h"
12#include "nsMBCSGroupProber.h"
13#include "nsSBCSGroupProber.h"
15namespace kencodingprober
// Constructor: puts the detector into its initial, empty state.
// No prober object is allocated here; every prober pointer starts out null.
17nsUniversalDetector::nsUniversalDetector()
// The escape-sequence prober starts out unset.
22 mEscCharSetProber =
nullptr;
// No charset has been detected yet.
25 mDetectedCharset =
nullptr;
// Input is classified as pure ASCII until a later state change.
27 mInputState = ePureAscii;
// Clear every slot of the prober array.
31 for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++) {
32 mCharSetProbers[i] =
nullptr;
// Destructor: deletes every prober object the detector holds.
36nsUniversalDetector::~nsUniversalDetector()
// Delete each entry of the prober array; deleting a null entry is a no-op in C++.
38 for (
int i = 0; i < NUM_OF_CHARSET_PROBERS; i++) {
39 delete mCharSetProbers[i];
// Delete the escape-sequence prober as well (also safe when it is null).
41 delete mEscCharSetProber;
// Reset(): clears the detection result, puts the input state back to
// ePureAscii, and resets whichever probers currently exist.
// Existing prober objects are kept (not deleted); each one is asked to
// reset itself.
44void nsUniversalDetector::Reset()
// Forget any previously detected charset.
51 mDetectedCharset =
nullptr;
// Start over from the pure-ASCII input state.
53 mInputState = ePureAscii;
// The escape-sequence prober may not have been created; reset it only if present.
56 if (mEscCharSetProber) {
57 mEscCharSetProber->Reset();
// Array slots may be null, so each one is checked before it is reset.
61 for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++) {
62 if (mCharSetProbers[i]) {
63 mCharSetProbers[i]->Reset();
// Confidence thresholds (floats).
// SHORTCUT_THRESHOLD is not referenced in the visible part of this file;
// presumably a confidence above which detection may stop early - TODO confirm.
69#define SHORTCUT_THRESHOLD (float)0.95
// MINIMUM_THRESHOLD: the best prober confidence must be strictly greater than
// this before GetCharSetName()/GetConfidence() report that prober. It is also
// the value GetConfidence() returns on its fallback paths.
70#define MINIMUM_THRESHOLD (float)0.20
// HandleData(): feeds a buffer of input bytes to the detector.
//
// It first scans the buffer to classify the input state (pure ASCII,
// escape-sequence ASCII, or high-byte). It then passes the whole buffer to
// the prober(s) that match that state. When a prober reports eFoundIt, its
// charset name is stored in mDetectedCharset.
//
// aBuf: bytes to examine.
// aLen: number of bytes in aBuf.
// Returns an nsProbingState.
// NOTE(review): the return statements, the case labels of the switch and the
// closing braces are not visible in this excerpt - confirm against the full file.
72nsProbingState nsUniversalDetector::HandleData(
const char *aBuf,
unsigned int aLen)
// Pass 1: inspect every byte to update mInputState.
83 for (i = 0; i < aLen; i++) {
// A byte with the high bit set, other than 0xA0, marks the input as high-byte.
// 0xA0 is excluded, presumably because it is the Latin-1 no-break space that
// often appears in otherwise ASCII text - TODO confirm.
85 if (aBuf[i] &
'\x80' && aBuf[i] !=
'\xA0') {
// Only act on the first transition into the high-byte state.
87 if (mInputState != eHighbyte) {
89 mInputState = eHighbyte;
// Discard the escape-sequence prober once the input is high-byte.
92 delete mEscCharSetProber;
93 mEscCharSetProber =
nullptr;
// Create each of the three probers if its slot is still empty:
// slot 0 = multi-byte group, slot 1 = single-byte group, slot 2 = Latin-1.
96 if (
nullptr == mCharSetProbers[0]) {
97 mCharSetProbers[0] =
new nsMBCSGroupProber;
99 if (
nullptr == mCharSetProbers[1]) {
100 mCharSetProbers[1] =
new nsSBCSGroupProber;
102 if (
nullptr == mCharSetProbers[2]) {
103 mCharSetProbers[2] =
new nsLatin1Prober;
// While still pure ASCII, an ESC byte (\033) or the two-byte sequence "~{"
// switches the state to eEscAscii. "~{" is presumably the HZ encoding's
// shift-in sequence - TODO confirm. The update of mLastChar is not visible here.
108 if (ePureAscii == mInputState && (aBuf[i] ==
'\033' || (aBuf[i] ==
'{' && mLastChar ==
'~'))) {
110 mInputState = eEscAscii;
// Pass 2: dispatch the whole buffer according to the input state.
117 nsProbingState st = eDetecting;
118 switch (mInputState) {
// Escape-sequence branch (case label not visible; presumably eEscAscii):
// create the escape prober on first use, feed it the buffer, and record its
// charset name if it reports a definite match.
120 if (
nullptr == mEscCharSetProber) {
121 mEscCharSetProber =
new nsEscCharSetProber;
123 st = mEscCharSetProber->HandleData(aBuf, aLen);
124 if (st == eFoundIt) {
126 mDetectedCharset = mEscCharSetProber->GetCharSetName();
// Prober-array branch (case label not visible; presumably eHighbyte): feed the
// buffer to every prober and record the name when one reports a definite match.
// NOTE(review): mCharSetProbers[i] is dereferenced without a null check here,
// unlike Reset(). This looks safe only if the transition above fills every
// slot, i.e. NUM_OF_CHARSET_PROBERS == 3 - confirm.
130 for (i = 0; i < NUM_OF_CHARSET_PROBERS; ++i) {
131 st = mCharSetProbers[i]->HandleData(aBuf, aLen);
132 if (st == eFoundIt) {
134 mDetectedCharset = mCharSetProbers[i]->GetCharSetName();
// Remaining branch (label not visible; presumably the default / pure-ASCII
// case - TODO confirm): the detected charset is set to "UTF-8".
140 mDetectedCharset =
"UTF-8";
// GetCharSetName(): returns the name of the detected charset.
//
// If a charset has already been recorded, that name is returned directly.
// Otherwise it finds the prober with the highest confidence and returns that
// prober's charset name, but only if the confidence exceeds MINIMUM_THRESHOLD.
// NOTE(review): the case labels, the declaration/update of maxProber and the
// fallback return value are not visible in this excerpt - confirm against the
// full file.
146const char *nsUniversalDetector::GetCharSetName()
// Fast path: a definite answer was already stored.
148 if (mDetectedCharset) {
149 return mDetectedCharset;
151 switch (mInputState) {
153 float proberConfidence;
154 float maxProberConfidence = (float)0.0;
// Scan all probers for the highest confidence value. Slots are dereferenced
// without a null check, so this branch assumes every prober exists.
157 for (
int i = 0; i < NUM_OF_CHARSET_PROBERS; i++) {
158 proberConfidence = mCharSetProbers[i]->GetConfidence();
159 if (proberConfidence > maxProberConfidence) {
160 maxProberConfidence = proberConfidence;
// Report the best prober only when its confidence is strictly above the
// minimum threshold.
165 if (maxProberConfidence > MINIMUM_THRESHOLD) {
166 return mCharSetProbers[maxProber]->GetCharSetName();
// GetConfidence(): returns a confidence value (float) for the current result.
//
// MINIMUM_THRESHOLD is returned on the early-exit path and as the final
// fallback. Otherwise the prober with the highest confidence is selected and
// its confidence is returned, provided it exceeds MINIMUM_THRESHOLD.
// NOTE(review): the condition guarding the first return, the value returned
// when mDetectedCharset is set, the case labels and the handling of maxProber
// are not visible in this excerpt - confirm against the full file.
178float nsUniversalDetector::GetConfidence()
// Early exit (guard condition not visible here).
183 return MINIMUM_THRESHOLD;
// A charset was already recorded (the value returned here is not visible).
185 if (mDetectedCharset) {
188 switch (mInputState) {
190 float proberConfidence;
191 float maxProberConfidence = (float)0.0;
// Scan all probers for the highest confidence value. Slots are dereferenced
// without a null check, so this branch assumes every prober exists.
194 for (
int i = 0; i < NUM_OF_CHARSET_PROBERS; i++) {
195 proberConfidence = mCharSetProbers[i]->GetConfidence();
196 if (proberConfidence > maxProberConfidence) {
197 maxProberConfidence = proberConfidence;
// Report the best prober only when it is strictly above the minimum threshold.
// NOTE(review): this calls GetConfidence() on the winning prober a second time
// rather than returning maxProberConfidence.
202 if (maxProberConfidence > MINIMUM_THRESHOLD) {
203 return mCharSetProbers[maxProber]->GetConfidence();
// Fallback when no prober was confident enough.
211 return MINIMUM_THRESHOLD;
214nsProbingState nsUniversalDetector::GetState()