7#include "nsLatin1Prober.h"
21namespace kencodingprober
// Byte-to-character-class lookup table: 32 rows of 8 entries, one entry per
// possible byte value (0x00-0xFF). HandleData() indexes it with the input
// byte cast to unsigned char.
// The class macros (UDF, OTH, ASC, ASS, ACV, ACO, ASV, ASO) are defined
// outside this view. Judging by where they appear in the table:
//   ASC covers 0x41-0x5A ('A'-'Z'), ASS covers 0x61-0x7A ('a'-'z'),
//   ACV/ACO and ASV/ASO cover the accented-letter range 0xC0-0xFF.
// NOTE(review): presumably UDF = undefined, OTH = other, ASC/ASS = ASCII
// capital/small, AC*/AS* = accented capital/small (V = vowel, O = other) --
// confirm against the macro definitions.
// NOTE(review): rows 0x80-0x9F contain letter classes, which suggests a
// windows-1252 style layout rather than strict ISO-8859-1 -- confirm.
23static const unsigned char Latin1_CharToClass[] = {
24 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 0x00 - 0x07
25 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 0x08 - 0x0F
26 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 0x10 - 0x17
27 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 0x18 - 0x1F
28 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 0x20 - 0x27
29 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 0x28 - 0x2F
30 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 0x30 - 0x37
31 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 0x38 - 0x3F
32 OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 0x40 - 0x47
33 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 0x48 - 0x4F
34 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 0x50 - 0x57
35 ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, // 0x58 - 0x5F
36 OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 0x60 - 0x67
37 ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 0x68 - 0x6F
38 ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 0x70 - 0x77
39 ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, // 0x78 - 0x7F
40 OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, // 0x80 - 0x87
41 OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, // 0x88 - 0x8F
42 UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 0x90 - 0x97
43 OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, // 0x98 - 0x9F
44 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 0xA0 - 0xA7
45 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 0xA8 - 0xAF
46 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 0xB0 - 0xB7
47 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 0xB8 - 0xBF
48 ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, // 0xC0 - 0xC7
49 ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, // 0xC8 - 0xCF
50 ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, // 0xD0 - 0xD7
51 ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, // 0xD8 - 0xDF
52 ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, // 0xE0 - 0xE7
53 ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, // 0xE8 - 0xEF
54 ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, // 0xF0 - 0xF7
55 ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, // 0xF8 - 0xFF
// Class-pair frequency model: 8 rows of 8 entries. HandleData() looks up
// Latin1ClassModel[mLastCharClass * CLASS_NUM + charClass], so (assuming
// CLASS_NUM is 8 -- defined outside this view, confirm) the row is the class
// of the previous character and the column is the class of the current one.
// Each entry is a frequency category in 0..3 that HandleData() uses as an
// index into mFreqCounter. GetConfidence() counts category 3 in favour of
// Latin-1 and penalises category 1 with a weight of 20.
// The first row and the first column are all 0, so any pair involving class
// value 0 yields category 0.
// NOTE(review): category 0 presumably marks an impossible pair that rules the
// charset out; the code that checks for it is not visible here -- confirm.
// NOTE(review): presumably rows/columns are ordered UDF, OTH, ASC, ASS, ACV,
// ACO, ASV, ASO -- verify against the class macro values.
63static const unsigned char Latin1ClassModel[] = {
65 0, 0, 0, 0, 0, 0, 0, 0, // previous class value 0
66 0, 3, 3, 3, 3, 3, 3, 3, // previous class value 1
67 0, 3, 3, 3, 3, 3, 3, 3, // previous class value 2
68 0, 3, 3, 3, 1, 1, 3, 3, // previous class value 3
69 0, 3, 3, 3, 1, 2, 1, 2, // previous class value 4
70 0, 3, 3, 3, 3, 3, 3, 3, // previous class value 5
71 0, 3, 1, 3, 1, 1, 1, 3, // previous class value 6
72 0, 3, 1, 3, 1, 1, 3, 3, // previous class value 7
// Resets the prober. The only statement visible here is a loop that walks
// i over [0, FREQ_CAT_NUM).
// NOTE(review): presumably the loop body zeroes mFreqCounter[i] and the
// function also re-initialises mState / mLastCharClass; those lines are not
// visible in this view -- confirm.
75void nsLatin1Prober::Reset(
void)
// One iteration per frequency category.
79 for (
int i = 0; i < FREQ_CAT_NUM; i++) {
// Feeds a chunk of input to the prober and updates the class-pair statistics.
//
// aBuf: input bytes to analyse.
// aLen: number of bytes in aBuf.
// Returns an nsProbingState; the return statement is not visible in this
// view (presumably mState -- confirm).
84nsProbingState nsLatin1Prober::HandleData(
const char *aBuf,
unsigned int aLen)
// Output slots for the filtered copy produced by FilterWithEnglishLetters().
86 char *newBuf1 =
nullptr;
87 unsigned int newLen1 = 0;
// If the filter reports failure, fall back to scanning the caller's buffer
// directly. NOTE(review): newLen1 is presumably set to aLen in this branch;
// that line is not visible here -- confirm.
89 if (!FilterWithEnglishLetters(aBuf, aLen, &newBuf1, newLen1)) {
90 newBuf1 = (
char *)aBuf;
// Walk the buffer byte by byte, tracking consecutive character classes.
94 for (
unsigned int i = 0; i < newLen1; i++) {
// Cast through unsigned char so bytes >= 0x80 index the table correctly
// even where plain char is signed.
95 const unsigned char charClass = Latin1_CharToClass[(
unsigned char)newBuf1[i]];
// Frequency category for the (previous class, current class) pair.
96 const unsigned char freq = Latin1ClassModel[mLastCharClass * CLASS_NUM + charClass];
// NOTE(review): any special handling of freq == 0 sits between these lines
// in the original file and is not visible here.
101 mFreqCounter[freq]++;
102 mLastCharClass = charClass;
// True only when the filter handed back a buffer distinct from the input.
// NOTE(review): presumably that buffer is released here; the body is not
// visible -- confirm.
105 if (newBuf1 != aBuf) {
// Computes a confidence score from the frequency-category counters gathered
// by HandleData().
// Visible formula: mFreqCounter[3] / total - 20 * mFreqCounter[1] / total,
// where total is the sum of all mFreqCounter entries.
// NOTE(review): the declaration of `confidence`, any guard for total == 0,
// any final scaling, and the return statement are not visible in this view.
112float nsLatin1Prober::GetConfidence(
void)
// Special case for a prober already in the eNotMe state; the value returned
// in that case is not visible here.
114 if (mState == eNotMe) {
// Sum every frequency category to obtain the normalisation denominator.
119 unsigned int total = 0;
120 for (
int i = 0; i < FREQ_CAT_NUM; i++) {
121 total += mFreqCounter[i];
// Share of category-3 pairs; the 1.0f factor forces float division.
127 confidence = mFreqCounter[3] * 1.0f / total;
// Category-1 pairs are penalised at 20x their share.
128 confidence -= mFreqCounter[1] * 20.0f / total;
// The penalty can push the score below zero. NOTE(review): presumably it is
// clamped to 0.0f here; the body is not visible -- confirm.
131 if (confidence < 0.0f) {
// Debug helper: prints the current confidence (three decimal places) and the
// charset name to stdout via printf, terminated with "\r\n".
143void nsLatin1Prober::DumpStatus()
145 printf(
" Latin1Prober: %1.3f [%s]\r\n", GetConfidence(), GetCharSetName());