KCodecs

nsMBCSGroupProber.cpp
1/* -*- C++ -*-
2 SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
3
4 SPDX-License-Identifier: MIT
5*/
6
7#include "nsMBCSGroupProber.h"
8
9#include <stdio.h>
10#include <stdlib.h>
11
12namespace kencodingprober
13{
14#ifdef DEBUG_PROBE
// Debug-only display labels. DumpStatus() prints ProberName[i] next to the
// status of mProbers[i], so the order here must follow the order in which the
// constructor fills mProbers.
static const char *const ProberName[] = {"Unicode", "SJIS", "EUCJP", "GB18030", "EUCKR", "Big5"};
23
24#endif
25
26nsMBCSGroupProber::nsMBCSGroupProber()
27{
28 mProbers[0] = new UnicodeGroupProber();
29 mProbers[1] = new nsSJISProber();
30 mProbers[2] = new nsEUCJPProber();
31 mProbers[3] = new nsGB18030Prober();
32 mProbers[4] = new nsEUCKRProber();
33 mProbers[5] = new nsBig5Prober();
34 Reset();
35}
36
37nsMBCSGroupProber::~nsMBCSGroupProber()
38{
39 for (unsigned int i = 0; i < NUM_OF_PROBERS; i++) {
40 delete mProbers[i];
41 }
42}
43
44const char *nsMBCSGroupProber::GetCharSetName()
45{
46 if (mBestGuess == -1) {
47 GetConfidence();
48 if (mBestGuess == -1) {
49 mBestGuess = 0;
50 }
51 }
52 return mProbers[mBestGuess]->GetCharSetName();
53}
54
55void nsMBCSGroupProber::Reset(void)
56{
57 mActiveNum = 0;
58 for (unsigned int i = 0; i < NUM_OF_PROBERS; i++) {
59 if (mProbers[i]) {
60 mProbers[i]->Reset();
61 mIsActive[i] = true;
62 ++mActiveNum;
63 } else {
64 mIsActive[i] = false;
65 }
66 }
67 mBestGuess = -1;
68 mState = eDetecting;
69}
70
71nsProbingState nsMBCSGroupProber::HandleData(const char *aBuf, unsigned int aLen)
72{
73 nsProbingState st;
74 unsigned int i;
75
76 // do filtering to reduce load to probers
77 char *highbyteBuf;
78 char *hptr;
79 bool keepNext = true; // assume previous is not ascii, it will do no harm except add some noise
80 hptr = highbyteBuf = (char *)malloc(aLen);
81 if (!hptr) {
82 return mState;
83 }
84 for (i = 0; i < aLen; ++i) {
85 if (aBuf[i] & 0x80) {
86 *hptr++ = aBuf[i];
87 keepNext = true;
88 } else {
89 // if previous is highbyte, keep this even it is a ASCII
90 if (keepNext) {
91 *hptr++ = aBuf[i];
92 keepNext = false;
93 }
94 }
95 }
96
97 for (i = 0; i < NUM_OF_PROBERS; ++i) {
98 if (!mIsActive[i]) {
99 continue;
100 }
101 st = mProbers[i]->HandleData(highbyteBuf, hptr - highbyteBuf);
102 if (st == eFoundIt) {
103 mBestGuess = i;
104 mState = eFoundIt;
105 break;
106 } else if (st == eNotMe) {
107 mIsActive[i] = false;
108 mActiveNum--;
109 if (mActiveNum == 0) {
110 mState = eNotMe;
111 break;
112 }
113 }
114 }
115
116 free(highbyteBuf);
117
118 return mState;
119}
120
121float nsMBCSGroupProber::GetConfidence(void)
122{
123 unsigned int i;
124 float bestConf = 0.0;
125 float cf;
126
127 switch (mState) {
128 case eFoundIt:
129 return (float)0.99;
130 case eNotMe:
131 return (float)0.01;
132 default:
133 for (i = 0; i < NUM_OF_PROBERS; ++i) {
134 if (!mIsActive[i]) {
135 continue;
136 }
137 cf = mProbers[i]->GetConfidence();
138 if (bestConf < cf) {
139 bestConf = cf;
140 mBestGuess = i;
141 }
142 }
143 }
144 return bestConf;
145}
146
147#ifdef DEBUG_PROBE
148void nsMBCSGroupProber::DumpStatus()
149{
150 unsigned int i;
151 float cf;
152
153 GetConfidence();
154 for (i = 0; i < NUM_OF_PROBERS; i++) {
155 if (!mIsActive[i]) {
156 printf(" MBCS inactive: [%s] (confidence is too low).\r\n", ProberName[i]);
157 } else {
158 cf = mProbers[i]->GetConfidence();
159 printf(" MBCS %1.3f: [%s]\r\n", cf, ProberName[i]);
160 }
161 }
162}
163#endif
164}
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Mon Nov 18 2024 12:18:52 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.