KCodecs

JapaneseGroupProber.cpp
1/* -*- C++ -*-
2 SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
3
4 SPDX-License-Identifier: MIT
5*/
6
7#include "JapaneseGroupProber.h"
8
9#include <stdio.h>
10#include <stdlib.h>
11
12namespace kencodingprober
13{
14#ifdef DEBUG_PROBE
// Human-readable labels for the child probers, indexed by their slot in
// mProbers (see the constructor): 0 = UnicodeGroupProber, 1 = nsSJISProber,
// 2 = nsEUCJPProber. Only used by DumpStatus() in DEBUG_PROBE builds.
static const char *const ProberName[] = {
    "Unicode",
    "Shift_JIS",
    "EUC-JP",
};
20
21#endif
22
23JapaneseGroupProber::JapaneseGroupProber()
24{
25 mProbers[0] = new UnicodeGroupProber();
26 mProbers[1] = new nsSJISProber();
27 mProbers[2] = new nsEUCJPProber();
28 Reset();
29}
30
31JapaneseGroupProber::~JapaneseGroupProber()
32{
33 for (unsigned int i = 0; i < JP_NUM_OF_PROBERS; i++) {
34 delete mProbers[i];
35 }
36}
37
38const char *JapaneseGroupProber::GetCharSetName()
39{
40 if (mBestGuess == -1) {
41 GetConfidence();
42 if (mBestGuess == -1) {
43 mBestGuess = 1; // assume it's GB18030
44 }
45 }
46 return mProbers[mBestGuess]->GetCharSetName();
47}
48
49void JapaneseGroupProber::Reset(void)
50{
51 mActiveNum = 0;
52 for (unsigned int i = 0; i < JP_NUM_OF_PROBERS; i++) {
53 if (mProbers[i]) {
54 mProbers[i]->Reset();
55 mIsActive[i] = true;
56 ++mActiveNum;
57 } else {
58 mIsActive[i] = false;
59 }
60 }
61 mBestGuess = -1;
62 mState = eDetecting;
63}
64
65nsProbingState JapaneseGroupProber::HandleData(const char *aBuf, unsigned int aLen)
66{
67 nsProbingState st;
68 unsigned int i;
69
70 // do filtering to reduce load to probers
71 char *highbyteBuf;
72 char *hptr;
73 bool keepNext = true; // assume previous is not ascii, it will do no harm except add some noise
74 hptr = highbyteBuf = (char *)malloc(aLen);
75 if (!hptr) {
76 return mState;
77 }
78 for (i = 0; i < aLen; ++i) {
79 if (aBuf[i] & 0x80) {
80 *hptr++ = aBuf[i];
81 keepNext = true;
82 } else {
83 // if previous is highbyte, keep this even it is a ASCII
84 if (keepNext) {
85 *hptr++ = aBuf[i];
86 keepNext = false;
87 }
88 }
89 }
90
91 for (i = 0; i < JP_NUM_OF_PROBERS; ++i) {
92 if (!mIsActive[i]) {
93 continue;
94 }
95 st = mProbers[i]->HandleData(highbyteBuf, hptr - highbyteBuf);
96 if (st == eFoundIt) {
97 mBestGuess = i;
98 mState = eFoundIt;
99 break;
100 } else if (st == eNotMe) {
101 mIsActive[i] = false;
102 --mActiveNum;
103 if (mActiveNum == 0) {
104 mState = eNotMe;
105 break;
106 }
107 }
108 }
109
110 free(highbyteBuf);
111
112 return mState;
113}
114
115float JapaneseGroupProber::GetConfidence(void)
116{
117 unsigned int i;
118 float bestConf = 0.0;
119 float cf;
120
121 switch (mState) {
122 case eFoundIt:
123 return (float)0.99;
124 case eNotMe:
125 return (float)0.01;
126 default:
127 for (i = 0; i < JP_NUM_OF_PROBERS; ++i) {
128 if (!mIsActive[i]) {
129 continue;
130 }
131 cf = mProbers[i]->GetConfidence();
132 if (bestConf < cf) {
133 bestConf = cf;
134 mBestGuess = i;
135 }
136 }
137 }
138 return bestConf;
139}
140
141#ifdef DEBUG_PROBE
142void JapaneseGroupProber::DumpStatus()
143{
144 unsigned int i;
145 float cf;
146
147 GetConfidence();
148 for (i = 0; i < JP_NUM_OF_PROBERS; i++) {
149 if (!mIsActive[i]) {
150 printf(" Chinese group inactive: [%s] (confidence is too low).\r\n", ProberName[i]);
151 } else {
152 cf = mProbers[i]->GetConfidence();
153 printf(" Chinese group %1.3f: [%s]\r\n", cf, ProberName[i]);
154 }
155 }
156}
157#endif
158}
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Mon Nov 18 2024 12:18:52 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.