KCodecs

nsGB2312Prober.cpp
1/* -*- C++ -*-
2 SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
3
4 SPDX-License-Identifier: MIT
5*/
6
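// NOTE(review): the observations below concern S-JIS, not GB18030; they are
// presumably carried over from another prober -- confirm before relying on them.
// For S-JIS encoding, observe these characteristics:
// 1. Kana characters (or hankaku?) often appear with high frequency.
// 2. Kana characters often occur in groups.
// 3. Certain combinations of kana are never used in the Japanese language.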
11
12#include "nsGB2312Prober.h"
13
14namespace kencodingprober
15{
16void nsGB18030Prober::Reset(void)
17{
18 mCodingSM->Reset();
19 mState = eDetecting;
20 mDistributionAnalyser.Reset();
21 // mContextAnalyser.Reset();
22}
23
24nsProbingState nsGB18030Prober::HandleData(const char *aBuf, unsigned int aLen)
25{
26 if (aLen == 0) {
27 return mState;
28 }
29
30 for (unsigned int i = 0; i < aLen; i++) {
31 const nsSMState codingState = mCodingSM->NextState(aBuf[i]);
32 if (codingState == eError) {
33 mState = eNotMe;
34 break;
35 }
36 if (codingState == eItsMe) {
37 mState = eFoundIt;
38 break;
39 }
40 if (codingState == eStart) {
41 unsigned int charLen = mCodingSM->GetCurrentCharLen();
42
43 if (i == 0) {
44 mLastChar[1] = aBuf[0];
45 mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
46 } else {
47 mDistributionAnalyser.HandleOneChar(aBuf + i - 1, charLen);
48 }
49 }
50 }
51
52 mLastChar[0] = aBuf[aLen - 1];
53
54 if (mState == eDetecting) {
55 if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) {
56 mState = eFoundIt;
57 }
58 }
59 // else
60 // mDistributionAnalyser.HandleData(aBuf, aLen);
61
62 return mState;
63}
64
65float nsGB18030Prober::GetConfidence(void)
66{
67 float distribCf = mDistributionAnalyser.GetConfidence();
68
69 return (float)distribCf;
70}
71}
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Mon Nov 18 2024 12:18:52 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.