KCodecs

nsSBCharSetProber.h
1/* -*- C++ -*-
2 SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
3
4 SPDX-License-Identifier: MIT
5*/
6
7#ifndef NSSBCHARSETPROBER_H
8#define NSSBCHARSETPROBER_H
9
10#include "nsCharSetProber.h"
11
// Dimension of the square precedence matrix ([SAMPLE_SIZE][SAMPLE_SIZE]).
#define SAMPLE_SIZE 64
// NOTE(review): presumably the number of observed sequences regarded as
// "enough" before the shortcut thresholds below apply -- confirm in the .cpp.
#define SB_ENOUGH_REL_THRESHOLD 1024
// Shortcut thresholds, of type float. Written as float literals instead of
// C-style casts of double literals; type and value are unchanged.
#define POSITIVE_SHORTCUT_THRESHOLD 0.95f
#define NEGATIVE_SHORTCUT_THRESHOLD 0.05f
// NOTE(review): usage is not visible in this header -- confirm meaning in the .cpp.
#define SYMBOL_CAT_ORDER 250
// Number of sequence categories; sizes the per-category counter array.
#define NUMBER_OF_SEQ_CAT 4
// Highest and lowest sequence category indices.
#define POSITIVE_CAT (NUMBER_OF_SEQ_CAT - 1)
#define NEGATIVE_CAT 0
20
21namespace kencodingprober
22{
/**
 * Plain aggregate describing one single-byte charset model.
 *
 * Declared as a named struct (rather than a typedef of an unnamed struct) so
 * that it can be forward-declared; member order and aggregate initialization
 * are unchanged.
 */
struct SequenceModel {
    const unsigned char *charToOrderMap; // [256] table used to find a char's order
    const char *precedenceMatrix; // [SAMPLE_SIZE][SAMPLE_SIZE] table used to find a 2-char sequence's frequency
    float mTypicalPositiveRatio; // = freqSeqs / totalSeqs
    bool keepEnglishLetter; // whether this script contains English characters (not implemented)
    const char *charsetName; // name of the charset this model describes
};
30
31class KCODECS_NO_EXPORT nsSingleByteCharSetProber : public nsCharSetProber
32{
33public:
34 explicit nsSingleByteCharSetProber(const SequenceModel *model)
35 : mModel(model)
36 , mReversed(false)
37 , mNameProber(nullptr)
38 {
39 Reset();
40 }
41 nsSingleByteCharSetProber(const SequenceModel *model, bool reversed, nsCharSetProber *nameProber)
42 : mModel(model)
43 , mReversed(reversed)
44 , mNameProber(nameProber)
45 {
46 Reset();
47 }
48
49 const char *GetCharSetName() override;
50 nsProbingState HandleData(const char *aBuf, unsigned int aLen) override;
51 nsProbingState GetState(void) override
52 {
53 return mState;
54 }
55 void Reset(void) override;
56 float GetConfidence(void) override;
57 void SetOpion() override
58 {
59 }
60
61 // This feature is not implemented yet. any current language model
62 // contain this parameter as false. No one is looking at this
63 // parameter or calling this method.
64 // Moreover, the nsSBCSGroupProber which calls the HandleData of this
65 // prober has a hard-coded call to FilterWithoutEnglishLetters which gets rid
66 // of the English letters.
67 bool KeepEnglishLetters()
68 {
69 return mModel->keepEnglishLetter;
70 } // (not implemented)
71
72#ifdef DEBUG_PROBE
73 void DumpStatus() override;
74#endif
75
76protected:
77 nsProbingState mState;
78 const SequenceModel *mModel;
79 const bool mReversed; // true if we need to reverse every pair in the model lookup
80
81 // char order of last character
82 unsigned char mLastOrder;
83
84 unsigned int mTotalSeqs;
85 unsigned int mSeqCounters[NUMBER_OF_SEQ_CAT];
86
87 unsigned int mTotalChar;
88 // characters that fall in our sampling range
89 unsigned int mFreqChar;
90
91 // Optional auxiliary prober for name decision. created and destroyed by the GroupProber
92 nsCharSetProber *mNameProber;
93};
94
95extern const SequenceModel Koi8rModel;
96extern const SequenceModel Win1251Model;
97extern const SequenceModel Latin5Model;
98extern const SequenceModel MacCyrillicModel;
99extern const SequenceModel Ibm866Model;
100extern const SequenceModel Ibm855Model;
101extern const SequenceModel Latin7Model;
102extern const SequenceModel Win1253Model;
103extern const SequenceModel Latin5BulgarianModel;
104extern const SequenceModel Win1251BulgarianModel;
105extern const SequenceModel Latin2HungarianModel;
106extern const SequenceModel Win1250HungarianModel;
107extern const SequenceModel Win1255Model;
108}
109#endif /* NSSBCHARSETPROBER_H */
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Mon Nov 18 2024 12:18:52 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.