9#include "kencodingprober.h"
11#include "probers/ChineseGroupProber.h"
12#include "probers/JapaneseGroupProber.h"
13#include "probers/UnicodeGroupProber.h"
14#include "probers/nsCharSetProber.h"
15#include "probers/nsMBCSGroupProber.h"
16#include "probers/nsSBCSGroupProber.h"
17#include "probers/nsUniversalDetector.h"
// Private implementation data for KEncodingProber: stores the selected prober
// type and the charset prober object created for that type.
// NOTE(review): this listing is an extract; source line numbers are fused onto
// each line and intermediate lines (braces, guards, break statements) are not
// shown, so the comments below describe only what is visible.
21class KEncodingProberPrivate
24 KEncodingProberPrivate()
29 ~KEncodingProberPrivate()
// Creates the prober object that corresponds to a prober type.
// The switch dispatches on mProberType; presumably it is assigned from pType
// in lines not shown here (TODO confirm).
// NOTE(review): each branch assigns a freshly allocated object to the raw
// pointer mProber; whether the previous object is released first is not
// visible in this extract (TODO confirm).
33 void setProberType(KEncodingProber::ProberType pType)
44 switch (mProberType) {
// Handling of None is not visible in this extract.
45 case KEncodingProber::None:
// All of the following language groups share nsSBCSGroupProber.
48 case KEncodingProber::Arabic:
49 case KEncodingProber::Baltic:
50 case KEncodingProber::CentralEuropean:
51 case KEncodingProber::Cyrillic:
52 case KEncodingProber::Greek:
53 case KEncodingProber::Hebrew:
54 case KEncodingProber::NorthernSaami:
55 case KEncodingProber::Other:
56 case KEncodingProber::SouthEasternEurope:
57 case KEncodingProber::Thai:
58 case KEncodingProber::Turkish:
59 case KEncodingProber::WesternEuropean:
60 mProber =
new kencodingprober::nsSBCSGroupProber();
// Both Chinese variants share ChineseGroupProber.
62 case KEncodingProber::ChineseSimplified:
63 case KEncodingProber::ChineseTraditional:
64 mProber =
new kencodingprober::ChineseGroupProber();
66 case KEncodingProber::Japanese:
67 mProber =
new kencodingprober::JapaneseGroupProber();
// Korean has no dedicated group prober here; it uses nsMBCSGroupProber.
69 case KEncodingProber::Korean:
70 mProber =
new kencodingprober::nsMBCSGroupProber();
72 case KEncodingProber::Unicode:
73 mProber =
new kencodingprober::UnicodeGroupProber();
75 case KEncodingProber::Universal:
76 mProber =
new kencodingprober::nsUniversalDetector();
// Compares the bytes following aBuf[0] against fixed patterns.
// The dispatch on aBuf[0] and the action taken on a match are not visible in
// this extract. The visible patterns are consistent with Unicode byte-order
// marks (for example BB BF as the tail of the UTF-8 mark EF BB BF, and
// 00 FE FF as the tail of 00 00 FE FF) -- TODO confirm against the full source.
// NOTE(review): the comparisons read up to aBuf[3]; the guard on aLen that
// makes this safe is not visible here (TODO confirm).
82 void unicodeTest(
const char *aBuf,
int aLen)
// Two-byte tail BB BF.
89 if ((
'\xBB' == aBuf[1]) && (
'\xBF' == aBuf[2]))
// Three-byte tail FF 00 00 is tested before the shorter one-byte tail FF,
// so the longer pattern wins when both would match.
96 if ((
'\xFF' == aBuf[1]) && (
'\x00' == aBuf[2]) && (
'\x00' == aBuf[3]))
100 }
else if (
'\xFF' == aBuf[1])
// Tails 00 FE FF and 00 FF FE.
107 if ((
'\x00' == aBuf[1]) && (
'\xFE' == aBuf[2]) && (
'\xFF' == aBuf[3]))
111 }
else if ((
'\x00' == aBuf[1]) && (
'\xFF' == aBuf[2]) && (
'\xFE' == aBuf[3]))
// Three-byte tail FE 00 00 is tested before the shorter one-byte tail FE.
118 if ((
'\xFE' == aBuf[1]) && (
'\x00' == aBuf[2]) && (
'\x00' == aBuf[3]))
122 }
else if (
'\xFE' == aBuf[1])
// Prober type that the switch in setProberType dispatches on.
132 KEncodingProber::ProberType mProberType;
// Prober object selected by setProberType. Raw pointer: ownership and
// cleanup are presumably handled in the destructor, which is not visible here.
134 kencodingprober::nsCharSetProber *mProber;
// Constructor member-initializer (signature not visible in this extract):
// allocates the private implementation object and stores it in d.
139 : d(new KEncodingProberPrivate())
// Compiler-generated destructor. Whether the object held in d is released
// depends on the declared type of d, which is not visible in this extract.
144KEncodingProber::~KEncodingProber() =
default;
// Fragment of the data-feeding routine (signature and several statements are
// not visible in this extract; presumably this is feed()).
// Early exit that returns the current state; its guarding condition is not shown.
155 return d->mProberState;
// Analysis is only attempted while the state is still Probing.
157 if (d->mProberState ==
Probing) {
// If a preceding step (not shown) already moved the state to FoundIt,
// report it immediately.
160 if (d->mProberState ==
FoundIt) {
161 return d->mProberState;
// Translate the state reported by the underlying prober into the public
// prober state stored in the private data.
165 switch (d->mProber->GetState()) {
166 case kencodingprober::eNotMe:
167 d->mProberState =
NotMe;
169 case kencodingprober::eFoundIt:
// Asks the prober to dump its status.
// NOTE(review): probably debug-only; any enclosing guard is not visible here.
178 d->mProber->DumpStatus();
180 return d->mProberState;
// Returns the stored prober state unchanged (enclosing signature not visible
// in this extract; presumably state()).
185 return d->mProberState;
// Builds a QByteArray from the charset name reported by the active prober
// (enclosing signature not visible; presumably encoding()).
// NOTE(review): d->mProber is dereferenced here; whether a null check
// precedes this line is not visible in this extract.
194 return QByteArray(d->mProber->GetCharSetName());
// Returns the confidence value reported by the active prober (enclosing
// signature not visible in this extract).
// NOTE(review): d->mProber is dereferenced here; whether a null check
// precedes this line is not visible in this extract.
203 return d->mProber->GetConfidence();
// Accessor: returns the prober type stored in the private data.
206KEncodingProber::ProberType KEncodingProber::proberType()
const
208 return d->mProberType;
// Forwards the requested type to setProberType() on the private object
// (enclosing signature not visible in this extract).
213 d->setProberType(proberType);
// Fragment of the name-to-type lookup (signature not visible in this extract;
// presumably proberTypeForName()).
// Each branch compares lang against the TRANSLATED label produced by tr(), so
// a match depends on the translation active at call time; an untranslated
// name only matches when tr() returns the source text.
// The first return of Universal belongs to a leading condition that is not
// shown; the final return of Universal is the result when no branch matched.
// NOTE(review): the visible chain has no branch for NorthernSaami, Other or
// SouthEasternEurope -- confirm whether that is intentional.
220 return KEncodingProber::Universal;
221 }
else if (lang == tr(
"Disabled",
"@item Text character set")) {
222 return KEncodingProber::None;
223 }
else if (lang == tr(
"Universal",
"@item Text character set")) {
224 return KEncodingProber::Universal;
225 }
else if (lang == tr(
"Unicode",
"@item Text character set")) {
226 return KEncodingProber::Unicode;
227 }
else if (lang == tr(
"Cyrillic",
"@item Text character set")) {
228 return KEncodingProber::Cyrillic;
229 }
else if (lang == tr(
"Western European",
"@item Text character set")) {
230 return KEncodingProber::WesternEuropean;
231 }
else if (lang == tr(
"Central European",
"@item Text character set")) {
232 return KEncodingProber::CentralEuropean;
233 }
else if (lang == tr(
"Greek",
"@item Text character set")) {
234 return KEncodingProber::Greek;
235 }
else if (lang == tr(
"Hebrew",
"@item Text character set")) {
236 return KEncodingProber::Hebrew;
237 }
else if (lang == tr(
"Turkish",
"@item Text character set")) {
238 return KEncodingProber::Turkish;
239 }
else if (lang == tr(
"Japanese",
"@item Text character set")) {
240 return KEncodingProber::Japanese;
241 }
else if (lang == tr(
"Baltic",
"@item Text character set")) {
242 return KEncodingProber::Baltic;
243 }
else if (lang == tr(
"Chinese Traditional",
"@item Text character set")) {
244 return KEncodingProber::ChineseTraditional;
245 }
else if (lang == tr(
"Chinese Simplified",
"@item Text character set")) {
246 return KEncodingProber::ChineseSimplified;
247 }
else if (lang == tr(
"Korean",
"@item Text character set")) {
248 return KEncodingProber::Korean;
249 }
else if (lang == tr(
"Thai",
"@item Text character set")) {
250 return KEncodingProber::Thai;
251 }
else if (lang == tr(
"Arabic",
"@item Text character set")) {
252 return KEncodingProber::Arabic;
// Fallback when no label matched.
255 return KEncodingProber::Universal;
// Fragment of the type-to-name lookup (signature not visible in this extract;
// presumably nameForProberType()).
// Each case returns the translated label for the prober type, using the same
// source text and disambiguation context as the name-to-type comparison chain,
// so the two lookups stay in step only while these strings remain identical.
// NOTE(review): no case is visible for NorthernSaami, Other or
// SouthEasternEurope; the result for those values (and any default branch) is
// not shown in this extract -- TODO confirm.
260 switch (proberType) {
261 case KEncodingProber::None:
262 return tr(
"Disabled",
"@item Text character set");
264 case KEncodingProber::Universal:
265 return tr(
"Universal",
"@item Text character set");
267 case KEncodingProber::Arabic:
268 return tr(
"Arabic",
"@item Text character set");
270 case KEncodingProber::Baltic:
271 return tr(
"Baltic",
"@item Text character set");
273 case KEncodingProber::CentralEuropean:
274 return tr(
"Central European",
"@item Text character set");
276 case KEncodingProber::Cyrillic:
277 return tr(
"Cyrillic",
"@item Text character set");
279 case KEncodingProber::Greek:
280 return tr(
"Greek",
"@item Text character set");
282 case KEncodingProber::Hebrew:
283 return tr(
"Hebrew",
"@item Text character set");
285 case KEncodingProber::Japanese:
286 return tr(
"Japanese",
"@item Text character set");
288 case KEncodingProber::Turkish:
289 return tr(
"Turkish",
"@item Text character set");
291 case KEncodingProber::WesternEuropean:
292 return tr(
"Western European",
"@item Text character set");
294 case KEncodingProber::ChineseTraditional:
295 return tr(
"Chinese Traditional",
"@item Text character set");
297 case KEncodingProber::ChineseSimplified:
298 return tr(
"Chinese Simplified",
"@item Text character set");
300 case KEncodingProber::Korean:
301 return tr(
"Korean",
"@item Text character set");
303 case KEncodingProber::Thai:
304 return tr(
"Thai",
"@item Text character set");
306 case KEncodingProber::Unicode:
307 return tr(
"Unicode",
"@item Text character set");
void reset()
Resets the prober's internal state and data.
KEncodingProber(ProberType proberType=Universal)
The default ProberType is Universal (detects all possible encodings).
static QString nameForProberType(ProberType proberType)
Maps a ProberType to its language string.
ProberState state() const
void setProberType(ProberType proberType)
Changes the current prober's ProberType and resets the prober.
static ProberType proberTypeForName(const QString &lang)
ProberState feed(QByteArrayView data)
The main class method.
QByteArray encoding() const
@ Probing
Need more data to make a decision.
@ NotMe
The data is definitely not in any of the encodings supported by the current ProberType.
@ FoundIt
The encoding has definitely been found.
const_pointer constData() const
qsizetype size() const
bool isEmpty() const