8#include "kcharselectdata_p.h"
10#include <QCoreApplication>
12#include <QFutureInterface>
13#include <QRegularExpression>
19#include <../test-config.h>
20#include <qstandardpaths.h>
// Derived counts built from VCount, TCount and LCount (defined outside this
// view). Further down they are used to split a syllable index (c - SBase)
// into its L / V / T component indices.
31#define NCount (VCount * TCount)
32#define SCount (LCount * NCount)
// Runnable task that builds the search index by calling
// KCharSelectData::createIndex() and reports through QFutureInterface<Index>.
// Only part of the class is visible in this view (access specifiers, the
// run() header and several members are missing here).
34class RunIndexCreation :
public QFutureInterface<Index>,
public QRunnable
// Takes the owning data object and the raw database bytes to index.
37 RunIndexCreation(KCharSelectData *data,
const QByteArray &dataFile)
39 , m_dataFile(dataFile)
// The actual index construction is delegated to the data object.
54 Index index = m_data->createIndex(m_dataFile);
56 reportFinished(
nullptr);
// Const pointer to the data object whose createIndex() is invoked above.
60 KCharSelectData *
const m_data;
// Short-name table with 19 entries; each entry is a NUL-terminated string of
// at most three characters (hence the [4] row width). One entry is the empty
// string. Presumably indexed by the LIndex computed in name() -- the lookup
// itself is not visible in this view.
65static const char JAMO_L_TABLE[][4] = {
66 "G",
"GG",
"N",
"D",
"DD",
"R",
"M",
"B",
"BB",
67 "S",
"SS",
"",
"J",
"JJ",
"C",
"K",
"T",
"P",
"H"
// Short-name table, same row layout as JAMO_L_TABLE (up to three characters
// plus NUL). Presumably indexed by the VIndex computed in name().
// NOTE(review): only 17 entries are visible here; the remaining entries and
// the closing brace are outside this view.
70static const char JAMO_V_TABLE[][4] = {
71 "A",
"AE",
"YA",
"YAE",
"EO",
"E",
"YEO",
"YE",
"O",
72 "WA",
"WAE",
"OE",
"YO",
"U",
"WEO",
"WE",
"WI",
// Short-name table with 28 entries, same row layout as the other JAMO tables.
// The first entry is the empty string. Presumably indexed by the TIndex
// computed in name() -- the lookup itself is not visible in this view.
76static const char JAMO_T_TABLE[][4] = {
77 "",
"G",
"GG",
"GS",
"N",
"NJ",
"NH",
"D",
"L",
"LG",
"LM",
78 "LB",
"LS",
"LT",
"LP",
"LH",
"M",
"B",
"BS",
79 "S",
"SS",
"NG",
"J",
"C",
"K",
"T",
"P",
"H"
// Makes sure the character database is loaded into dataFile and starts
// building the search index. Callers treat a false return as "no data".
// Several lines of the body (returns, the file open call, closing braces) are
// not visible in this view.
83bool KCharSelectData::openDataFile()
// Data already loaded: nothing to read again (branch body not visible).
85 if (!dataFile.isEmpty()) {
// The database is read from a Qt resource path.
88 QFile file(QStringLiteral(
":/kf6/kcharselect/kcharselect-data"));
90 dataFile = file.readAll();
// Files shorter than 40 bytes are rejected; the accessors in this file read
// 32-bit header fields at offsets 4 through 36, i.e. the first 40 bytes.
92 if (dataFile.size() < 40) {
96 const uchar *data =
reinterpret_cast<const uchar *
>(dataFile.constData());
// Header fields at offsets 20 and 24 delimit the block table; each block
// entry is 4 bytes, so this yields the number of blocks.
97 const quint32 offsetBegin = qFromLittleEndian<quint32>(data + 20);
98 const quint32 offsetEnd = qFromLittleEndian<quint32>(data + 24);
99 uint blocks = (offsetEnd - offsetBegin) / 4;
// The block count selects between variants; presumably this sets remapType
// (the assignments are not visible here -- confirm).
103 }
else if (blocks >= 174 && blocks <= 180) {
// Create the index task and keep what start() returns in futureIndex, which is
// later waited on before the index is queried.
// NOTE(review): start() is not visible here, so whether it runs the task on a
// thread pool cannot be confirmed from this view.
111 futureIndex = (
new RunIndexCreation(
this, dataFile))->
start();
// Maps a code point to the 16-bit key used inside the database. Callers in
// this file compare the result against 0xFFFF and bail out on a match, so that
// value acts as the "not representable" marker (the statement that returns it
// is not visible in this view).
119quint16 KCharSelectData::mapCodePointToDataBase(uint code)
const
121 if (remapType == 0) {
// 0xE000..0xEFFF gets its own handling (branch body not visible).
122 if (code >= 0xE000 && code <= 0xEFFF) {
// 0xF000..0xFFFF is shifted down by 0x1000.
125 if (code >= 0xF000 && code <= 0xFFFF) {
126 return code - 0x1000;
// 0x1F000..0x1FFFF is shifted down by 0x10000 so that it fits in 16 bits.
128 if (code >= 0x1F000 && code <= 0x1FFFF) {
129 return code - 0x10000;
// Code points at or above 0x10000 get separate handling (body not visible).
// NOTE(review): the nesting of this check relative to the remapType branch
// cannot be determined from the visible lines.
132 if (code >= 0x10000) {
// Maps a 16-bit database key back to a code point by shifting the remapped
// ranges up again. The fall-through return is not visible in this view.
138uint KCharSelectData::mapDataBaseToCodePoint(quint16 code)
const
140 if (remapType == 0) {
// Keys 0xE000..0xEFFF are shifted up by 0x1000.
141 if (code >= 0xE000 && code <= 0xEFFF) {
142 return code + 0x1000;
// Keys from 0xF000 upwards are shifted up by 0x10000.
144 if (code >= 0xF000) {
145 return code + 0x10000;
// Finds the detail record for code point c and returns its byte offset inside
// dataFile. Callers in this file treat a result of 0 as "no detail record".
// The lookup is a binary search over 27-byte records whose first two bytes are
// the little-endian 16-bit key.
151quint32 KCharSelectData::getDetailIndex(uint c)
const
153 const uchar *data =
reinterpret_cast<const uchar *
>(dataFile.constData());
// Header fields at offsets 12 and 16 delimit the detail table.
156 const quint32 offsetBegin = qFromLittleEndian<quint32>(data + 12);
157 const quint32 offsetEnd = qFromLittleEndian<quint32>(data + 16);
// Index of the last 27-byte record.
161 int max = ((offsetEnd - offsetBegin) / 27) - 1;
163 quint16 unicode = mapCodePointToDataBase(c);
164 if (unicode == 0xFFFF) {
// One-entry cache of the last lookup. These are function-local statics, so the
// cache is shared by every call and every instance.
// NOTE(review): no locking is visible around these statics -- confirm the
// function is only called from one thread.
168 static quint16 most_recent_searched;
169 static quint32 most_recent_result;
171 if (unicode == most_recent_searched) {
172 return most_recent_result;
175 most_recent_searched = unicode;
// Binary search step: compare the key at the midpoint record.
178 mid = (min + max) / 2;
179 const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid * 27);
180 if (unicode > midUnicode) {
182 }
else if (unicode < midUnicode) {
// Match: remember and return the byte offset of the record.
185 most_recent_result = offsetBegin + mid * 27;
187 return most_recent_result;
// No match: cache 0 as the result.
191 most_recent_result = 0;
// Formats a code as text. Takes the numeric code, a minimum length, a prefix
// string and a numeric base; how prefix and base are applied is not visible in
// this view.
195QString KCharSelectData::formatCode(uint code,
int length,
const QString &prefix,
int base)
// Keep padding s until it has at least `length` characters (the padding
// statement itself is not visible here).
198 while (s.
size() < length) {
// Returns the code points belonging to the block with the given index, taken
// from the block table (4-byte entries: 16-bit first key, 16-bit last key).
205QList<uint> KCharSelectData::blockContents(
int block)
207 if (!openDataFile()) {
211 const uchar *data =
reinterpret_cast<const uchar *
>(dataFile.constData());
// Header fields at offsets 20 and 24 delimit the block table.
212 const quint32 offsetBegin = qFromLittleEndian<quint32>(data + 20);
213 const quint32 offsetEnd = qFromLittleEndian<quint32>(data + 24);
// Index of the last block entry; presumably used to range-check `block`
// (the check is not visible in this view).
215 int max = ((offsetEnd - offsetBegin) / 4) - 1;
223 quint16 unicodeBegin = qFromLittleEndian<quint16>(data + offsetBegin + block * 4);
224 quint16 unicodeEnd = qFromLittleEndian<quint16>(data + offsetBegin + block * 4 + 2);
// Keys below the end key are appended inside the loop, then one more append
// follows; the increment between the two appends is not visible here.
226 while (unicodeBegin < unicodeEnd) {
227 res.
append(mapDataBaseToCodePoint(unicodeBegin));
230 res.
append(mapDataBaseToCodePoint(unicodeBegin));
// Returns the block indices that belong to the given section. A negative
// section matches every entry.
235QList<int> KCharSelectData::sectionContents(
int section)
238 if (!openDataFile()) {
242 const uchar *data =
reinterpret_cast<const uchar *
>(dataFile.constData());
// Header fields at offsets 28 and 32 delimit the section table; each entry is
// 4 bytes: a 16-bit section id followed by a 16-bit block index.
243 const quint32 offsetBegin = qFromLittleEndian<quint32>(data + 28);
244 const quint32 offsetEnd = qFromLittleEndian<quint32>(data + 32);
246 int max = ((offsetEnd - offsetBegin) / 4) - 1;
254 for (
int i = 0; i <= max; i++) {
255 const quint16 currSection = qFromLittleEndian<quint16>(data + offsetBegin + i * 4);
// Collect the block index (second field) of every matching entry.
256 if (currSection == section || section < 0) {
257 res.
append(qFromLittleEndian<quint16>(data + offsetBegin + i * 4 + 2));
// Fragment of a function whose header is not visible in this view. It walks
// the NUL-terminated strings stored between header offsets 24 and 28 -- the
// same region sectionName() reads -- so it presumably lists section names.
266 if (!openDataFile()) {
270 const uchar *udata =
reinterpret_cast<const uchar *
>(dataFile.constData());
271 const quint32 stringBegin = qFromLittleEndian<quint32>(udata + 24);
272 const quint32 stringEnd = qFromLittleEndian<quint32>(udata + 28);
274 const char *data = dataFile.constData();
276 quint32 i = stringBegin;
// Step from one string to the next: its length plus the terminating NUL.
278 while (i < stringEnd) {
280 i += qstrlen(data + i) + 1;
// Returns the name of the block containing code point c.
286QString KCharSelectData::block(uint c)
288 return blockName(blockIndex(c));
// Returns the name of the section containing code point c, resolved as
// code point -> block index -> section index -> section name.
291QString KCharSelectData::section(uint c)
293 return sectionName(sectionIndex(blockIndex(c)));
// Returns the name of code point c. Several ranges are handled by dedicated
// branches (their result strings are not visible in this view); everything
// else is looked up in the name table by binary search.
296QString KCharSelectData::name(uint c)
298 if (!openDataFile()) {
// Code points ending in FFFE/FFFF, and 0xFDD0..0xFDEF (presumably the Unicode
// noncharacters -- confirm against the hidden branch body).
302 if ((c & 0xFFFE) == 0xFFFE || (c >= 0xFDD0 && c <= 0xFDEF)) {
304 }
else if ((c >= 0x3400 && c <= 0x4DBF) || (c >= 0x4E00 && c <= 0x9FFF) || (c >= 0x20000 && c <= 0x2F7FF)) {
306 }
else if (c >= 0xAC00 && c <= 0xD7AF) {
// Split the syllable index into L / V / T component indices; these presumably
// index the JAMO_*_TABLE arrays (the lookup is not visible here).
308 int SIndex = c - SBase;
313 if (SIndex < 0 || SIndex >= SCount) {
317 LIndex = SIndex / NCount;
318 VIndex = (SIndex % NCount) / TCount;
319 TIndex = SIndex % TCount;
323 }
else if (c >= 0xD800 && c <= 0xDB7F) {
325 }
else if (c >= 0xDB80 && c <= 0xDBFF) {
327 }
else if (c >= 0xDC00 && c <= 0xDFFF) {
329 }
else if ((c >= 0xE000 && c <= 0xF8FF) || c >= 0xF0000) {
331 }
else if ((c >= 0xF900 && c <= 0xFAFF) || (c >= 0x2F800 && c <= 0x2FFFF)) {
// General case: look the key up in the name table.
334 quint16 unicode = mapCodePointToDataBase(c);
335 if (unicode == 0xFFFF) {
338 const uchar *data =
reinterpret_cast<const uchar *
>(dataFile.constData());
// Header fields at offsets 4 and 8 delimit the name table; each entry is
// 6 bytes: a 16-bit key followed by a 32-bit offset.
339 const quint32 offsetBegin = qFromLittleEndian<quint32>(data + 4);
340 const quint32 offsetEnd = qFromLittleEndian<quint32>(data + 8);
344 int max = ((offsetEnd - offsetBegin) / 6) - 1;
348 mid = (min + max) / 2;
349 const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid * 6);
350 if (unicode > midUnicode) {
352 }
else if (unicode < midUnicode) {
// Match: fetch the 32-bit offset stored after the key.
355 quint32 offset = qFromLittleEndian<quint32>(data + offsetBegin + mid * 6 + 2);
// Returns the index of the block containing code point c, found by a linear
// scan of the block table.
369int KCharSelectData::blockIndex(uint c)
371 if (!openDataFile()) {
375 const uchar *data =
reinterpret_cast<const uchar *
>(dataFile.constData());
// Header fields at offsets 20 and 24 delimit the block table (4-byte entries).
376 const quint32 offsetBegin = qFromLittleEndian<quint32>(data + 20);
377 const quint32 offsetEnd = qFromLittleEndian<quint32>(data + 24);
378 const quint16 unicode = mapCodePointToDataBase(c);
379 if (unicode == 0xFFFF) {
383 int max = ((offsetEnd - offsetBegin) / 4) - 1;
// Advance while the key lies beyond the entry's end key (field at +2); the
// scan stops at the last entry at the latest.
387 while (unicode > qFromLittleEndian<quint16>(data + offsetBegin + i * 4 + 2) && i < max) {
// Returns the section index for the given block index, taken from the section
// table.
394int KCharSelectData::sectionIndex(
int block)
396 if (!openDataFile()) {
400 const uchar *data =
reinterpret_cast<const uchar *
>(dataFile.constData());
// Header fields at offsets 28 and 32 delimit the section table; each entry is
// 4 bytes: a 16-bit section id followed by a 16-bit block index.
401 const quint32 offsetBegin = qFromLittleEndian<quint32>(data + 28);
402 const quint32 offsetEnd = qFromLittleEndian<quint32>(data + 32);
404 int max = ((offsetEnd - offsetBegin) / 4) - 1;
406 for (
int i = 0; i <= max; i++) {
// First entry whose block field matches wins. Note that the stored section id
// is returned plus one.
407 if (qFromLittleEndian<quint16>(data + offsetBegin + i * 4 + 2) == block) {
408 return qFromLittleEndian<quint16>(data + offsetBegin + i * 4) + 1;
// Returns the name of the block with the given index. Names are stored as
// consecutive NUL-terminated strings between header offsets 16 and 20.
415QString KCharSelectData::blockName(
int index)
417 if (!openDataFile()) {
421 const uchar *udata =
reinterpret_cast<const uchar *
>(dataFile.constData());
422 const quint32 stringBegin = qFromLittleEndian<quint32>(udata + 16);
423 const quint32 stringEnd = qFromLittleEndian<quint32>(udata + 20);
425 quint32 i = stringBegin;
428 const char *data = dataFile.constData();
// Skip whole strings (length plus NUL) until the requested index is reached;
// the currIndex increment is not visible in this view.
429 while (i < stringEnd && currIndex < index) {
430 i += qstrlen(data + i) + 1;
// Returns the name of the section with the given index. Names are stored as
// consecutive NUL-terminated strings between header offsets 24 and 28.
437QString KCharSelectData::sectionName(
int index)
442 if (!openDataFile()) {
448 const uchar *udata =
reinterpret_cast<const uchar *
>(dataFile.constData());
449 const quint32 stringBegin = qFromLittleEndian<quint32>(udata + 24);
450 const quint32 stringEnd = qFromLittleEndian<quint32>(udata + 28);
452 quint32 i = stringBegin;
455 const char *data = dataFile.constData();
// Skip whole strings (length plus NUL) until the requested index is reached;
// the currIndex increment is not visible in this view.
456 while (i < stringEnd && currIndex < index) {
457 i += qstrlen(data + i) + 1;
// Fragment of a function whose header is not visible in this view. It reads
// the string list stored at bytes +2 (32-bit offset) and +6 (8-bit count) of
// the detail record for c -- the fields the index-building code further down
// calls aliasOffset / aliasCount.
466 if (!openDataFile()) {
469 const uchar *udata =
reinterpret_cast<const uchar *
>(dataFile.constData());
// 0 means there is no detail record for c.
// NOTE(review): getDetailIndex() returns quint32 but the result is stored in
// an int here.
470 const int detailIndex = getDetailIndex(c);
471 if (detailIndex == 0) {
475 const quint8 count = *(quint8 *)(udata + detailIndex + 6);
476 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 2);
481 const char *data = dataFile.constData();
// Visit `count` consecutive NUL-terminated strings.
482 for (
int i = 0; i < count; i++) {
484 offset += qstrlen(data + offset) + 1;
// Fragment of a function whose header is not visible in this view. It reads
// the string list stored at bytes +7 (32-bit offset) and +11 (8-bit count) of
// the detail record for c -- the fields the index-building code further down
// calls notesOffset / notesCount.
491 if (!openDataFile()) {
// 0 means there is no detail record for c.
494 const int detailIndex = getDetailIndex(c);
495 if (detailIndex == 0) {
499 const uchar *udata =
reinterpret_cast<const uchar *
>(dataFile.constData());
500 const quint8 count = *(quint8 *)(udata + detailIndex + 11);
501 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 7);
506 const char *data = dataFile.constData();
// Visit `count` consecutive NUL-terminated strings.
507 for (
int i = 0; i < count; i++) {
509 offset += qstrlen(data + offset) + 1;
// Fragment of a function whose header is not visible in this view. It reads
// the list stored at bytes +22 (32-bit offset) and +26 (8-bit count) of the
// detail record for c and collects the entries, mapped back to code points,
// into seeAlso.
517 if (!openDataFile()) {
// 0 means there is no detail record for c.
520 const int detailIndex = getDetailIndex(c);
521 if (detailIndex == 0) {
525 const uchar *udata =
reinterpret_cast<const uchar *
>(dataFile.constData());
526 const quint8 count = *(quint8 *)(udata + detailIndex + 26);
527 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 22);
// Entries are read as little-endian 16-bit database keys, not strings. The
// statement advancing `offset` is not visible in this view.
532 for (
int i = 0; i < count; i++) {
533 seeAlso.
append(mapDataBaseToCodePoint(qFromLittleEndian<quint16>(udata + offset)));
// Fragment of a function whose header is not visible in this view. It reads
// the string list stored at bytes +17 (32-bit offset) and +21 (8-bit count) of
// the detail record for c -- the fields the index-building code further down
// calls equivOffset / equivCount.
542 if (!openDataFile()) {
// 0 means there is no detail record for c.
545 const int detailIndex = getDetailIndex(c);
546 if (detailIndex == 0) {
550 const uchar *udata =
reinterpret_cast<const uchar *
>(dataFile.constData());
551 const quint8 count = *(quint8 *)(udata + detailIndex + 21);
552 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 17);
557 const char *data = dataFile.constData();
// Visit `count` consecutive NUL-terminated strings.
558 for (
int i = 0; i < count; i++) {
560 offset += qstrlen(data + offset) + 1;
// Returns the approximate-equivalent strings recorded for code point c. They
// are stored as `count` consecutive NUL-terminated strings referenced from
// bytes +12 (32-bit offset) and +16 (8-bit count) of the detail record.
566QStringList KCharSelectData::approximateEquivalents(uint c)
568 if (!openDataFile()) {
// 0 means there is no detail record for c.
571 const int detailIndex = getDetailIndex(c);
572 if (detailIndex == 0) {
576 const uchar *udata =
reinterpret_cast<const uchar *
>(dataFile.constData());
577 const quint8 count = *(quint8 *)(udata + detailIndex + 16);
578 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 12);
// The number of results is known up front, so allocate once.
581 approxEquivalents.
reserve(count);
583 const char *data = dataFile.constData();
// Step over each string: its length plus the terminating NUL. The append of
// each string is not visible in this view.
584 for (
int i = 0; i < count; i++) {
586 offset += qstrlen(data + offset) + 1;
589 return approxEquivalents;
// Fragment of a function whose header is not visible in this view. It splits
// a syllable code point into three component code points L, V and T.
// SIndex is unsigned, so a c below SBase wraps to a large value and is caught
// by the single ">= SCount" range check.
595 uint SIndex = c - SBase;
596 if (SIndex >= SCount) {
600 uint L = LBase + SIndex / NCount;
601 uint V = VBase + (SIndex % NCount) / TCount;
602 uint T = TBase + SIndex % TCount;
// Fragment of a function whose header is not visible in this view. It binary
// searches a table of 30-byte records that starts at the offset stored in
// header field 36 and runs to the end of the file. Each record is a 16-bit key
// followed by seven 32-bit offsets (2 + 7 * 4 = 30 bytes).
614 if (!openDataFile()) {
618 quint16 unicode = mapCodePointToDataBase(c);
619 if (unicode == 0xFFFF) {
623 const char *data = dataFile.constData();
624 const uchar *udata =
reinterpret_cast<const uchar *
>(data);
625 const quint32 offsetBegin = qFromLittleEndian<quint32>(udata + 36);
// This table has no end marker in the header; it extends to the end of the file.
626 const quint32 offsetEnd = dataFile.size();
630 int max = ((offsetEnd - offsetBegin) / 30) - 1;
633 mid = (min + max) / 2;
634 const quint16 midUnicode = qFromLittleEndian<quint16>(udata + offsetBegin + mid * 30);
635 if (unicode > midUnicode) {
637 }
else if (unicode < midUnicode) {
// Match: read the seven per-record offsets.
642 for (
int i = 0; i < 7; i++) {
643 quint32 offset = qFromLittleEndian<quint32>(udata + offsetBegin + mid * 30 + 2 + i * 4);
// Fragment of a function whose header is not visible in this view. It binary
// searches the name table (6-byte entries between header offsets 4 and 8) and
// reads one byte at the offset stored in the matching entry as a category code.
659 if (!openDataFile()) {
663 ushort unicode = mapCodePointToDataBase(c);
664 if (unicode == 0xFFFF) {
668 const uchar *data =
reinterpret_cast<const uchar *
>(dataFile.constData());
669 const quint32 offsetBegin = qFromLittleEndian<quint32>(data + 4);
670 const quint32 offsetEnd = qFromLittleEndian<quint32>(data + 8);
674 int max = ((offsetEnd - offsetBegin) / 6) - 1;
677 mid = (min + max) / 2;
678 const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid * 6);
679 if (unicode > midUnicode) {
681 }
else if (unicode < midUnicode) {
// Match: the byte at the entry's stored offset is the category code; it is
// expected to be non-zero.
684 quint32 offset = qFromLittleEndian<quint32>(data + offsetBegin + mid * 6 + 2);
685 uchar categoryCode = *(data + offset);
686 Q_ASSERT(categoryCode > 0);
// Predicate used by isDisplayable(); the body is not visible in this view.
696bool KCharSelectData::isPrint(uint c)
// Decides whether code point c should be rendered. 0xFDD0 and 0xFDD1 are
// special-cased (branch result not visible in this view); otherwise a code
// point is displayable when it is printable and not ignorable.
702bool KCharSelectData::isDisplayable(uint c)
707 if (c == 0xFDD0 || c == 0xFDD1) {
711 return !isIgnorable(c) && isPrint(c);
// Returns true when c is one of a fixed, hard-coded set of code points and
// ranges; isDisplayable() excludes these from display.
714bool KCharSelectData::isIgnorable(uint c)
732 return c == 0x00AD || c == 0x034F || c == 0x115F || c == 0x1160 ||
733 c == 0x17B4 || c == 0x17B5 || (c >= 0x180B && c <= 0x180D) ||
734 (c >= 0x200B && c <= 0x200F) || (c >= 0x202A && c <= 0x202E) ||
735 (c >= 0x2060 && c <= 0x206F) || c == 0x3164 ||
736 (c >= 0xFE00 && c <= 0xFE0F) || c == 0xFEFF || c == 0xFFA0 ||
737 (c >= 0xFFF0 && c <= 0xFFF8);
// Predicate used by display() to decide whether displayCombining(c) is
// appended; the body is not visible in this view.
741bool KCharSelectData::isCombining(uint c)
// Builds the string used to show code point c. Non-displayable code points
// take a separate branch (result not visible in this view); for combining
// characters the result of displayCombining(c) is appended to s.
// How `font` is used is not visible here.
749QString KCharSelectData::display(uint c,
const QFont &font)
751 if (!isDisplayable(c)) {
755 if (isCombining(c)) {
756 s += displayCombining(c);
// Produces the display string for a combining character; called from
// display(). The body is not visible in this view.
765QString KCharSelectData::displayCombining(uint c)
// Fragment of a search function whose header is not visible in this view.
// Visible stages: (1) decode octal-escaped input, (2) shortcut for very short
// input, (3) recognise hex code notations, (4) recognise plain numbers,
// (5) combine per-term matches and append them, sorted, to the result list.
858 if (octalExp.match(simplified).hasMatch()) {
// Accumulate octal digits into a byte value.
// NOTE(review): the loop bound is "<= length()", so the final iteration runs
// with i == length(); how the character at i is fetched is not visible here --
// confirm it cannot index past the end.
862 for (
int i = 0; i <= simplified.
length(); ++i) {
864 if (c >=
'0' && c <=
'7') {
865 byte = 8 *
byte + c -
'0';
866 }
else if (
byte == -1) {
868 }
else if (
byte >= 0x00 &&
byte <= 0xFF) {
// Inputs of at most two characters get special treatment (body not visible).
876 if (simplified.
length() <= 2) {
878 if (ucs4.
size() == 1) {
// Accepts 4-5 hex digits, optionally prefixed with u+, U+, 0x or 0X.
892 static const QRegularExpression hexExp(QStringLiteral(
"^(?:|u\\+|U\\+|0x|0X)([A-Fa-f0-9]{4,5})$"));
893 for (
const QString &s : std::as_const(searchStrings)) {
895 if (
match.hasMatch()) {
// Replace the term in place with the captured text.
// NOTE(review): this writes to searchStrings while a range-for over the same
// list is running -- confirm this is safe.
900 searchStrings[searchStrings.
indexOf(s)] = cap;
// A term that parses as an integer is added to the results directly.
905 int unicode = s.
toInt(&ok);
907 returnRes.
append(unicode);
// firstSubString marks the first term so it can be handled differently from
// later ones (the handling itself is not visible here).
911 bool firstSubString =
true;
912 for (
const QString &s : std::as_const(searchStrings)) {
914 if (firstSubString) {
916 firstSubString =
false;
924 for (uint c : std::as_const(returnRes)) {
930 for (
auto c : std::as_const(result)) {
// Matches are appended in ascending order after any entries already present.
933 std::sort(sortedResult.
begin(), sortedResult.
end());
935 returnRes += sortedResult;
// Fragment of a function whose header is not visible in this view. It collects
// every code point whose index key starts with s.
941 if (dataFile.isEmpty()) {
// Block until the index built in the background is available.
944 futureIndex.waitForFinished();
945 const Index index = futureIndex.result();
// Keys are ordered, so all keys with prefix s form one contiguous run that
// begins at lowerBound(s).
946 Index::const_iterator pos = index.lowerBound(s);
949 while (pos != index.constEnd() && pos.key().startsWith(s)) {
950 for (quint16 c : pos.value()) {
951 result.
insert(mapDataBaseToCodePoint(c));
// Fragment of a tokenizer whose header is not visible in this view. It
// alternates between two scans: first over a run of word characters (letters,
// digits or '+'), then over the run of non-word characters that follows.
965 while (end < length) {
966 while (end < length && (s[end].isLetterOrNumber() || s[end] ==
QLatin1Char(
'+'))) {
973 while (end < length && !(s[end].isLetterOrNumber() || s[end] ==
QLatin1Char(
'+'))) {
// Adds the key `unicode` to the index under every lower-cased string in
// `strings`. `strings` is presumably derived from `s`; its definition is not
// visible in this view.
981void KCharSelectData::appendToIndex(Index *index, quint16 unicode,
const QString &s)
// NOTE(review): the loop variable `s` shadows the parameter of the same name.
984 for (
const QString &s : strings) {
985 (*index)[s.
toLower()].append(unicode);
// Fragment of the index builder (invoked as createIndex() by RunIndexCreation;
// the function header and the declarations of `data` and `i` are not visible
// in this view). It makes two passes over the database:
//   1. the name table (6-byte entries between header offsets 4 and 8);
//   2. the detail table (27-byte records between header offsets 12 and 16),
//      visiting each record's alias, note, approximate-equivalent, equivalent
//      and see-also lists.
994 const uchar *udata =
reinterpret_cast<const uchar *
>(dataFile.
constData());
// Pass 1: name table entries are a 16-bit key followed by a 32-bit offset.
996 const quint32 nameOffsetBegin = qFromLittleEndian<quint32>(udata + 4);
997 const quint32 nameOffsetEnd = qFromLittleEndian<quint32>(udata + 8);
999 int max = ((nameOffsetEnd - nameOffsetBegin) / 6) - 1;
1001 for (
int pos = 0; pos <= max; pos++) {
1002 const quint16 unicode = qFromLittleEndian<quint16>(udata + nameOffsetBegin + pos * 6);
1003 quint32 offset = qFromLittleEndian<quint32>(udata + nameOffsetBegin + pos * 6 + 2);
// Pass 2: detail records. Layout used below, relative to the record start:
//   +0 key (16 bit)
//   +2 alias offset,   +6 alias count
//   +7 notes offset,  +11 notes count
//  +12 approx offset, +16 approx count
//  +17 equiv offset,  +21 equiv count
//  +22 seeAlso offset, +26 seeAlso count
1008 const quint32 detailsOffsetBegin = qFromLittleEndian<quint32>(udata + 12);
1009 const quint32 detailsOffsetEnd = qFromLittleEndian<quint32>(udata + 16);
1011 max = ((detailsOffsetEnd - detailsOffsetBegin) / 27) - 1;
1013 for (
int pos = 0; pos <= max; pos++) {
1014 const quint16 unicode = qFromLittleEndian<quint16>(udata + detailsOffsetBegin + pos * 27);
// Aliases: consecutive NUL-terminated strings.
1017 const quint8 aliasCount = *(quint8 *)(udata + detailsOffsetBegin + pos * 27 + 6);
1018 quint32 aliasOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos * 27 + 2);
1020 for (
int j = 0; j < aliasCount; j++) {
1022 aliasOffset += qstrlen(data + aliasOffset) + 1;
// Notes: consecutive NUL-terminated strings.
1026 const quint8 notesCount = *(quint8 *)(udata + detailsOffsetBegin + pos * 27 + 11);
1027 quint32 notesOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos * 27 + 7);
1029 for (
int j = 0; j < notesCount; j++) {
1031 notesOffset += qstrlen(data + notesOffset) + 1;
// Approximate equivalents: consecutive NUL-terminated strings.
1035 const quint8 apprCount = *(quint8 *)(udata + detailsOffsetBegin + pos * 27 + 16);
1036 quint32 apprOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos * 27 + 12);
1038 for (
int j = 0; j < apprCount; j++) {
1040 apprOffset += qstrlen(data + apprOffset) + 1;
// Equivalents: consecutive NUL-terminated strings.
1044 const quint8 equivCount = *(quint8 *)(udata + detailsOffsetBegin + pos * 27 + 21);
1045 quint32 equivOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos * 27 + 17);
1047 for (
int j = 0; j < equivCount; j++) {
1049 equivOffset += qstrlen(data + equivOffset) + 1;
// See-also: entries are read as little-endian 16-bit keys, not strings, and
// each one is indexed by its 4-digit formatted code.
1053 const quint8 seeAlsoCount = *(quint8 *)(udata + detailsOffsetBegin + pos * 27 + 26);
1054 quint32 seeAlsoOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos * 27 + 22);
1056 for (
int j = 0; j < seeAlsoCount; j++) {
1057 quint16 seeAlso = qFromLittleEndian<quint16>(udata + seeAlsoOffset);
1058 appendToIndex(&i, unicode, formatCode(seeAlso, 4,
QString()));
// Fix: advance the see-also cursor by one 16-bit entry. This line used to step
// equivOffset as if walking strings, which left seeAlsoOffset on the first
// entry, so only that entry was ever indexed (seeAlsoCount times).
1059 seeAlsoOffset += 2;
Q_SCRIPTABLE Q_NOREPLY void start()
KCOREADDONS_EXPORT Result match(QStringView pattern, QStringView str)
KIOCORE_EXPORT QStringList list(const QString &fileClass)
KGuiItem ok()
Returns the 'Ok' gui item.
Category category(StandardShortcut id)
const QList< QKeySequence > & end()
Trait::StringList splitString(const typename Trait::String &str, const typename Trait::Char &ch)
QByteArray & append(QByteArrayView data)
const char * constData() const const
Category category() const const
QString translate(const char *context, const char *sourceText, const char *disambiguation, int n)
QString family() const const
void append(QList< T > &&value)
const_reference at(qsizetype i) const const
bool isEmpty() const const
void reserve(qsizetype size)
qsizetype size() const const
qsizetype count() const const
iterator insert(const T &value)
QSet< T > & intersect(const QSet< T > &other)
bool remove(const T &value)
const QChar at(qsizetype position) const const
QString fromUtf8(QByteArrayView str)
bool isNull() const const
qsizetype length() const const
QString mid(qsizetype position, qsizetype n) const const
QString number(double n, char format, int precision)
QString & prepend(QChar ch)
QString simplified() const const
qsizetype size() const const
int toInt(bool *ok, int base) const const
QString toLower() const const
QList< uint > toUcs4() const const
QString toUpper() const const
qsizetype indexOf(const QRegularExpression &re, qsizetype from) const const
QThreadPool * globalInstance()
void start(Callable &&callableToRun, int priority)