Akonadi Search

xapianqueryparser.cpp
/*
 * SPDX-FileCopyrightText: 2014 Vishesh Handa <me@vhanda.in>
 *
 * SPDX-License-Identifier: LGPL-2.1-or-later
 *
 */
7
#include "xapianqueryparser.h"

#include "akonadi_search_xapian_debug.h"
#include <QStringList>
#include <QTextBoundaryFinder>

#include <algorithm>

using namespace Akonadi::Search;
15
16XapianQueryParser::XapianQueryParser() = default;
17
18void XapianQueryParser::setDatabase(Xapian::Database *db)
19{
20 m_db = db;
21}
22
23namespace
24{
/**
 * A candidate expansion term together with its frequency in the collection.
 */
struct Term {
    /// The term as stored in the database.
    std::string t;
    /// Collection frequency of the term.
    unsigned int count = 0;

    /**
     * Deliberately inverted ordering: a term compares "less" when its count is
     * *greater*. The std heap algorithms keep the largest element (by
     * operator<) at the front, so with this ordering pop_heap removes the term
     * with the smallest count.
     */
    bool operator<(const Term &rhs) const
    {
        return count > rhs.count;
    }
};
35
36Xapian::Query makeQuery(const QString &string, int position, Xapian::Database *db)
37{
38 if (!db) {
39 const QByteArray arr = string.toUtf8();
40 const std::string stdString(arr.constData(), arr.size());
41 return Xapian::Query(stdString, 1, position);
42 }
43
44 // Lets just keep the top x (+1 for push_heap)
45 static const int MaxTerms = 100;
46 QList<Term> topTerms;
47 topTerms.reserve(MaxTerms + 1);
48
49 const std::string stdString(string.toStdString());
50 Xapian::TermIterator it = db->allterms_begin(stdString);
51 Xapian::TermIterator end = db->allterms_end(stdString);
52 for (; it != end; ++it) {
53 Term term;
54 term.t = *it;
55 term.count = db->get_collection_freq(term.t);
56
57 if (topTerms.size() < MaxTerms) {
58 topTerms.push_back(term);
59 std::push_heap(topTerms.begin(), topTerms.end());
60 } else {
61 // Remove the term with the min count
62 topTerms.push_back(term);
63 std::push_heap(topTerms.begin(), topTerms.end());
64
65 std::pop_heap(topTerms.begin(), topTerms.end());
66 topTerms.pop_back();
67 }
68 }
69
71 queries.reserve(topTerms.size());
72
73 for (const Term &term : std::as_const(topTerms)) {
74 queries << Xapian::Query(term.t, 1, position);
75 }
76
77 if (queries.isEmpty()) {
78 return Xapian::Query(string.toStdString(), 1, position);
79 }
80 Xapian::Query finalQ(Xapian::Query::OP_SYNONYM, queries.begin(), queries.end());
81 return finalQ;
82}
83
84bool containsSpace(const QString &string)
85{
86 for (const QChar &ch : string) {
87 if (ch.isSpace()) {
88 return true;
89 }
90 }
91
92 return false;
93}
94}
95
96Xapian::Query XapianQueryParser::parseQuery(const QString &text, const QString &prefix)
97{
98 /*
99 Xapian::QueryParser parser;
100 parser.set_default_op(Xapian::Query::OP_AND);
101
102 if (m_db)
103 parser.set_database(*m_db);
104
105 int flags = Xapian::QueryParser::FLAG_PHRASE | Xapian::QueryParser::FLAG_PARTIAL;
106
107 std::string stdString(text.toStdString());
108 return parser.parse_query(stdString, flags);
109 */
110
111 if (text.isEmpty()) {
112 return {};
113 }
114
115 QList<Xapian::Query> queries;
116 QList<Xapian::Query> phraseQueries;
117
118 int start = 0;
119 int end = 0;
120 int position = 0;
121
122 bool inDoubleQuotes = false;
123 bool inSingleQuotes = false;
124 bool inPhrase = false;
125
127 for (; bf.position() != -1; bf.toNextBoundary()) {
128 if (bf.boundaryReasons() & QTextBoundaryFinder::StartOfItem) {
129 //
130 // Check the previous delimiter
131 int pos = bf.position();
132 if (pos != end) {
133 QString delim = text.mid(end, pos - end);
134 if (delim.contains(QLatin1Char('"'))) {
135 if (inDoubleQuotes) {
136 queries << Xapian::Query(Xapian::Query::OP_PHRASE, phraseQueries.begin(), phraseQueries.end());
137 phraseQueries.clear();
138 inDoubleQuotes = false;
139 } else {
140 inDoubleQuotes = true;
141 }
142 } else if (delim.contains(QLatin1Char('\''))) {
143 if (inSingleQuotes) {
144 queries << Xapian::Query(Xapian::Query::OP_PHRASE, phraseQueries.begin(), phraseQueries.end());
145 phraseQueries.clear();
146 inSingleQuotes = false;
147 } else {
148 inSingleQuotes = true;
149 }
150 } else if (!containsSpace(delim)) {
151 if (!inPhrase && !queries.isEmpty()) {
152 phraseQueries << queries.takeLast();
153 }
154 inPhrase = true;
155 } else if (inPhrase && !phraseQueries.isEmpty()) {
156 queries << Xapian::Query(Xapian::Query::OP_PHRASE, phraseQueries.begin(), phraseQueries.end());
157 phraseQueries.clear();
158 inPhrase = false;
159 }
160 }
161
162 start = bf.position();
163 continue;
164 } else if (bf.boundaryReasons() & QTextBoundaryFinder::EndOfItem) {
165 end = bf.position();
166
167 QString str = text.mid(start, end - start);
168
169 // Get the string ready for saving
170 str = str.toLower();
171
172 // Remove all accents
173 const QString denormalized = str.normalized(QString::NormalizationForm_KD);
174 QString cleanString;
175 for (const QChar &ch : denormalized) {
176 auto cat = ch.category();
178 cleanString.append(ch);
179 }
180 }
181
182 str = cleanString.normalized(QString::NormalizationForm_KC);
184 for (const QStringView t : lst) {
185 const QString term = prefix + t;
186
187 position++;
188 if (inDoubleQuotes || inSingleQuotes || inPhrase) {
189 const QByteArray arr = term.toUtf8();
190 const std::string str(arr.constData(), arr.length());
191 phraseQueries << Xapian::Query(str, 1, position);
192 } else {
193 if (m_autoExpand) {
194 queries << makeQuery(term, position, m_db);
195 } else {
196 queries << Xapian::Query(term.toStdString(), 1, position);
197 }
198 }
199 }
200 }
201 }
202
203 if (inPhrase) {
204 queries << Xapian::Query(Xapian::Query::OP_PHRASE, phraseQueries.begin(), phraseQueries.end());
205 phraseQueries.clear();
206 }
207
208 if (!phraseQueries.isEmpty()) {
209 queries << phraseQueries;
210 phraseQueries.clear();
211 }
212
213 if (queries.size() == 1) {
214 return queries.first();
215 }
216 return {Xapian::Query::OP_AND, queries.begin(), queries.end()};
217}
218
220{
221 m_autoExpand = autoexpand;
222}
223
224Xapian::Query XapianQueryParser::expandWord(const QString &word, const QString &prefix)
225{
226 const std::string stdString((prefix + word).toUtf8().constData());
227 Xapian::TermIterator it = m_db->allterms_begin(stdString);
228 Xapian::TermIterator end = m_db->allterms_end(stdString);
229
230 QList<Xapian::Query> queries;
231 for (; it != end; ++it) {
232 queries << Xapian::Query(*it);
233 }
234
235 if (queries.isEmpty()) {
236 return Xapian::Query(stdString);
237 }
238 Xapian::Query finalQ(Xapian::Query::OP_SYNONYM, queries.begin(), queries.end());
239 return finalQ;
240}
Search term.
Definition term.h:27
Xapian::Query expandWord(const QString &word, const QString &prefix=QString())
Expands word to every possible option which it can be expanded to.
void setAutoExapand(bool autoexpand)
Set if each word in the string should be treated as a partial word and should be expanded to every po...
Q_SCRIPTABLE Q_NOREPLY void start()
Akonadi search infrastructure.
Definition core/query.h:21
const QList< QKeySequence > & end()
bool operator<(const PosRange< Trait > &l, const PosRange< Trait > &r)
const char * constData() const const
qsizetype length() const const
qsizetype size() const const
Mark_NonSpacing
iterator begin()
void clear()
iterator end()
T & first()
bool isEmpty() const const
void pop_back()
void push_back(parameter_type value)
void reserve(qsizetype size)
qsizetype size() const const
value_type takeLast()
NormalizationForm_KD
QString & append(QChar ch)
bool contains(QChar ch, Qt::CaseSensitivity cs) const const
bool isEmpty() const const
QString mid(qsizetype position, qsizetype n) const const
QString normalized(NormalizationForm mode, QChar::UnicodeVersion version) const const
QString toLower() const const
std::string toStdString() const const
QByteArray toUtf8() const const
QList< QStringView > split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const const
SkipEmptyParts
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Mon Nov 18 2024 12:10:52 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.