KIO

kuriikwsfiltereng.cpp
1/*
2 This file is part of the KDE project
3 SPDX-FileCopyrightText: 2002, 2003 Dawit Alemayehu <adawit@kde.org>
4 SPDX-FileCopyrightText: 2000 Yves Arrouye <yves@realnames.com>
5 SPDX-FileCopyrightText: 1999 Simon Hausmann <hausmann@kde.org>
6
7 Advanced web shortcuts:
8 SPDX-FileCopyrightText: 2001 Andreas Hochsteger <e9625392@student.tuwien.ac.at>
9
10 SPDX-License-Identifier: GPL-2.0-or-later
11*/
12
13#include "kuriikwsfiltereng_p.h"
14#include "searchprovider.h"
15
16#include <KConfig>
17#include <KConfigGroup>
18#include <kprotocolinfo.h>
19
20#ifdef WITH_QTDBUS
21#include <QDBusConnection>
22#endif
23
24#include <QLoggingCategory>
25#include <QRegularExpression>
26#include <QStringEncoder>
27
28Q_LOGGING_CATEGORY(category, "kf.kio.urifilters.ikws", QtWarningMsg)
29using namespace KIO;
30
31/**
32 * IMPORTANT: If you change anything here, make sure kiowidgets-kurifiltertest-{colon,space}-separator
33 * unit tests still pass (they're usually run as part of "make test").
34 */
35
36KURISearchFilterEngine::KURISearchFilterEngine()
37{
38 configure();
39 // Only after initial load, we would want to reparse the files on config changes.
40 // When the registry is constructed, it automatically loads the searchproviders
41 m_reloadRegistry = true;
42
43#ifdef WITH_QTDBUS
45 .connect(QString(), QStringLiteral("/"), QStringLiteral("org.kde.KUriFilterPlugin"), QStringLiteral("configure"), this, SLOT(configure()));
46#endif
47}
48
49KURISearchFilterEngine::~KURISearchFilterEngine() = default;
50
51// static
52QStringList KURISearchFilterEngine::defaultSearchProviders()
53{
54 static const QStringList defaultProviders{QStringLiteral("google"),
55 QStringLiteral("youtube"),
56 QStringLiteral("yahoo"),
57 QStringLiteral("wikipedia"),
58 QStringLiteral("wikit")};
59 return defaultProviders;
60}
61
62SearchProvider *KURISearchFilterEngine::webShortcutQuery(const QString &typedString, QString &searchTerm) const
63{
64 const auto getProviderForKey = [this, &searchTerm](const QString &key) {
65 SearchProvider *provider = nullptr;
66 // If the key contains a : an assertion in the isKnownProtocol method would fail. This can be
67 // the case if the delimiter is switched to space, see kiowidgets_space_separator_test
68 if (!key.isEmpty() && (key.contains(QLatin1Char(':')) || !KProtocolInfo::isKnownProtocol(key, false))) {
69 provider = m_registry.findByKey(key);
70 if (provider) {
71 if (!m_bUseOnlyPreferredWebShortcuts || m_preferredWebShortcuts.contains(provider->desktopEntryName())) {
72 qCDebug(category) << "found provider" << provider->desktopEntryName() << "searchTerm=" << searchTerm;
73 } else {
74 provider = nullptr;
75 }
76 }
77 }
78 return provider;
79 };
80
81 SearchProvider *provider = nullptr;
82 if (m_bWebShortcutsEnabled) {
83 QString key;
84 if (typedString.contains(QLatin1Char('!'))) {
85 const static QRegularExpression bangRegex(QStringLiteral("!([^ ]+)"));
86 const auto match = bangRegex.match(typedString);
87 if (match.hasMatch() && match.lastCapturedIndex() == 1) {
88 key = match.captured(1);
89 searchTerm = QString(typedString).remove(bangRegex);
90 }
91 }
92
93 // If we have found a bang-match it might be unintentionally triggered, because the ! character is contained
94 // in the query. To avoid not returning any results we check if we can find a provider for the key, if not
95 // we clear it and try the traditional query syntax, see https://bugs.kde.org/show_bug.cgi?id=437660
96 if (!key.isEmpty()) {
97 provider = getProviderForKey(key);
98 if (!provider) {
99 key.clear();
100 }
101 }
102 if (key.isEmpty()) {
103 const int pos = typedString.indexOf(QLatin1Char(m_cKeywordDelimiter));
104 if (pos > -1) {
105 key = typedString.left(pos).toLower(); // #169801
106 searchTerm = typedString.mid(pos + 1);
107 } else if (!typedString.isEmpty() && m_cKeywordDelimiter == ' ') {
108 key = typedString;
109 searchTerm = typedString.mid(pos + 1);
110 }
111 provider = getProviderForKey(key);
112 }
113
114 qCDebug(category) << "m_cKeywordDelimiter=" << QLatin1Char(m_cKeywordDelimiter) << "key=" << key << "typedString=" << typedString;
115 }
116
117 return provider;
118}
119
120SearchProvider *KURISearchFilterEngine::autoWebSearchQuery(const QString &typedString, const QString &defaultShortcut) const
121{
122 SearchProvider *provider = nullptr;
123 const QString defaultSearchProvider = (m_defaultWebShortcut.isEmpty() ? defaultShortcut : m_defaultWebShortcut);
124
125 if (m_bWebShortcutsEnabled && !defaultSearchProvider.isEmpty()) {
126 // Make sure we ignore supported protocols, e.g. "smb:", "http:"
127 const int pos = typedString.indexOf(QLatin1Char(':'));
128
129 if (pos == -1 || !KProtocolInfo::isKnownProtocol(typedString.left(pos), false)) {
130 provider = m_registry.findByDesktopName(defaultSearchProvider);
131 }
132 }
133
134 return provider;
135}
136
137QByteArray KURISearchFilterEngine::name() const
138{
139 return "kuriikwsfilter";
140}
141
142char KURISearchFilterEngine::keywordDelimiter() const
143{
144 return m_cKeywordDelimiter;
145}
146
147QString KURISearchFilterEngine::defaultSearchEngine() const
148{
149 return m_defaultWebShortcut;
150}
151
152QStringList KURISearchFilterEngine::favoriteEngineList() const
153{
154 return m_preferredWebShortcuts;
155}
156
157KURISearchFilterEngine *KURISearchFilterEngine::self()
158{
159 static KURISearchFilterEngine self;
160 return &self;
161}
162
163QStringList KURISearchFilterEngine::modifySubstitutionMap(SubstMap &map, const QString &query) const
164{
165 // Returns the number of query words
166 QString userquery = query;
167
168 // Do some pre-encoding, before we can start the work:
169 {
170 const static QRegularExpression qsexpr(QStringLiteral("\\\"[^\\\"]*\\\""));
171 // Temporarily substitute spaces in quoted strings (" " -> "%20")
172 // Needed to split user query into StringList correctly.
173 int start = 0;
175 while ((match = qsexpr.match(userquery, start)).hasMatch()) {
176 QString str = match.captured(0);
177 str.replace(QLatin1Char(' '), QLatin1String("%20"));
178 userquery.replace(match.capturedStart(0), match.capturedLength(0), str);
179 start = match.capturedStart(0) + str.size(); // Move after last quote
180 }
181 }
182
183 // Split user query between spaces:
185
186 // Back-substitute quoted strings (%20 -> " "):
187 userquery.replace(QLatin1String("%20"), QLatin1String(" "));
188 l.replaceInStrings(QStringLiteral("%20"), QStringLiteral(" "));
189
190 qCDebug(category) << "Generating substitution map:\n";
191 // Generate substitution map from user query:
192 for (int i = 0; i <= l.count(); i++) {
193 int pos = 0;
194 QString v;
195
196 // Add whole user query (\{0}) to substitution map:
197 if (i == 0) {
198 v = userquery;
199 }
200 // Add partial user query items to substitution map:
201 else {
202 v = l[i - 1];
203 }
204
205 // Insert partial queries (referenced by \1 ... \n) to map:
206 map.insert(QString::number(i), v);
207
208 // Insert named references (referenced by \name) to map:
209 if ((i > 0) && (pos = v.indexOf(QLatin1Char('='))) > 0) {
210 QString s = v.mid(pos + 1);
211 QString k = v.left(pos);
212
213 // Back-substitute references contained in references (e.g. '\refname' substitutes to 'thisquery=\0')
214 s.replace(QLatin1String("%5C"), QLatin1String("\\"));
215 map.insert(k, s);
216 }
217 }
218
219 return l;
220}
221
222static QString encodeString(const QString &s, QStringEncoder &codec)
223{
224 // we encode all characters, including the space character BUG: 304276
225 QByteArray encoded = QByteArray(codec.encode(s)).toPercentEncoding();
226 return QString::fromUtf8(encoded);
227}
228
229QString KURISearchFilterEngine::substituteQuery(const QString &url, SubstMap &map, const QString &userquery, QStringEncoder &codec) const
230{
231 QString newurl = url;
232 QStringList ql = modifySubstitutionMap(map, userquery);
233 const int count = ql.count();
234
235 // Substitute references (\{ref1,ref2,...}) with values from user query:
236 {
237 const static QRegularExpression reflistRe(QStringLiteral("\\\\\\{([^\\}]+)\\}"));
238 // Substitute reflists (\{ref1,ref2,...}):
239 int start = 0;
241 while ((match = reflistRe.match(newurl, start)).hasMatch()) {
242 bool found = false;
243
244 // bool rest = false;
245 QString v;
246 const QString rlstring = match.captured(1);
247
248 // \{@} gets a special treatment later
249 if (rlstring == QLatin1String("@")) {
250 v = QStringLiteral("\\@");
251 found = true;
252 }
253
254 // TODO: strip whitespaces around commas
255 const QStringList refList = rlstring.split(QLatin1Char(','), Qt::SkipEmptyParts);
256
257 for (const QString &rlitem : refList) {
258 if (found) {
259 break;
260 }
261
262 const static QRegularExpression rangeRe(QStringLiteral("([0-9]*)\\-([0-9]*)"));
263 const QRegularExpressionMatch rangeMatch = rangeRe.match(rlitem);
264 // Substitute a range of keywords
265 if (rangeMatch.hasMatch()) {
266 int first = rangeMatch.captured(1).toInt();
267 int last = rangeMatch.captured(2).toInt();
268
269 if (first == 0) {
270 first = 1;
271 }
272
273 if (last == 0) {
274 last = count;
275 }
276
277 for (int i = first; i <= last; i++) {
278 v += map[QString::number(i)] + QLatin1Char(' ');
279 // Remove used value from ql (needed for \{@}):
280 ql[i - 1].clear();
281 }
282
283 v = v.trimmed();
284 if (!v.isEmpty()) {
285 found = true;
286 }
287
288 v = encodeString(v, codec);
289 } else if (rlitem.startsWith(QLatin1Char('\"')) && rlitem.endsWith(QLatin1Char('\"'))) {
290 // Use default string from query definition:
291 found = true;
292 QString s = rlitem.mid(1, rlitem.length() - 2);
293 v = encodeString(s, codec);
294 } else if (map.contains(rlitem)) {
295 // Use value from substitution map:
296 found = true;
297 v = encodeString(map[rlitem], codec);
298
299 // Remove used value from ql (needed for \{@}):
300 const QChar c = rlitem.at(0); // rlitem can't be empty at this point
301 if (c == QLatin1Char('0')) {
302 // It's a numeric reference to '0'
303 for (QStringList::Iterator it = ql.begin(); it != ql.end(); ++it) {
304 (*it).clear();
305 }
306 } else if ((c >= QLatin1String("0")) && (c <= QLatin1String("9"))) { // krazy:excludeall=doublequote_chars
307 // It's a numeric reference > '0'
308 int n = rlitem.toInt();
309 ql[n - 1].clear();
310 } else {
311 // It's a alphanumeric reference
313 while ((it != ql.end()) && !it->startsWith(rlitem + QLatin1Char('='))) {
314 ++it;
315 }
316 if (it != ql.end()) {
317 it->clear();
318 }
319 }
320
321 // Encode '+', otherwise it would be interpreted as space in the resulting url:
322 v.replace(QLatin1Char('+'), QLatin1String("%2B"));
323 } else if (rlitem == QLatin1String("@")) {
324 v = QStringLiteral("\\@");
325 }
326 }
327
328 newurl.replace(match.capturedStart(0), match.capturedLength(0), v);
329 start = match.capturedStart(0) + v.size();
330 }
331
332 // Special handling for \{@};
333 {
334 // Generate list of unmatched strings:
335 QString v = ql.join(QLatin1Char(' ')).simplified();
336 v = encodeString(v, codec);
337
338 // Substitute \{@} with list of unmatched query strings
339 newurl.replace(QLatin1String("\\@"), v);
340 }
341 }
342
343 return newurl;
344}
345
346QUrl KURISearchFilterEngine::formatResult(const QString &url, const QString &cset1, const QString &cset2, const QString &query, bool isMalformed) const
347{
348 SubstMap map;
349 return formatResult(url, cset1, cset2, query, isMalformed, map);
350}
351
352QUrl KURISearchFilterEngine::formatResult(const QString &url,
353 const QString &cset1,
354 const QString &cset2,
355 const QString &userquery,
356 bool /* isMalformed */,
357 SubstMap &map) const
358{
359 // Return nothing if userquery is empty and it contains
360 // substitution strings...
361 if (userquery.isEmpty() && url.indexOf(QLatin1String("\\{")) > 0) {
362 return QUrl();
363 }
364
365 // Create a codec for the desired encoding so that we can transcode the user's "url".
366 QString cseta = cset1;
367 if (cseta.isEmpty()) {
368 cseta = QStringLiteral("UTF-8");
369 }
370
371 QStringEncoder csetacodec(cseta.toLatin1().constData());
372 if (!csetacodec.isValid()) {
373 cseta = QStringLiteral("UTF-8");
375 }
376
377 // Add charset indicator for the query to substitution map:
378 map.insert(QStringLiteral("ikw_charset"), cseta);
379
380 // Add charset indicator for the fallback query to substitution map:
381 QString csetb = cset2;
382 if (csetb.isEmpty()) {
383 csetb = QStringLiteral("UTF-8");
384 }
385 map.insert(QStringLiteral("wsc_charset"), csetb);
386
387 QString newurl = substituteQuery(url, map, userquery, csetacodec);
388
389 return QUrl(newurl, QUrl::StrictMode);
390}
391
392void KURISearchFilterEngine::configure()
393{
394 qCDebug(category) << "Keywords Engine: Loading config...";
395
396 // Load the config.
398 KConfigGroup group = config.group(QStringLiteral("General"));
399
400 m_cKeywordDelimiter = group.readEntry("KeywordDelimiter", ":").at(0).toLatin1();
401 m_bWebShortcutsEnabled = group.readEntry("EnableWebShortcuts", true);
402 m_defaultWebShortcut = group.readEntry("DefaultWebShortcut", "duckduckgo");
403 m_bUseOnlyPreferredWebShortcuts = group.readEntry("UsePreferredWebShortcutsOnly", false);
404
405 QStringList defaultPreferredShortcuts;
406 if (!group.hasKey("PreferredWebShortcuts")) {
407 defaultPreferredShortcuts = KURISearchFilterEngine::defaultSearchProviders();
408 }
409 m_preferredWebShortcuts = group.readEntry("PreferredWebShortcuts", defaultPreferredShortcuts);
410
411 // Use either a white space or a : as the keyword delimiter...
412 if (strchr(" :", m_cKeywordDelimiter) == nullptr) {
413 m_cKeywordDelimiter = ':';
414 }
415
416 qCDebug(category) << "Web Shortcuts Enabled: " << m_bWebShortcutsEnabled;
417 qCDebug(category) << "Default Shortcut: " << m_defaultWebShortcut;
418 qCDebug(category) << "Keyword Delimiter: " << m_cKeywordDelimiter;
419 if (m_reloadRegistry) {
420 m_registry.reload();
421 }
422}
423
424SearchProviderRegistry *KURISearchFilterEngine::registry()
425{
426 return &m_registry;
427}
428
429#include "moc_kuriikwsfiltereng_p.cpp"
KConfigGroup group(const QString &group)
bool hasKey(const char *key) const
QString readEntry(const char *key, const char *aDefault=nullptr) const
static bool isKnownProtocol(const QUrl &url)
Returns whether a protocol is installed that is able to handle url.
QString desktopEntryName() const
Returns the desktop filename of the search provider without any extension.
Q_SCRIPTABLE Q_NOREPLY void start()
KSERVICE_EXPORT KService::List query(FilterFunc filterFunc)
KCOREADDONS_EXPORT Result match(QStringView pattern, QStringView str)
A namespace for KIO globals.
QString name(StandardAction id)
KGuiItem configure()
const char * constData() const const
QByteArray toPercentEncoding(const QByteArray &exclude, const QByteArray &include, char percent) const const
char toLatin1() const const
bool connect(const QString &service, const QString &path, const QString &interface, const QString &name, QObject *receiver, const char *slot)
QDBusConnection sessionBus()
iterator begin()
void clear()
qsizetype count() const const
iterator end()
QString captured(QStringView name) const const
bool hasMatch() const const
const QChar at(qsizetype position) const const
void clear()
bool contains(QChar ch, Qt::CaseSensitivity cs) const const
QString fromUtf8(QByteArrayView str)
qsizetype indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const const
bool isEmpty() const const
QString left(qsizetype n) const const
QString mid(qsizetype position, qsizetype n) const const
QString number(double n, char format, int precision)
QString & remove(QChar ch, Qt::CaseSensitivity cs)
QString & replace(QChar before, QChar after, Qt::CaseSensitivity cs)
QString simplified() const const
qsizetype size() const const
QStringList split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const const
int toInt(bool *ok, int base) const const
QByteArray toLatin1() const const
QString toLower() const const
QString trimmed() const const
DecodedData< QStringView > encode(QStringView in)
QString join(QChar separator) const const
QStringList & replaceInStrings(QStringView before, QStringView after, Qt::CaseSensitivity cs)
SkipEmptyParts
QFuture< void > map(Iterator begin, Iterator end, MapFunctor &&function)
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Fri Oct 11 2024 12:11:14 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.