Messagelib

urlhashing.cpp
1/*
2 SPDX-FileCopyrightText: 2016-2025 Laurent Montel <montel@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "urlhashing.h"
8#include <QCryptographicHash>
9#include <QDebug>
10
11using namespace WebEngineViewer;
12
13UrlHashing::UrlHashing(const QUrl &url)
14 : mUrl(url)
15{
16}
17
18UrlHashing::~UrlHashing() = default;
19
20QString UrlHashing::canonicalizeUrl(QUrl url)
21{
22 if (url.isEmpty()) {
23 return {};
24 }
25 QString path = url.path();
26 if (url.path().isEmpty()) {
27 url.setPath(QStringLiteral("/"));
28 } else {
29 // First, remove tab (0x09), CR (0x0d), and LF (0x0a) characters from the URL. Do not remove escape sequences for these characters (e.g. '%0a').
30 path.remove(QLatin1Char('\t'));
31 path.remove(QLatin1Char('\r'));
32 path.remove(QLatin1Char('\n'));
33
34 // In the URL, percent-escape all characters that are <= ASCII 32, >= 127, "#", or "%". The escapes should use uppercase hex characters.
35 // TODO
36
37 url.setPath(path);
38 }
39 // Remove all leading and trailing dots.
40#if 0
41 QString hostname = url.host();
42 qDebug() << " hostname" << hostname;
43 while (!hostname.isEmpty() && hostname.at(0) == QLatin1Char('.')) {
44 hostname.remove(0, 1);
45 }
46 qDebug() << "BEFORE hostname" << hostname;
47 for (int i = hostname.length(); i >= 0; --i) {
48 if (hostname.at(i) == QLatin1Char('.')) {
50 } else {
51 break;
52 }
53 }
54 qDebug() << "AFTER hostname" << hostname;
55 mUrl.setHost(hostname);
56#endif
59 // qDebug() << "BEFORE urlEncoded" <<urlEncoded;
60 urlEncoded.replace(QByteArrayLiteral("%25"), QByteArrayLiteral("%"));
61 // qDebug() << "AFTER urlEncoded" <<urlEncoded;
62 return QString::fromLatin1(urlEncoded);
63}
64
65QStringList UrlHashing::generatePathsToCheck(const QString &str, const QString &query)
66{
67 QStringList pathToCheck;
68 if (str.isEmpty()) {
69 return pathToCheck;
70 }
71 const int strLength(str.length());
72 for (int i = 0; i < strLength; ++i) {
73 // We check 5 element => 4 here and host if necessary
74 if (pathToCheck.count() == 4) {
75 break;
76 }
77 if (str.at(i) == QLatin1Char('/')) {
78 if (i == 0) {
79 pathToCheck << QStringLiteral("/");
80 } else {
81 pathToCheck << str.left(i + 1);
82 }
83 }
84 }
85 if (!pathToCheck.isEmpty() && pathToCheck.at(pathToCheck.count() - 1) != str) {
86 pathToCheck << str;
87 }
88 if (!query.isEmpty()) {
89 pathToCheck << str + QLatin1Char('?') + query;
90 }
91 return pathToCheck;
92}
93
94QStringList UrlHashing::generateHostsToCheck(const QString &str)
95{
96 QStringList hostToCheck;
97 if (str.isEmpty()) {
98 return hostToCheck;
99 }
100 const int strLength(str.length());
101 bool lastElement = true;
102 for (int i = (strLength - 1); i > 0; --i) {
103 // We need to check just 5 element => 4 splits hosts + current host
104 if (hostToCheck.count() == 4) {
105 break;
106 }
107 if (str.at(i) == QLatin1Char('.')) {
108 if (lastElement) {
109 lastElement = false;
110 } else {
111 hostToCheck << str.right(strLength - i - 1);
112 }
113 }
114 }
115 hostToCheck << str;
116 return hostToCheck;
117}
118
119QHash<QByteArray, QByteArray> UrlHashing::hashList() const
120{
122 if (mUrl.isValid()) {
123 const QString result = WebEngineViewer::UrlHashing::canonicalizeUrl(mUrl);
124 const QUrl url(result);
125 const QStringList hosts = WebEngineViewer::UrlHashing::generateHostsToCheck(url.host());
126 const QStringList paths = WebEngineViewer::UrlHashing::generatePathsToCheck(url.path(), url.query());
127
128 for (const QString &host : hosts) {
129 for (const QString &path : paths) {
130 const QString str = host + path;
132 QByteArray baShort = ba;
133 baShort.truncate(4);
134 lst.insert(ba, baShort);
135 // qDebug() << " ba " << ba.toBase64();
136 }
137 }
138 }
139 return lst;
140}
std::optional< QSqlQuery > query(const QString &queryStatement)
QString path(const QString &relativePath)
NETWORKMANAGERQT_EXPORT QString hostname()
QByteArray & replace(QByteArrayView before, QByteArrayView after)
void truncate(qsizetype pos)
QByteArray hash(QByteArrayView data, Algorithm method)
iterator insert(const Key &key, const T &value)
const_reference at(qsizetype i) const const
qsizetype count() const const
bool isEmpty() const const
const QChar at(qsizetype position) const const
QString fromLatin1(QByteArrayView str)
bool isEmpty() const const
QString left(qsizetype n) const const
qsizetype length() const const
QString & remove(QChar ch, Qt::CaseSensitivity cs)
QString right(qsizetype n) const const
QByteArray toLatin1() const const
EncodeUnicode
RemoveFragment
QString host(ComponentFormattingOptions options) const const
bool isEmpty() const const
bool isValid() const const
QString path(ComponentFormattingOptions options) const const
QString query(ComponentFormattingOptions options) const const
void setHost(const QString &host, ParsingMode mode)
void setPath(const QString &path, ParsingMode mode)
QByteArray toEncoded(FormattingOptions options) const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:55:28 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.