Syndication

loaderutil.cpp
/*
    This file is part of the syndication library
    SPDX-FileCopyrightText: 2019 Laurent Montel <montel@kde.org>

    SPDX-License-Identifier: LGPL-2.0-or-later
*/
7
#include "loaderutil_p.h"
#include <QDebug>
#include <QRegularExpression>

#include <algorithm>

// #define DEBUG_PARSING_FEED
#ifdef DEBUG_PARSING_FEED
#include <QFile>
#include <QTextStream>
#endif
17QUrl Syndication::LoaderUtil::parseFeed(const QByteArray &data, const QUrl &url)
18{
19#ifdef DEBUG_PARSING_FEED
20 qDebug() << " QUrl Syndication::LoaderUtil::parseFeed(const QByteArray &data, const QUrl &url)";
21 QFile headerFile(QStringLiteral("/tmp/bb.txt"));
22 headerFile.open(QIODevice::WriteOnly | QIODevice::Text);
23 QTextStream outHeaderStream(&headerFile);
24 outHeaderStream << data;
25 headerFile.close();
26#endif
27 QUrl discoveredFeedURL;
28 QString str = QString::fromLatin1(data.constData()).simplified();
29 QString s2;
30 // QTextStream ts( &str, QIODevice::WriteOnly );
31 // ts << data.data();
32
33 // "<[\\s]link[^>]*rel[\\s]=[\\s]\\\"[\\s]alternate[\\s]\\\"[^>]*>"
34 // "type[\\s]=[\\s]\\\"application/rss+xml\\\""
35 // "href[\\s]=[\\s]\\\"application/rss+xml\\\""
36
37 // test regexp: https://www.regexplanet.com/advanced/perl/index.html
38
39 const static QRegularExpression rx0(
40 QStringLiteral(
41 R"((?:REL)[^=]*=[^sAa]*(?:service.feed|ALTERNATE)[^sAa]*[\s]*type[^=]*="application/rss\+xml"[^s]*[\s]*[^s]*(?:HREF)[^=]*?=[^A-Z0-9-_~,./$]*([^'\">\s]*))"),
43
45 if ((match = rx0.match(str)).hasMatch()) {
46 s2 = match.captured(1);
47 } else {
48 const static QRegularExpression rx(
49 QStringLiteral(
50 R"((?:REL)[^=]*=[^sAa]*(?:service.feed|ALTERNATE)[^sAa]*[\s]*type[^=]*=\"application/rss\+xml\"[^s][^s](?:[^>]*)[\s]*[^s]*(?:HREF)[^=]*=[^A-Z0-9-_~,./$]*([^'\">\s]*))"),
52 if ((match = rx.match(str)).hasMatch()) {
53 s2 = match.captured(1);
54 } else {
55 static const QRegularExpression rx2(
56 QStringLiteral(R"((?:REL)[^=]*=[^sAa]*(?:service.feed|ALTERNATE)[\s]*[^s][^s](?:[^>]*)(?:HREF)[^=]*=[^A-Z0-9-_~,./$]*([^'\">\s]*))"),
58 if ((match = rx2.match(str)).hasMatch()) {
59 s2 = match.captured(1);
60 } else {
61 // does not support Atom/RSS autodiscovery.. try finding feeds by brute force....
62 QStringList feeds;
63 QString host = url.host();
64 static const QRegularExpression rx3(QStringLiteral(R"((?:<A )[^H]*(?:HREF)[^=]*=[^A-Z0-9-_~,./]*([^'\">\s]*))"));
65 QRegularExpressionMatchIterator iter = rx3.globalMatch(str);
66 while (iter.hasNext()) {
67 match = iter.next();
68 s2 = match.captured(1);
69 if (s2.endsWith(QLatin1String(".rdf")) //
70 || s2.endsWith(QLatin1String(".rss")) //
71 || s2.endsWith(QLatin1String(".xml"))) {
72 feeds.append(s2);
73 }
74 }
75
76 // Prefer feeds on same host
77 auto it = std::find_if(feeds.cbegin(), feeds.cend(), [&host](const QString &s) {
78 return QUrl(s).host() == host;
79 });
80 if (it != feeds.cend()) {
81 s2 = *it;
82 }
83 }
84 }
85 }
86
87 if (s2.isNull()) {
88 return discoveredFeedURL;
89 }
90
91 if (QUrl(s2).isRelative()) {
92 if (s2.startsWith(QLatin1String("//"))) {
93 s2.prepend(url.scheme() + QLatin1Char(':'));
94 discoveredFeedURL = QUrl(s2);
95 } else if (s2.startsWith(QLatin1Char('/'))) {
96 discoveredFeedURL = url;
97 discoveredFeedURL.setPath(s2);
98 } else {
99 discoveredFeedURL = url;
100 discoveredFeedURL.setPath(discoveredFeedURL.path() + QLatin1Char('/') + s2);
101 }
102 } else {
103 discoveredFeedURL = QUrl(s2);
104 }
105
106 return discoveredFeedURL;
107}
KCOREADDONS_EXPORT Result match(QStringView pattern, QStringView str)
void append(QList< T > &&value)
const_iterator cbegin() const const
const_iterator cend() const const
QRegularExpressionMatch next()
bool endsWith(QChar c, Qt::CaseSensitivity cs) const const
QString fromLatin1(QByteArrayView str)
bool isNull() const const
QString & prepend(QChar ch)
QString simplified() const const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const const
QString host(ComponentFormattingOptions options) const const
QString path(ComponentFormattingOptions options) const const
QString scheme() const const
void setPath(const QString &path, ParsingMode mode)
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Fri Oct 11 2024 12:09:52 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.