KFileMetaData

xmlextractor.cpp
1/*
2 SPDX-FileCopyrightText: 2018 Stefan Brüns <stefan.bruens@rwth-aachen.de>
3
4 SPDX-License-Identifier: LGPL-2.1-or-later
5*/
6
7
8#include "xmlextractor.h"
9#include "kfilemetadata_debug.h"
10#include "dublincoreextractor.h"
11
12#include <QDomDocument>
13#include <QFile>
14#include <QXmlStreamReader>
15
16#ifdef SVG_XML_COMPRESSED_SUPPORT
17#include <KCompressionDevice>
18#endif
19
20namespace {
21
22//inline QString dcElementNS() { return QStringLiteral("http://purl.org/dc/elements/1.1/"); }
23inline QString svgNS() { return QStringLiteral("http://www.w3.org/2000/svg"); }
24inline QString rdfNS() { return QStringLiteral("http://www.w3.org/1999/02/22-rdf-syntax-ns#"); }
25inline QString ccNS() { return QStringLiteral("http://creativecommons.org/ns#"); }
26
27void extractSvgText(KFileMetaData::ExtractionResult* result, const QDomElement &node)
28{
29 if (node.namespaceURI() != svgNS()) {
30 return;
31 }
32
33 if ((node.localName() == QLatin1String("g")) ||
34 (node.localName() == QLatin1String("a"))) {
36 for (; !e.isNull(); e = e.nextSiblingElement()) {
37 extractSvgText(result, e);
38 }
39 } else if (node.localName() == QLatin1String("text")) {
40 qCDebug(KFILEMETADATA_LOG) << node.text();
41 result->append(node.text());
42 }
43}
44
45static const QStringList supportedMimeTypes = {
46 QStringLiteral("application/xml"),
47 QStringLiteral("image/svg+xml"),
48 QStringLiteral("image/svg+xml-compressed"),
49 QStringLiteral("image/svg"),
50};
51
52}
53
54namespace KFileMetaData
55{
56
57XmlExtractor::XmlExtractor(QObject* parent)
58 : ExtractorPlugin(parent)
59{
60
61}
62
63QStringList XmlExtractor::mimetypes() const
64{
65 return supportedMimeTypes;
66}
67
68void XmlExtractor::extract(ExtractionResult* result)
69{
70 auto flags = result->inputFlags();
71
72 QFile file(result->inputUrl());
73 if (!file.open(QIODevice::ReadOnly)) {
74 qCWarning(KFILEMETADATA_LOG) << "Document is not a valid file";
75 return;
76 }
77
78
79 if ((result->inputMimetype() == QLatin1String("image/svg")) ||
80 (result->inputMimetype() == QLatin1String("image/svg+xml-compressed")) ||
81 (result->inputMimetype() == QLatin1String("image/svg+xml"))) {
82
83 result->addType(Type::Image);
84
85 QIODevice *ioDevice = &file;
86#ifdef SVG_XML_COMPRESSED_SUPPORT
87 std::unique_ptr<KCompressionDevice> gzReader;
88 if (result->inputMimetype() == QLatin1String("image/svg+xml-compressed")) {
89 gzReader.reset(new KCompressionDevice(&file, false, KCompressionDevice::CompressionType::GZip));
90 if (!gzReader->open(QIODevice::ReadOnly)) {
91 return;
92 }
93 ioDevice = gzReader.get();
94 }
95#else
96 if (result->inputMimetype() == QLatin1String("image/svg+xml-compressed")) {
97 return;
98 }
99#endif
100
101 QDomDocument doc;
102 doc.setContent(ioDevice, QDomDocument::ParseOption::UseNamespaceProcessing);
103 QDomElement svg = doc.firstChildElement();
104
105 if (!svg.isNull()
106 && svg.localName() == QLatin1String("svg")
107 && svg.namespaceURI() == svgNS()) {
108
110 for (; !e.isNull(); e = e.nextSiblingElement()) {
111 if (e.namespaceURI() != svgNS()) {
112 continue;
113 }
114
115 if (e.localName() == QLatin1String("metadata")) {
116 if (!(flags & ExtractionResult::ExtractMetaData)) {
117 continue;
118 }
119
120 auto rdf = e.firstChildElement(QLatin1String("RDF"));
121 if (rdf.isNull() || rdf.namespaceURI() != rdfNS()) {
122 continue;
123 }
124
125 auto cc = rdf.firstChildElement(QLatin1String("Work"));
126 if (cc.isNull() || cc.namespaceURI() != ccNS()) {
127 continue;
128 }
129
130 DublinCoreExtractor::extract(result, cc);
131
132 } else if (e.localName() == QLatin1String("defs")) {
133 // skip
134 continue;
135 } else if (flags & ExtractionResult::ExtractPlainText) {
136 // extract
137 extractSvgText(result, e);
138 }
139 }
140 }
141 } else {
142 result->addType(Type::Text);
143
144 if (flags & ExtractionResult::ExtractPlainText) {
145 QXmlStreamReader stream(&file);
146 while (!stream.atEnd()) {
147 QXmlStreamReader::TokenType token = stream.readNext();
148
149 if (token == QXmlStreamReader::Characters) {
150 QString text = stream.text().trimmed().toString();
151 if (!text.isEmpty()) {
152 result->append(text);
153 }
154 }
155 }
156 }
157 }
158}
159
160} // namespace KFileMetaData
161
162#include "moc_xmlextractor.cpp"
The ExtractionResult class is where all the data extracted by the indexer is saved.
virtual void append(const QString &text)=0
This function is called by plugins when they wish for some plain text to be indexed without any prope...
The ExtractorPlugin is the base class for all file metadata extractors.
The KFileMetaData namespace.
ParseResult setContent(QAnyStringView text, ParseOptions options)
QString text() const
QDomElement firstChildElement(const QString &tagName, const QString &namespaceURI) const
bool isNull() const
QString localName() const
QString namespaceURI() const
QDomElement nextSiblingElement(const QString &tagName, const QString &namespaceURI) const
bool isEmpty() const
QString trimmed() const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Feb 28 2025 11:50:27 by doxygen 1.13.2 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.