KFileMetaData

fb2extractor.cpp
1/*
2 SPDX-FileCopyrightText: 2022 Kai Uwe Broulik <kde@broulik.de>
3
4 SPDX-License-Identifier: LGPL-2.1-or-later
5*/
6
7#include "fb2extractor.h"
8#include "kfilemetadata_debug.h"
9
10#include <QDateTime>
11#include <QFile>
12#include <QXmlStreamReader>
13
14#include <KZip>
15
16#include <memory>
17
18using namespace KFileMetaData;
19
20Fb2Extractor::Fb2Extractor(QObject *parent)
21 : ExtractorPlugin(parent)
22{
23}
24
25namespace
26{
27static const QString regularMimeType()
28{
29 return QStringLiteral("application/x-fictionbook+xml");
30}
31
32static const QString compressedMimeType()
33{
34 return QStringLiteral("application/x-zip-compressed-fb2");
35}
36
37static const QStringList supportedMimeTypes = {regularMimeType(), compressedMimeType()};
38
39}
40
41QStringList Fb2Extractor::mimetypes() const
42{
43 return supportedMimeTypes;
44}
45
46void Fb2Extractor::extract(ExtractionResult *result)
47{
48 std::unique_ptr<QIODevice> device;
49 std::unique_ptr<KZip> zip;
50
51 if (result->inputMimetype() == regularMimeType()) {
52 device.reset(new QFile(result->inputUrl()));
53 if (!device->open(QIODevice::ReadOnly | QIODevice::Text)) {
54 return;
55 }
56
57 } else if (result->inputMimetype() == compressedMimeType()) {
58 zip.reset(new KZip(result->inputUrl()));
59 if (!zip->open(QIODevice::ReadOnly)) {
60 return;
61 }
62
63 const auto entries = zip->directory()->entries();
64 if (entries.count() != 1) {
65 return;
66 }
67
68 const QString entryPath = entries.first();
69 if (!entryPath.endsWith(QLatin1String(".fb2"))) {
70 return;
71 }
72
73 const auto *entry = zip->directory()->file(entryPath);
74 if (!entry) {
75 return;
76 }
77
78 device.reset(entry->createDevice());
79 }
80
81 result->addType(Type::Document);
82
83 QXmlStreamReader xml(device.get());
84
85 bool inFictionBook = false;
86 bool inDescription = false;
87 bool inTitleInfo = false;
88 bool inAuthor = false;
89 bool inDocumentInfo = false;
90 bool inPublishInfo = false;
91 bool inBody = false;
92
93 QString authorFirstName;
94 QString authorMiddleName;
95 QString authorLastName;
96 QString authorNickName;
97
98 while (!xml.atEnd() && !xml.hasError()) {
99 xml.readNext();
100
101 if (xml.name() == QLatin1String("FictionBook")) {
102 if (xml.isStartElement()) {
103 inFictionBook = true;
104 } else if (xml.isEndElement()) {
105 break;
106 }
107 } else if (xml.name() == QLatin1String("description")) {
108 if (xml.isStartElement()) {
109 inDescription = true;
110 } else if (xml.isEndElement()) {
111 inDescription = false;
112 }
113 } else if (xml.name() == QLatin1String("title-info")) {
114 if (xml.isStartElement()) {
115 inTitleInfo = true;
116 } else if (xml.isEndElement()) {
117 inTitleInfo = false;
118 }
119 } else if (xml.name() == QLatin1String("document-info")) {
120 if (xml.isStartElement()) {
121 inDocumentInfo = true;
122 } else if (xml.isEndElement()) {
123 inDocumentInfo = false;
124 }
125 } else if (xml.name() == QLatin1String("publish-info")) {
126 if (xml.isStartElement()) {
127 inPublishInfo = true;
128 } else if (xml.isEndElement()) {
129 inPublishInfo = false;
130 }
131 } else if (xml.name() == QLatin1String("body")) {
132 if (xml.isStartElement()) {
133 inBody = true;
134 } else if (xml.isEndElement()) {
135 inBody = false;
136 }
137 }
138
139 if (!inFictionBook) {
140 continue;
141 }
142
143 if (inDescription && result->inputFlags() & ExtractionResult::ExtractMetaData) {
144 if (inTitleInfo) {
145 if (xml.isStartElement()) {
146 if (xml.name() == QLatin1String("author")) {
147 inAuthor = true;
148 } else if (inAuthor) {
149 if (xml.name() == QLatin1String("first-name")) {
150 authorFirstName = xml.readElementText();
151 } else if (xml.name() == QLatin1String("middle-name")) {
152 authorMiddleName = xml.readElementText();
153 } else if (xml.name() == QLatin1String("last-name")) {
154 authorLastName = xml.readElementText();
155 } else if (xml.name() == QLatin1String("nickname")) {
156 authorNickName = xml.readElementText();
157 }
158 } else if (xml.name() == QLatin1String("book-title")) {
159 result->add(Property::Title, xml.readElementText());
160 } else if (xml.name() == QLatin1String("annotation")) {
161 result->add(Property::Description, xml.readElementText(QXmlStreamReader::IncludeChildElements).trimmed());
162 } else if (xml.name() == QLatin1String("lang")) {
163 result->add(Property::Language, xml.readElementText());
164 } else if (xml.name() == QLatin1String("genre")) {
165 result->add(Property::Genre, xml.readElementText());
166 }
167 } else if (xml.isEndElement()) {
168 inAuthor = false;
169
170 QStringList nameParts = {authorFirstName, authorMiddleName, authorLastName};
171 nameParts.removeAll(QString());
172
173 if (!nameParts.isEmpty()) {
174 result->add(Property::Author, nameParts.join(QLatin1Char(' ')));
175 } else if (!authorNickName.isEmpty()) {
176 result->add(Property::Author, authorNickName);
177 }
178
179 authorFirstName.clear();
180 authorMiddleName.clear();
181 authorLastName.clear();
182 authorNickName.clear();
183 }
184 } else if (inDocumentInfo) {
185 if (xml.name() == QLatin1String("date")) {
186 // Date can be "not exact" but date "value", if present, is an xs:date
187 const auto dateValue = xml.attributes().value(QLatin1String("value"));
188 QDateTime dt = QDateTime::fromString(dateValue.toString());
189
190 if (!dt.isValid()) {
191 dt = ExtractorPlugin::dateTimeFromString(xml.readElementText());
192 }
193
194 if (dt.isValid()) {
195 result->add(Property::CreationDate, dt);
196 }
197 } else if (xml.name() == QLatin1String("program-used")) {
198 result->add(Property::Generator, xml.readElementText());
199 // "Owner of the fb2 document copyrights"
200 } else if (xml.name() == QLatin1String("publisher")) {
201 result->add(Property::Copyright, xml.readElementText());
202 }
203 } else if (inPublishInfo) {
204 if (xml.name() == QLatin1String("publisher")) {
205 result->add(Property::Publisher, xml.readElementText());
206 } else if (xml.name() == QLatin1String("year")) {
207 bool ok;
208 const int releaseYear = xml.readElementText().toInt(&ok);
209 if (ok) {
210 result->add(Property::ReleaseYear, releaseYear);
211 }
212 }
213 }
214 } else if (inBody && result->inputFlags() & ExtractionResult::ExtractPlainText && xml.isCharacters() && !xml.isWhitespace()) {
215 result->append(xml.text().toString());
216 }
217 }
218}
219
220#include "moc_fb2extractor.cpp"
The ExtractionResult class is where all the data extracted by the indexer is saved.
QString inputUrl() const
The input URL which the plugins will use to locate the file.
virtual void addType(Type::Type type)=0
This function is called by the plugins.
QString inputMimetype() const
The input MIME type.
virtual void add(Property::Property property, const QVariant &value)=0
This function is called by the plugins when they wish to add a key value pair which should be indexed...
Flags inputFlags() const
The flags which the extraction plugin should consider following when extracting metadata from the ...
virtual void append(const QString &text)=0
This function is called by plugins when they wish for some plain text to be indexed without any prope...
The ExtractorPlugin is the base class for all file metadata extractors.
static QDateTime dateTimeFromString(const QString &dateString)
Tries to extract a valid date time from the string provided.
@ Title
Refers to the Title of the content of the file.
Definition properties.h:121
@ Author
The Author field indicates the primary creator of a document.
Definition properties.h:114
@ Genre
The Genre of an Audio file.
Definition properties.h:52
@ Description
Represents the description stored in the file.
Definition properties.h:351
@ Generator
Refers to the Application used to create this file.
Definition properties.h:134
@ CreationDate
The date the content of the file was created.
Definition properties.h:177
@ Publisher
The publisher of the content.
Definition properties.h:169
@ Language
The language the document is written in.
Definition properties.h:159
@ Copyright
The copyright of the file.
Definition properties.h:164
@ ReleaseYear
Indicates the year a track was released.
Definition properties.h:71
@ Document
Any file which counts as a document.
Definition types.h:63
The KFileMetaData namespace.
QDateTime fromString(QStringView string, QStringView format, QCalendar cal)
bool isValid() const
bool isEmpty() const
qsizetype removeAll(const AT &t)
void clear()
bool endsWith(QChar c, Qt::CaseSensitivity cs) const
QString first(qsizetype n) const
bool isEmpty() const
QString join(QChar separator) const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Sat Dec 21 2024 16:59:41 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.