Baloo

basicindexingjob.cpp
/*
    This file is part of the KDE Baloo Project
    SPDX-FileCopyrightText: 2013-2015 Vishesh Handa <me@vhanda.in>

    SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
*/
7
8#include "basicindexingjob.h"
9#include "termgenerator.h"
10#include "idutils.h"
11
12#include <QStringList>
13#include <QFile>
14
15#include <KFileMetaData/Types>
16#include <KFileMetaData/UserMetaData>
17
18using namespace Baloo;
19
20BasicIndexingJob::BasicIndexingJob(const QString& filePath, const QString& mimetype,
21 IndexingLevel level)
22 : m_filePath(filePath)
23 , m_mimetype(mimetype)
24 , m_indexingLevel(level)
25{
26 if (m_filePath.endsWith(QLatin1Char('/'))) {
27 m_filePath.chop(1);
28 }
29}
30
31namespace {
32
33void indexXAttr(const QString& url, Document& doc)
34{
35 KFileMetaData::UserMetaData userMetaData(url);
36
38 auto attributes = userMetaData.queryAttributes(Attribute::Tags |
39 Attribute::Rating | Attribute::Comment);
40 if (attributes == Attribute::None) {
41 return;
42 }
43
44 TermGenerator tg(doc);
45
46 const QStringList tags = userMetaData.tags();
47 for (const QString& tag : tags) {
48 tg.indexXattrText(tag, QByteArray("TA"));
49 doc.addXattrTerm(QByteArray("TAG-") + tag.toUtf8());
50 }
51
52 int rating = userMetaData.rating();
53 if (rating) {
54 doc.addXattrTerm(QByteArray("R") + QByteArray::number(rating));
55 }
56
57 QString comment = userMetaData.userComment();
58 if (!comment.isEmpty()) {
59 tg.indexXattrText(comment, QByteArray("C"));
60 }
61}
62
63QVector<KFileMetaData::Type::Type> typesForMimeType(const QString& mimeType)
64{
65 using namespace KFileMetaData;
67 types.reserve(2);
68
69 // Basic types
70 if (mimeType.startsWith(QLatin1String("audio/"))) {
71 types << Type::Audio;
72 }
73 if (mimeType.startsWith(QLatin1String("video/"))) {
74 types << Type::Video;
75 }
76 if (mimeType.startsWith(QLatin1String("image/"))) {
77 types << Type::Image;
78 }
79 if (mimeType.startsWith(QLatin1String("text/"))) {
80 types << Type::Text;
81 }
82 if (mimeType.contains(QLatin1String("document"))) {
83 types << Type::Document;
84 }
85
86 if (mimeType.contains(QLatin1String("powerpoint"))) {
87 types << Type::Presentation;
88 types << Type::Document;
89 }
90 if (mimeType.contains(QLatin1String("excel"))) {
91 types << Type::Spreadsheet;
92 types << Type::Document;
93 }
94 // Compressed tar archives: "application/x-<compression>-compressed-tar"
95 if ((mimeType.startsWith(QLatin1String("application/x-"))) &&
96 (mimeType.endsWith(QLatin1String("-compressed-tar")))) {
97 types << Type::Archive;
98 }
99
100 static QMultiHash<QString, Type::Type> typeMapper {
101 {QStringLiteral("text/plain"), Type::Document},
102 // MS Office
103 {QStringLiteral("application/msword"), Type::Document},
104 {QStringLiteral("application/x-scribus"), Type::Document},
105 // The old pre-XML MS Office formats are already covered by the excel/powerpoint "contains" above:
106 // - application/vnd.ms-powerpoint
107 // - application/vnd.ms-excel
108 // "openxmlformats-officedocument" and "opendocument" contain "document", i.e. already have Type::Document
109 // - application/vnd.openxmlformats-officedocument.wordprocessingml.document
110 // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
111 // - application/vnd.openxmlformats-officedocument.presentationml.presentation
112 // - application/vnd.oasis.opendocument.text
113 // - application/vnd.oasis.opendocument.spreadsheet
114 // - application/vnd.oasis.opendocument.presentation
115 // Office 2007
116 {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.presentation"), Type::Presentation},
117 {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.slideshow"), Type::Presentation},
118 {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.template"), Type::Presentation},
119 {QStringLiteral("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), Type::Spreadsheet},
120 // Open Document Formats - https://en.wikipedia.org/wiki/OpenDocument_technical_specification
121 {QStringLiteral("application/vnd.oasis.opendocument.presentation"), Type::Presentation},
122 {QStringLiteral("application/vnd.oasis.opendocument.spreadsheet"), Type::Spreadsheet},
123 {QStringLiteral("application/pdf"), Type::Document},
124 {QStringLiteral("application/postscript"), Type::Document},
125 {QStringLiteral("application/x-dvi"), Type::Document},
126 {QStringLiteral("application/rtf"), Type::Document},
127 // EBooks
128 {QStringLiteral("application/epub+zip"), Type::Document},
129 {QStringLiteral("application/vnd.amazon.mobi8-ebook"), Type::Document},
130 {QStringLiteral("application/x-mobipocket-ebook"), Type::Document},
131 // Graphic EBooks
132 {QStringLiteral("application/vnd.comicbook-rar"), Type::Document},
133 {QStringLiteral("application/vnd.comicbook+zip"), Type::Document},
134 {QStringLiteral("application/x-cb7"), Type::Document},
135 {QStringLiteral("application/x-cbt"), Type::Document},
136 // Archives - https://en.wikipedia.org/wiki/List_of_archive_formats
137 {QStringLiteral("application/gzip"), Type::Archive},
138 {QStringLiteral("application/x-tar"), Type::Archive},
139 {QStringLiteral("application/x-tarz"), Type::Archive},
140 {QStringLiteral("application/x-arc"), Type::Archive},
141 {QStringLiteral("application/x-archive"), Type::Archive},
142 {QStringLiteral("application/x-bzip"), Type::Archive},
143 {QStringLiteral("application/x-cpio"), Type::Archive},
144 {QStringLiteral("application/x-lha"), Type::Archive},
145 {QStringLiteral("application/x-lhz"), Type::Archive},
146 {QStringLiteral("application/x-lrzip"), Type::Archive},
147 {QStringLiteral("application/x-lz4"), Type::Archive},
148 {QStringLiteral("application/x-lzip"), Type::Archive},
149 {QStringLiteral("application/x-lzma"), Type::Archive},
150 {QStringLiteral("application/x-lzop"), Type::Archive},
151 {QStringLiteral("application/x-7z-compressed"), Type::Archive},
152 {QStringLiteral("application/x-ace"), Type::Archive},
153 {QStringLiteral("application/x-astrotite-afa"), Type::Archive},
154 {QStringLiteral("application/x-alz"), Type::Archive},
155 {QStringLiteral("application/vnd.android.package-archive"), Type::Archive},
156 {QStringLiteral("application/x-arj"), Type::Archive},
157 {QStringLiteral("application/vnd.ms-cab-compressed"), Type::Archive},
158 {QStringLiteral("application/x-cfs-compressed"), Type::Archive},
159 {QStringLiteral("application/x-dar"), Type::Archive},
160 {QStringLiteral("application/x-lzh"), Type::Archive},
161 {QStringLiteral("application/x-lzx"), Type::Archive},
162 {QStringLiteral("application/vnd.rar"), Type::Archive},
163 {QStringLiteral("application/x-stuffit"), Type::Archive},
164 {QStringLiteral("application/x-stuffitx"), Type::Archive},
165 {QStringLiteral("application/x-tzo"), Type::Archive},
166 {QStringLiteral("application/x-ustar"), Type::Archive},
167 {QStringLiteral("application/x-xar"), Type::Archive},
168 {QStringLiteral("application/x-xz"), Type::Archive},
169 {QStringLiteral("application/x-zoo"), Type::Archive},
170 {QStringLiteral("application/zip"), Type::Archive},
171 {QStringLiteral("application/zlib"), Type::Archive},
172 {QStringLiteral("application/zstd"), Type::Archive},
173 // WPS office
174 {QStringLiteral("application/wps-office.doc"), Type::Document},
175 {QStringLiteral("application/wps-office.xls"), Type::Document},
176 {QStringLiteral("application/wps-office.xls"), Type::Spreadsheet},
177 {QStringLiteral("application/wps-office.pot"), Type::Document},
178 {QStringLiteral("application/wps-office.pot"), Type::Presentation},
179 {QStringLiteral("application/wps-office.wps"), Type::Document},
180 {QStringLiteral("application/wps-office.docx"), Type::Document},
181 {QStringLiteral("application/wps-office.xlsx"), Type::Document},
182 {QStringLiteral("application/wps-office.xlsx"), Type::Spreadsheet},
183 {QStringLiteral("application/wps-office.pptx"), Type::Document},
184 {QStringLiteral("application/wps-office.pptx"), Type::Presentation},
185 // Other
186 {QStringLiteral("text/markdown"), Type::Document},
187 {QStringLiteral("image/vnd.djvu+multipage"), Type::Document},
188 {QStringLiteral("application/x-lyx"), Type::Document}
189 };
190
191 auto hashIt = typeMapper.find(mimeType);
192 while (hashIt != typeMapper.end() && hashIt.key() == mimeType) {
193 types.append(hashIt.value());
194 ++hashIt;
195 }
196
197 return types;
198}
199} // namespace
200
201BasicIndexingJob::~BasicIndexingJob()
202{
203}
204
205bool BasicIndexingJob::index()
206{
207 const QByteArray url = QFile::encodeName(m_filePath);
208 auto lastSlash = url.lastIndexOf('/');
209
210 const QByteArray fileName = url.mid(lastSlash + 1);
211 const QByteArray filePath = url.left(lastSlash);
212
213 QT_STATBUF statBuf;
214 if (filePathToStat(filePath, statBuf) != 0) {
215 return false;
216 }
217
218 Document doc;
219 doc.setParentId(statBufToId(statBuf));
220
221 if (filePathToStat(url, statBuf) != 0) {
222 return false;
223 }
224 doc.setId(statBufToId(statBuf));
225 doc.setUrl(url);
226
227 TermGenerator tg(doc);
228 tg.indexFileNameText(QFile::decodeName(fileName));
229 if (statBuf.st_size == 0) {
230 tg.indexText(QStringLiteral("application/x-zerosize"), QByteArray("M"));
231 } else {
232 tg.indexText(m_mimetype, QByteArray("M"));
233 }
234
235 // (Content) Modification time, Metadata (e.g. XAttr) change time
236 doc.setMTime(statBuf.st_mtime);
237 doc.setCTime(statBuf.st_ctime);
238
239 if (S_ISDIR(statBuf.st_mode)) {
240 static const QByteArray type = QByteArray("T") + QByteArray::number(static_cast<int>(KFileMetaData::Type::Folder));
241 doc.addTerm(type);
242 // For folders we do not need to go through file indexing, so we do not set contentIndexing
243
244 } else if (statBuf.st_size > 0) {
245 if (m_indexingLevel == MarkForContentIndexing) {
246 doc.setContentIndexing(true);
247 }
248 // Types
249 const QVector<KFileMetaData::Type::Type> tList = typesForMimeType(m_mimetype);
250 for (KFileMetaData::Type::Type type : tList) {
251 QByteArray num = QByteArray::number(static_cast<int>(type));
252 doc.addTerm(QByteArray("T") + num);
253 }
254 }
255
256 indexXAttr(m_filePath, doc);
257
258 m_doc = doc;
259 return true;
260}
Implements storage for docIds without any associated data. Instantiated for:
Definition coding.cpp:11
quint64 statBufToId(const QT_STATBUF &stBuf)
Convert the QT_STATBUF into a 64 bit unique identifier for the file.
Definition idutils.h:37
KCALUTILS_EXPORT QString mimeType()
KIOCORE_EXPORT MimetypeJob * mimetype(const QUrl &url, JobFlags flags=DefaultFlags)
QStringView level(QStringView ifopt)
VehicleSection::Type type(QStringView coachNumber, QStringView coachClassification)
qsizetype lastIndexOf(QByteArrayView bv) const
QByteArray left(qsizetype len) const
QByteArray mid(qsizetype pos, qsizetype len) const
QByteArray number(double n, char format, int precision)
QString decodeName(const QByteArray &localFileName)
QByteArray encodeName(const QString &fileName)
void append(QList< T > &&value)
void reserve(qsizetype size)
iterator find(const Key &key, const T &value)
bool contains(QChar ch, Qt::CaseSensitivity cs) const
bool endsWith(QChar c, Qt::CaseSensitivity cs) const
bool isEmpty() const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Fri Nov 22 2024 12:03:07 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.