KItinerary

externalprocessor.cpp
1/*
2 SPDX-FileCopyrightText: 2017-2021 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include <config-kitinerary.h>
8
9#include "externalprocessor.h"
10#include "logging.h"
11
12#include <KItinerary/AbstractExtractor>
13#include <KItinerary/ExtractorEngine>
14#include <KItinerary/ExtractorRepository>
15#include <KItinerary/ExtractorResult>
16#include <KItinerary/PdfDocument>
17
18#include <QFileInfo>
19#include <QJsonArray>
20#include <QJsonDocument>
21#include <QProcess>
22
23using namespace KItinerary;
24
25ExternalProcessor::ExternalProcessor()
26{
27 // find external extractor
28 const QString filepath =
29 QLatin1StringView(CMAKE_INSTALL_FULL_LIBEXECDIR_KF6) +
30 QLatin1StringView("/kitinerary-extractor");
31 QFileInfo fi(filepath);
32 if (!fi.exists() && !fi.isFile() && !fi.isExecutable()) {
33 qCCritical(Log) << "filePath : " << filepath << "Cannot find external extractor:" << fi.fileName();
34 return;
35 }
36 m_externalExtractor = fi.canonicalFilePath();
37}
38
// Compiler-generated destructor; no custom cleanup is defined here.
39ExternalProcessor::~ExternalProcessor() = default;
40
// Fast check whether the given encoded data can possibly be processed by this
// instance. Returns true when PdfDocument::maybePdf() accepts encodedData, or
// when the second operand of the `||` below holds.
// NOTE(review): listing line 44 (the right-hand operand of `||`) is missing from
// this copy of the file. Presumably it is a test on fileName, the otherwise
// unused parameter — confirm against the upstream source before relying on it.
41bool ExternalProcessor::canHandleData(const QByteArray &encodedData, QStringView fileName) const
42{
43 return PdfDocument::maybePdf(encodedData) ||
45}
46
48{
    // NOTE(review): the signature (listing line 47) and listing line 49 are
    // missing from this copy of the file; presumably line 49 declares `node` —
    // confirm against the upstream source.
    // The node receives the raw encoded data as its content and is tagged with
    // the "application/pdf" MIME type before being returned.
50 node.setContent(encodedData);
51 node.setMimeType(QStringLiteral("application/pdf"));
52 return node;
53}
54
56{
    // NOTE(review): the signature (listing line 55) is missing from this copy of
    // the file; the body uses `node` and `engine`, presumably its parameters.
    //
    // Overview: determine which extractors apply to `node`, run the external
    // extractor executable with their names, hand it the node content, and add
    // the JSON array it prints to the node's results.

    // Collect the extractors the repository selects for this node.
57 std::vector<const AbstractExtractor*> extractors;
58 engine->extractorRepository()->extractorsForNode(node, extractors);
59 // consider the implicit conversion to text/plain the PDF processor can do
60 if (node.mimeType() == QLatin1StringView("application/pdf")) {
    // Temporarily relabel the node as text/plain to also collect the
    // extractors selected for that type, then restore the PDF MIME type.
61 node.setMimeType(QStringLiteral("text/plain"));
62 engine->extractorRepository()->extractorsForNode(node, extractors);
63 node.setMimeType(QStringLiteral("application/pdf"));
64 }
65
    // Gather the extractor names; they are passed below as one ';'-joined argument.
66 QStringList extNames;
67 extNames.reserve(extractors.size());
68 std::transform(extractors.begin(), extractors.end(), std::back_inserter(extNames), [](auto ext) { return ext->name(); });
69
70 QProcess proc;
71 proc.setProgram(m_externalExtractor);
72
    // Command line for the external extractor.
73 QStringList args({QLatin1StringView("--context-date"),
    // NOTE(review): listing line 74 is missing here; presumably it supplies the
    // value for --context-date — confirm against the upstream source.
75 QLatin1StringView("--extractors"),
76 extNames.join(QLatin1Char(';')),
77 QLatin1StringView("--no-validation")});
    // Forward every additional search path of the repository as its own
    // "--additional-search-path <path>" argument pair.
78 const auto extraPaths = engine->extractorRepository()->additionalSearchPaths();
79 for (const auto &p : extraPaths) {
80 args.push_back(QStringLiteral("--additional-search-path"));
81 args.push_back(p);
82 }
83
84 proc.setArguments(args);
    // NOTE(review): listing lines 85-86 are missing here; presumably they
    // configure the process channels and start the process — confirm against
    // the upstream source.
    // Give up if the process has not started within 1000 ms.
87 if (!proc.waitForStarted(1000)) {
88 qCWarning(Log) << "could not start external extractor" << m_externalExtractor << proc.errorString();
89 return;
90 }
    // Send the node content to the process, then close the write channel.
91 proc.write(node.content<QByteArray>());
92 proc.closeWriteChannel();
    // Wait up to 15000 ms for the process to finish; on failure nothing is
    // added to the node.
93 if (!proc.waitForFinished(15000)) {
94 qCWarning(Log) << "external extractor did not exit cleanly" << m_externalExtractor << proc.errorString();
95 return;
96 }
97
    // Parse the process's standard output as JSON and add its array as a result.
98 const auto res = QJsonDocument::fromJson(proc.readAllStandardOutput()).array();
99 node.addResult(res);
100}
ExtractorDocumentNode createNodeFromData(const QByteArray &encodedData) const override
Create a document node from raw data.
void preExtract(ExtractorDocumentNode &node, const ExtractorEngine *engine) const override
Called before extractors are applied to node.
bool canHandleData(const QByteArray &encodedData, QStringView fileName) const override
Fast check whether the given encoded data can possibly be processed by this instance.
A node in the extracted document object tree.
QString mimeType
The MIME type of this node.
QJSValue content
The decoded content of this node.
void addResult(ExtractorResult &&result)
Add additional results from an extraction step.
QDateTime contextDateTime
The best known context date/time at this point in the document tree.
void setContent(const QVariant &content)
Set decoded content.
Semantic data extraction engine.
static bool maybePdf(const QByteArray &data)
Fast check whether data might be a PDF document.
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
QString toString(QStringView format, QCalendar cal) const
QString errorString() const
qint64 write(const QByteArray &data)
QJsonArray array() const
QJsonDocument fromJson(const QByteArray &json, QJsonParseError *error)
void reserve(qsizetype size)
ForwardedErrorChannel
void closeWriteChannel()
QByteArray readAllStandardOutput()
void setArguments(const QStringList &arguments)
void setProcessChannelMode(ProcessChannelMode mode)
void setProgram(const QString &program)
void start(OpenMode mode)
bool waitForFinished(int msecs)
bool waitForStarted(int msecs)
QString join(QChar separator) const
bool endsWith(QChar ch) const
CaseInsensitive
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:50:01 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.