KItinerary
pdfdocumentprocessor.cpp
37bool PdfDocumentProcessor::canHandleData(const QByteArray &encodedData, QStringView fileName) const
67ExtractorDocumentNode PdfDocumentProcessor::createNodeFromData(const QByteArray &encodedData) const
83ExtractorDocumentNode PdfDocumentProcessor::createNodeFromContent(const QVariant &decodedData) const
98void PdfDocumentProcessor::expandNode(ExtractorDocumentNode &node, const ExtractorEngine *engine) const
108 img.setLoadingHints(PdfImage::AbortOnColorHint | PdfImage::ConvertToGrayscaleHint); // we only care about b/w-ish images for barcode detection
113 const auto barcodeHints = PdfBarcodeUtil::maybeBarcode(img, BarcodeDecoder::Any2D | BarcodeDecoder::Any1D);
125 node.appendChild(childNode); // TODO the old code de-duplicated repeated barcodes here - do we actually need that?
130 // technically not our job to do this here rather than letting the image node processor handle this
131 // but we have the output aspect ratio of the barcode only here, which gives better decoding hints
136 // if this failed, check if the image has an aspect-ratio distorting scale and try again with that
138 BarcodeDocumentProcessorHelper::expandNode(img.applyAspectRatioTransform(imgData), barcodeHints, childNode, engine);
149 if ((engine->hints() & ExtractorEngine::ExtractFullPageRasterImages) && imageCount == 1 && page.text().isEmpty()) {
176void PdfDocumentProcessor::postExtract(ExtractorDocumentNode &node, [[maybe_unused]] const ExtractorEngine *engine) const
200QJSValue PdfDocumentProcessor::contentToScriptValue(const ExtractorDocumentNode &node, QJSEngine *engine) const
ExtractorDocumentNode createNode(const QByteArray &data, QStringView fileName={}, QStringView mimeType={}) const
Create a new document node from data.
Definition extractordocumentnodefactory.cpp:159
A node in the extracted document object tree.
Definition extractordocumentnode.h:50
void setResult(ExtractorResult &&result)
Replace the existing results by result.
Definition extractordocumentnode.cpp:160
void appendChild(ExtractorDocumentNode &child)
Add another child node.
Definition extractordocumentnode.cpp:141
void setContextDateTime(const QDateTime &contextDateTime)
Set the context date/time.
Definition extractordocumentnode.cpp:173
void setContent(const QVariant &content)
Set decoded content.
Definition extractordocumentnode.cpp:120
void setLocation(const QVariant &location)
Set the location information.
Definition extractordocumentnode.cpp:186
@ ExtractFullPageRasterImages
perform expensive image processing on (PDF) documents containing full page raster images
Definition engine/extractorengine.h:164
const ExtractorDocumentNodeFactory * documentNodeFactory() const
Factory for creating new document nodes.
Definition engine/extractorengine.cpp:172
void postExtract(ExtractorDocumentNode &node, const ExtractorEngine *engine) const override
Called after extractors have been applied to node.
Definition pdfdocumentprocessor.cpp:176
bool canHandleData(const QByteArray &encodedData, QStringView fileName) const override
Fast check whether the given encoded data can possibly be processed by this instance.
Definition pdfdocumentprocessor.cpp:37
QJSValue contentToScriptValue(const ExtractorDocumentNode &node, QJSEngine *engine) const override
Create a QJSValue for the node content.
Definition pdfdocumentprocessor.cpp:200
void destroyNode(ExtractorDocumentNode &node) const override
Destroys type-specific data in node.
Definition pdfdocumentprocessor.cpp:205
void expandNode(ExtractorDocumentNode &node, const ExtractorEngine *engine) const override
Create child nodes for node, as far as that's necessary for this document type.
Definition pdfdocumentprocessor.cpp:98
ExtractorDocumentNode createNodeFromData(const QByteArray &encodedData) const override
Create a document node from raw data.
Definition pdfdocumentprocessor.cpp:67
ExtractorDocumentNode createNodeFromContent(const QVariant &decodedData) const override
Create a document node from an already decoded data type.
Definition pdfdocumentprocessor.cpp:83
static PdfDocument * fromData(const QByteArray &data, QObject *parent=nullptr)
Creates a PdfDocument from the given raw data.
Definition pdfdocument.cpp:363
static bool maybePdf(const QByteArray &data)
Fast check whether data might be a PDF document.
Definition pdfdocument.cpp:389
@ AbortOnColorHint
Abort loading when encountering a non black/white pixel, as a shortcut for barcode detection.
Definition pdfimage.h:102
@ ConvertToGrayscaleHint
Convert to QImage::Format_Grayscale8 during loading. More efficient than converting later if all you ...
Definition pdfimage.h:103
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
int year() const const
QDateTime currentDateTime()
QDate date() const const
QJSValue toScriptValue(const T &value)
bool isEmpty() const const
qsizetype size() const const
bool endsWith(QChar ch) const const
CaseInsensitive
T value() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:50:01 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:50:01 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006
KDE's Doxygen guidelines are available online.