KItinerary

pkpassdocumentprocessor.cpp
1/*
2 SPDX-FileCopyrightText: 2019-2021 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "pkpassdocumentprocessor.h"
8
9#include <KItinerary/DocumentUtil>
10#include <KItinerary/Event>
11#include <KItinerary/ExtractorDocumentNodeFactory>
12#include <KItinerary/ExtractorEngine>
13#include <KItinerary/ExtractorFilter>
14#include <KItinerary/ExtractorResult>
15#include <KItinerary/Flight>
16#include <KItinerary/JsonLdDocument>
17#include <KItinerary/Reservation>
18#include <KItinerary/Ticket>
19#include <KItinerary/TrainTrip>
20
21#include "reservationconverter.h"
22#include "knowledgedb/airportdb.h"
23#include "text/nameoptimizer_p.h"
24#include "text/pricefinder_p.h"
25#include "text/timefinder_p.h"
26
27#include <KPkPass/Barcode>
28#include <KPkPass/BoardingPass>
29#include <KPkPass/Location>
30#include <KPkPass/Field>
31#include <KPkPass/Pass>
32
33#include <QJsonObject>
34#include <QJSEngine>
35#include <QTime>
36#include <QTimeZone>
37#include <QVariant>
38
39using namespace Qt::Literals::StringLiterals;
40using namespace KItinerary;
41
42Q_DECLARE_METATYPE(KItinerary::Internal::OwnedPtr<KPkPass::Pass>)
43
44bool PkPassDocumentProcessor::canHandleData(const QByteArray &encodedData, QStringView fileName) const
45{
46 return encodedData.startsWith("PK\x03\x04") ||
47 fileName.endsWith(QLatin1StringView(".pkpass"), Qt::CaseInsensitive);
48}
49
51{
52 auto pass = KPkPass::Pass::fromData(encodedData);
53 if (!pass) {
54 return {};
55 }
56
58 node.setContent<Internal::OwnedPtr<KPkPass::Pass>>(pass);
59 if (pass->relevantDate().isValid()) {
60 node.setContextDateTime(pass->relevantDate().addDays(-1)); // go a bit back, to compensate for unknown departure timezone at this point
61 }
62 return node;
63}
64
66{
67 auto pass = decodedData.value<KPkPass::Pass*>();
68 if (!pass) {
69 return {};
70 }
71
73 node.setContent(pass);
74 if (pass->relevantDate().isValid()) {
75 node.setContextDateTime(pass->relevantDate().addDays(-1)); // go a bit back, to compensate for unknown departure timezone at this point
76 }
77 return node;
78}
79
81{
82 const auto pass = node.content<KPkPass::Pass*>();
83 const auto barcodes = pass->barcodes();
84 if (barcodes.empty()) {
85 return;
86 }
87
88 for (const auto &barcode : barcodes) {
89 // try to recover binary barcode content (which wont survive a JSON/QString roundtrip)
90 const auto msg = barcode.message();
91 auto data = msg.toUtf8();
92 if (std::any_of(msg.begin(), msg.end(), [](QChar c) { return c.isNonCharacter() || c.isNull() || !c.isPrint(); })) {
93 if (barcode.messageEncoding().compare("iso-8859-1"_L1, Qt::CaseInsensitive) == 0) {
94 data = msg.toLatin1();
95 }
96 }
97 auto child = engine->documentNodeFactory()->createNode(data);
98 node.appendChild(child);
99 }
100}
101
103{
104 destroyIfOwned<KPkPass::Pass>(node);
105}
106
111
112static QList<KPkPass::Field> frontFieldsForPass(KPkPass::Pass *pass) {
114 fields += pass->headerFields();
115 fields += pass->primaryFields();
116 fields += pass->secondaryFields();
117 fields += pass->auxiliaryFields();
118 return fields;
119}
120
121static bool isAirportName(const QString &name, KnowledgeDb::IataCode iataCode)
122{
123 if (name.size() <= 3) {
124 return false;
125 }
126
127 const auto codes = KnowledgeDb::iataCodesFromName(name);
128 return std::find(codes.begin(), codes.end(), iataCode) != codes.end();
129}
130
131static bool isPlausibleGate(const QString &s)
132{
133 if (s.isEmpty() || s.size() > 10 || s.count(QLatin1Char('-')) > 1 || s.count(QLatin1Char(' ')) > 2) {
134 return false;
135 }
136 for (const auto &c : s) {
137 if (!c.isLetter() && !c.isDigit() && c != QLatin1Char(' ') && c != QLatin1Char(' ')) {
138 return false;
139 }
140 }
141 return true;
142}
143
144static Flight extractBoardingPass(KPkPass::Pass *pass, Flight flight)
145{
146 // search for missing information by field key
147 QString departureTerminal;
148 TimeFinder timeFinder;
149 const auto fields = pass->fields();
150 for (const auto &field : fields) {
151 // boarding time
152 if (!flight.boardingTime().isValid() && field.key().contains("boarding"_L1, Qt::CaseInsensitive)) {
153 const auto time = timeFinder.findSingularTime(field.value().toString());
154 if (time.isValid()) {
155 // this misses date, but the postprocessor will fill that in
156 flight.setBoardingTime(QDateTime(QDate(1, 1, 1), time));
157 continue;
158 }
159 }
160 // departure gate
161 if (flight.departureGate().isEmpty() && field.key().contains("gate"_L1, Qt::CaseInsensitive)) {
162 const auto gateStr = field.value().toString();
163 if (isPlausibleGate(gateStr)) {
164 flight.setDepartureGate(gateStr);
165 continue;
166 }
167 }
168 // departure time
169 if (!flight.departureTime().isValid() && field.key().contains("departure"_L1, Qt::CaseInsensitive)) {
170 const auto time = timeFinder.findSingularTime(field.value().toString());
171 if (time.isValid()) {
172 // this misses date, but the postprocessor will fill that in
173 flight.setDepartureTime(QDateTime(QDate(1, 1, 1), time));
174 continue;
175 }
176 }
177
178 if (field.key().contains("terminal"_L1, Qt::CaseInsensitive)) {
179 if (departureTerminal.isNull()) {
180 departureTerminal = field.value().toString();
181 } else {
182 departureTerminal = u""_s; // empty but not null, marking multiple terminal candidates
183 }
184 }
185 }
186
187 if (flight.departureTerminal().isEmpty() && !departureTerminal.isEmpty()) {
188 flight.setDepartureTerminal(departureTerminal);
189 }
190
191 // "relevantDate" is the best guess for the boarding time if we didn't find an explicit field for it
192 if (pass->relevantDate().isValid() && !flight.boardingTime().isValid()) {
193 const auto tz = KnowledgeDb::timezoneForAirport(KnowledgeDb::IataCode{flight.departureAirport().iataCode()});
194 if (tz.isValid()) {
195 flight.setBoardingTime(pass->relevantDate().toTimeZone(tz));
196 } else {
197 flight.setBoardingTime(pass->relevantDate());
198 }
199 }
200
201 // search for missing information in field content
202 const auto depIata = KnowledgeDb::IataCode(flight.departureAirport().iataCode());
203 const auto arrIata = KnowledgeDb::IataCode(flight.arrivalAirport().iataCode());
204 const auto frontFields = frontFieldsForPass(pass);
205 for (const auto &field : frontFields) {
206 // full airport names
207 if (flight.departureAirport().name().isEmpty()) {
208 if (isAirportName(field.value().toString(), depIata)) {
209 auto airport = flight.departureAirport();
210 airport.setName(field.value().toString());
211 flight.setDepartureAirport(airport);
212 } else if (isAirportName(field.label(), depIata)) {
213 auto airport = flight.departureAirport();
214 airport.setName(field.label());
215 flight.setDepartureAirport(airport);
216 }
217 }
218 if (flight.arrivalAirport().name().isEmpty()) {
219 if (isAirportName(field.value().toString(), arrIata)) {
220 auto airport = flight.arrivalAirport();
221 airport.setName(field.value().toString());
222 flight.setArrivalAirport(airport);
223 } else if (isAirportName(field.label(), arrIata)) {
224 auto airport = flight.arrivalAirport();
225 airport.setName(field.label());
226 flight.setArrivalAirport(airport);
227 }
228 }
229 }
230
231 // location is the best guess for the departure airport geo coordinates
232 auto depAirport = flight.departureAirport();
233 auto depGeo = depAirport.geo();
234 if (pass->locations().size() == 1 && !depGeo.isValid()) {
235 const auto loc = pass->locations().at(0);
236 depGeo.setLatitude(loc.latitude());
237 depGeo.setLongitude(loc.longitude());
238 depAirport.setGeo(depGeo);
239 flight.setDepartureAirport(depAirport);
240 }
241
242 // organizationName is the best guess for airline name
243 auto airline = flight.airline();
244 if (airline.name().isEmpty()) {
245 airline.setName(pass->organizationName());
246 flight.setAirline(airline);
247 }
248
249 return flight;
250}
251
252[[nodiscard]] static TrainReservation extractTrainTicket(KPkPass::Pass *pass, TrainReservation res)
253{
254 auto trip = res.reservationFor().value<TrainTrip>();
255 auto ticket = res.reservedTicket().value<Ticket>();
256
257 TimeFinder timeFinder;
258 const auto fields = pass->fields();
259 for (const auto &field : fields) {
260 // departure platform
261 if (trip.departurePlatform().isEmpty() && field.key().contains("track"_L1, Qt::CaseInsensitive)) {
262 const auto platformStr = field.value().toString();
263 if (isPlausibleGate(platformStr)) {
264 trip.setDeparturePlatform(platformStr);
265 continue;
266 }
267 }
268 // departure time
269 if (!trip.departureTime().isValid() && field.key().contains("departure"_L1, Qt::CaseInsensitive)) {
270 const auto time = timeFinder.findSingularTime(field.value().toString());
271 if (time.isValid()) {
272 // this misses date, but the postprocessor will fill that in
273 trip.setDepartureTime(QDateTime(QDate(1, 1, 1), time));
274 continue;
275 }
276 }
277 // coach/seat
278 if (ticket.ticketedSeat().seatSection().isEmpty() && field.key().contains("coach"_L1, Qt::CaseInsensitive)) {
279 auto seat = ticket.ticketedSeat();
280 seat.setSeatSection(field.value().toString());
281 ticket.setTicketedSeat(seat);
282 }
283 if (ticket.ticketedSeat().seatNumber().isEmpty() && field.key().contains("seat"_L1, Qt::CaseInsensitive)) {
284 auto seat = ticket.ticketedSeat();
285 seat.setSeatNumber(field.value().toString());
286 ticket.setTicketedSeat(seat);
287 }
288 }
289
290 // "relevantDate" is the best guess for the departure time if we didn't find an explicit field for it
291 if (pass->relevantDate().isValid() && !trip.departureTime().isValid()) {
292 // TODO try to recover timezone?
293 trip.setDepartureTime(pass->relevantDate());
294 }
295
296 // location is the best guess for the departure station geo coordinates
297 auto depStation = trip.departureStation();
298 auto depGeo = depStation.geo();
299 if (pass->locations().size() == 1 && !depGeo.isValid()) {
300 const auto loc = pass->locations().at(0);
301 depGeo.setLatitude((float)loc.latitude());
302 depGeo.setLongitude((float)loc.longitude());
303 depStation.setGeo(depGeo);
304 trip.setDepartureStation(depStation);
305 }
306
307 // organizationName is the best guess for airline name
308 auto provider = trip.provider();
309 if (provider.name().isEmpty()) {
310 provider.setName(pass->organizationName());
311 trip.setProvider(provider);
312 }
313
314 res.setReservationFor(trip);
315 res.setReservedTicket(ticket);
316 return res;
317}
318
319static void extractEventTicketPass(KPkPass::Pass *pass, EventReservation &eventRes)
320{
321 auto event = eventRes.reservationFor().value<Event>();
322
323 if (event.name().isEmpty()) {
324 event.setName(pass->description());
325 }
326
327 // "relevantDate" is the best guess for the start time
328 if (pass->relevantDate().isValid() && !event.startDate().isValid()) {
329 event.setStartDate(pass->relevantDate());
330
331 // "expirationDate" is the best guess for the end time
332 if (pass->expirationDate().isValid() && pass->relevantDate().date() == pass->expirationDate().date() &&
333 pass->expirationDate() > pass->relevantDate() && !event.endDate().isValid()) {
334 event.setEndDate(pass->expirationDate());
335 }
336 }
337
338 // location is the best guess for the venue
339 auto venue = event.location().value<Place>();
340 auto geo = venue.geo();
341 if (!pass->locations().isEmpty() && !geo.isValid()) {
342 const auto loc = pass->locations().at(0);
343 geo.setLatitude(loc.latitude());
344 geo.setLongitude(loc.longitude());
345 venue.setGeo(geo);
346 if (venue.name().isEmpty()) {
347 venue.setName(loc.relevantText());
348 }
349 event.setLocation(venue);
350 }
351
352 // search for prices
353 PriceFinder priceFinder;
354 std::vector<PriceFinder::Result> prices;
355 const auto fields = pass->fields();
356 for (const auto &field : fields) {
357 priceFinder.findAll(field.value().toString(), prices);
358 }
359 if (const auto price = priceFinder.highest(prices); price.hasResult()) {
360 eventRes.setTotalPrice(price.value);
361 eventRes.setPriceCurrency(price.currency);
362 }
363
364 eventRes.setReservationFor(event);
365}
366
367static Person extractPerson(const KPkPass::Pass *pass, Person person)
368{
369 const auto fields = pass->fields();
370 for (const auto &field : fields) {
371 person = NameOptimizer::optimizeName(field.valueDisplayString(), person);
372 }
373 return person;
374}
375
376void PkPassDocumentProcessor::preExtract(ExtractorDocumentNode &node, [[maybe_unused]] const ExtractorEngine *engine) const
377{
378 const auto pass = node.content<KPkPass::Pass*>();
379 QJsonObject result;
380 if (auto boardingPass = qobject_cast<KPkPass::BoardingPass*>(pass)) {
381 switch (boardingPass->transitType()) {
382 case KPkPass::BoardingPass::Air:
383 result.insert("@type"_L1, "FlightReservation"_L1);
384 break;
385 case KPkPass::BoardingPass::Train:
386 result.insert("@type"_L1, "TrainReservation"_L1);
387 break;
388 case KPkPass::BoardingPass::Bus:
389 result.insert("@type"_L1, "BusReservation"_L1);
390 break;
391 case KPkPass::BoardingPass::Boat:
392 result.insert("@type"_L1, "BoatReservation"_L1);
393 break;
394 case KPkPass::BoardingPass::Generic:
395 return;
396 }
397 } else {
398 switch (pass->type()) {
399 case KPkPass::Pass::BoardingPass:
400 Q_UNREACHABLE(); // handled above
401 case KPkPass::Pass::EventTicket:
402 result.insert("@type"_L1, "EventReservation"_L1);
403 break;
404 case KPkPass::Pass::Coupon:
405 case KPkPass::Pass::StoreCard:
406 case KPkPass::Pass::Generic:
407 return;
408 }
409 }
410
411 // barcode contains the ticket token
412 if (!pass->barcodes().isEmpty()) {
413 const auto barcode = pass->barcodes().at(0);
414 QString token;
415 switch (barcode.format()) {
416 case KPkPass::Barcode::Invalid:
417 break;
418 case KPkPass::Barcode::QR:
419 token = "qrCode:"_L1;
420 break;
421 case KPkPass::Barcode::Aztec:
422 token = "aztecCode:"_L1;
423 break;
424 case KPkPass::Barcode::PDF417:
425 token = "pdf417:"_L1;
426 break;
427 case KPkPass::Barcode::Code128:
428 token = "barcode128:"_L1;
429 }
430 token += barcode.message();
431 QJsonObject ticket = result.value("reservedTicket"_L1).toObject();
432 ticket.insert("@type"_L1, "Ticket"_L1);
433 ticket.insert("ticketToken"_L1, token);
434 result.insert("reservedTicket"_L1, ticket);
435 }
436
437 // explicitly merge with the decoded barcode data, as this would otherwise not match
438 auto res = JsonLdDocument::fromJsonSingular(result);
440 // if this doesn't contain a single IATA BCBP we wont be able to get sufficient information out of this
441 if (node.childNodes().size() != 1 || node.childNodes()[0].result().size() != 1) {
442 return;
443 }
444 res = JsonLdDocument::apply(node.childNodes()[0].result().result().at(0), res).value<FlightReservation>();
445 }
447 // if this is a IATA BCBP the child node extractor will have classified this as a flight
448 if (!node.childNodes().empty() && node.childNodes()[0].result().size() > 1) {
449 return;
450 }
451 if (!node.result().isEmpty()) {
452 if (JsonLd::isA<FlightReservation>(node.childNodes()[0].result().result().at(0))) {
453 const auto flightRes = node.childNodes()[0].result().jsonLdResult().at(0).toObject();
455 } else {
456 res = JsonLdDocument::apply(node.childNodes()[0].result().result().at(0), res).value<TrainReservation>();
457 }
458 }
459 }
460
461 // extract structured data from a pkpass, if the extractor script hasn't done so already
462 switch (pass->type()) {
463 case KPkPass::Pass::BoardingPass:
464 {
465 if (auto boardingPass = qobject_cast<KPkPass::BoardingPass*>(pass)) {
466 switch (boardingPass->transitType()) {
467 case KPkPass::BoardingPass::Air:
468 {
469 auto flightRes = res.value<FlightReservation>();
470 flightRes.setReservationFor(extractBoardingPass(pass, flightRes.reservationFor().value<Flight>()));
471 flightRes.setUnderName(extractPerson(pass, flightRes.underName().value<Person>()));
472 res = flightRes;
473 break;
474 }
475 case KPkPass::BoardingPass::Train:
476 {
477 auto trainRes = res.value<TrainReservation>();
478 trainRes = extractTrainTicket(pass, trainRes);
479 trainRes.setUnderName(extractPerson(pass, trainRes.underName().value<Person>()));
480 res = trainRes;
481 break;
482 }
483 default:
484 if (!node.result().isEmpty()) { // don't overwrite better results from child nodes
485 return;
486 }
487 break;
488 }
489 }
490 break;
491 }
492 case KPkPass::Pass::EventTicket:
493 {
494 auto evRes = res.value<EventReservation>();
495 extractEventTicketPass(pass, evRes);
496 res = evRes;
497 break;
498 }
499 default:
500 break;
501 }
502 node.setResult(QList<QVariant>({res}));
503}
504
505void PkPassDocumentProcessor::postExtract(ExtractorDocumentNode &node, [[maybe_unused]] const ExtractorEngine *engine) const
506{
507 const auto pass = node.content<KPkPass::Pass*>();
508 if (pass->passTypeIdentifier().isEmpty() || pass->serialNumber().isEmpty()) {
509 return;
510 }
511
512 // associate the pass with the result, so we can find the pass again for display
513 auto result = node.result().result();
514 for (auto &res : result) {
515 DocumentUtil::addDocumentId(res, DocumentUtil::idForPkPass(pass->passTypeIdentifier(), pass->serialNumber()));
516 // TODO replace this eventually with the more generic and standard compliant way above
517 JsonLdDocument::writeProperty(res, "pkpassPassTypeIdentifier", pass->passTypeIdentifier());
518 JsonLdDocument::writeProperty(res, "pkpassSerialNumber", pass->serialNumber());
519 // pass->relevantDate() as modification time is inherently unreliable (it wont change most of the time)
520 // so if we have something from an enclosing document, that's probably better
521 if (node.parent().contextDateTime().isValid()) {
523 }
524 }
525 node.setResult(result);
526}
An event reservation.
An event.
Definition event.h:21
ExtractorDocumentNode createNode(const QByteArray &data, QStringView fileName={}, QStringView mimeType={}) const
Create a new document node from data.
A node in the extracted document object tree.
QJsonArray result
Result access for QJSEngine.
void setResult(ExtractorResult &&result)
Replace the existing results by result.
void appendChild(ExtractorDocumentNode &child)
Add another child node.
void setContextDateTime(const QDateTime &contextDateTime)
Set the context date/time.
QJSValue content
The decoded content of this node.
QVariantList childNodes
Child nodes, for QJSEngine access.
QDateTime contextDateTime
The best known context date/time at this point in the document tree.
void setContent(const QVariant &content)
Set decoded content.
KItinerary::ExtractorDocumentNode parent
The parent node, or a null node if this is the root node.
Semantic data extraction engine.
const ExtractorDocumentNodeFactory * documentNodeFactory() const
Factory for creating new document nodes.
A flight reservation.
Definition reservation.h:90
A flight.
Definition flight.h:25
static void writeProperty(QVariant &obj, const char *name, const QVariant &value)
Set property name on object obj to value value.
static QVariant apply(const QVariant &lhs, const QVariant &rhs)
Apply all properties of rhs on to lhs.
static QVariant fromJsonSingular(const QJsonObject &obj)
Convert a single JSON-LD object into an instantiated data type.
A person.
Definition person.h:20
Processor for Apple Wallet pass files.
void preExtract(ExtractorDocumentNode &node, const ExtractorEngine *engine) const override
Called before extractors are applied to node.
ExtractorDocumentNode createNodeFromContent(const QVariant &decodedData) const override
Create a document node from an already decoded data type.
QJSValue contentToScriptValue(const ExtractorDocumentNode &node, QJSEngine *engine) const override
Create a QJSValue for the node content.
ExtractorDocumentNode createNodeFromData(const QByteArray &encodedData) const override
Create a document node from raw data.
void postExtract(ExtractorDocumentNode &node, const ExtractorEngine *engine) const override
Called after extractors have been applied to node.
void destroyNode(ExtractorDocumentNode &node) const override
Destroys type-specific data in node.
void expandNode(ExtractorDocumentNode &node, const ExtractorEngine *engine) const override
Create child nodes for node, as far as that's necessary for this document type.
Base class for places.
Definition place.h:69
A booked ticket.
Definition ticket.h:41
A train reservation.
A train trip.
Definition traintrip.h:24
static Pass * fromData(const QByteArray &data, QObject *parent=nullptr)
AKONADI_CALENDAR_EXPORT KCalendarCore::Event::Ptr event(const Akonadi::Item &item)
bool addDocumentId(QVariant &obj, const QString &id)
Add a document id to the object obj.
QString idForPkPass(const QString &passTypeIdentifier, const QString &serialNumber)
Determine a document identifier for an Apple Wallet pass.
bool isA(const QVariant &value)
Returns true if value is of type T.
Definition datatypes.h:24
QTimeZone timezoneForAirport(IataCode iataCode)
Returns the timezone the airport with IATA code iataCode is in.
Definition airportdb.cpp:40
AlphaId< uint16_t, 3 > IataCode
IATA airport code.
Definition iatacode.h:17
std::vector< IataCode > iataCodesFromName(QStringView name)
Returns all possible IATA code candidates for the given airport name.
GeoCoordinates geo(const QVariant &location)
Returns the geo coordinates of a given location.
QJsonObject flightToTrain(const QJsonObject &flightRes)
Convert a flight reservation to a train reservation.
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
bool isValid() const const
QString toString(QStringView format, QCalendar cal) const const
QJSValue toScriptValue(const T &value)
bool isEmpty() const const
iterator insert(QLatin1StringView key, const QJsonValue &value)
qsizetype count() const const
bool isEmpty() const const
bool isNull() const const
qsizetype size() const const
CaseInsensitive
T value() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Mon Nov 18 2024 12:09:59 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.