KItinerary

extractorpostprocessor.cpp
1/*
2 SPDX-FileCopyrightText: 2017 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "config-kitinerary.h"
8#include "extractorpostprocessor.h"
9#include "extractorpostprocessor_p.h"
10#include "extractorvalidator.h"
11#include "flightpostprocessor_p.h"
12#include "stringutil.h"
13
14#include "iata/iatabcbpparser.h"
15#include "jsonlddocument.h"
16#include "logging.h"
17#include "mergeutil.h"
18#include "sortutil.h"
19#include "text/addressparser_p.h"
20
21#include "knowledgedb/airportdb.h"
22#include "knowledgedb/timezonedb_p.h"
23#include "knowledgedb/trainstationdb.h"
24
25#include <KItinerary/Action>
26#include <KItinerary/BoatTrip>
27#include <KItinerary/BusTrip>
28#include <KItinerary/Event>
29#include <KItinerary/Flight>
30#include <KItinerary/Organization>
31#include <KItinerary/Person>
32#include <KItinerary/Place>
33#include <KItinerary/ProgramMembership>
34#include <KItinerary/RentalCar>
35#include <KItinerary/Reservation>
36#include <KItinerary/Taxi>
37#include <KItinerary/Ticket>
38#include <KItinerary/TrainTrip>
39#include <KItinerary/Visit>
40
41#include <KCountry>
42
43#include <QDebug>
44#include <QJsonArray>
45#include <QJsonDocument>
46#include <QTimeZone>
47#include <QUrl>
48
49#if HAVE_PHONENUMBER
50#include <phonenumbers/phonenumberutil.h>
51#endif
52
53#include <algorithm>
54#include <cstring>
55
56using namespace Qt::Literals::StringLiterals;
57using namespace KItinerary;
58
59ExtractorPostprocessor::ExtractorPostprocessor()
60 : d(new ExtractorPostprocessorPrivate)
61{
62}
63
64ExtractorPostprocessor::ExtractorPostprocessor(ExtractorPostprocessor &&) noexcept = default;
66
67void ExtractorPostprocessor::process(const QList<QVariant> &data) {
68 d->m_resultFinalized = false;
69 d->m_data.reserve(d->m_data.size() + data.size());
70 for (auto elem : data) {
71 // reservation types
73 elem = d->processFlightReservation(elem.value<FlightReservation>());
74 } else if (JsonLd::isA<TrainReservation>(elem)) {
75 elem = d->processTrainReservation(elem.value<TrainReservation>());
76 } else if (JsonLd::isA<LodgingReservation>(elem)) {
77 elem = d->processLodgingReservation(elem.value<LodgingReservation>());
79 elem = d->processFoodEstablishmentReservation(elem.value<FoodEstablishmentReservation>());
80 } else if (JsonLd::isA<TouristAttractionVisit>(elem)) {
81 elem = d->processTouristAttractionVisit(elem.value<TouristAttractionVisit>());
82 } else if (JsonLd::isA<BusReservation>(elem)) {
83 elem = d->processBusReservation(elem.value<BusReservation>());
84 } else if (JsonLd::isA<BoatReservation>(elem)) {
85 elem = d->processBoatReservation(elem.value<BoatReservation>());
86 } else if (JsonLd::isA<EventReservation>(elem)) {
87 elem = d->processEventReservation(elem.value<EventReservation>());
88 } else if (JsonLd::isA<RentalCarReservation>(elem)) {
89 elem = d->processRentalCarReservation(elem.value<RentalCarReservation>());
90 } else if (JsonLd::isA<TaxiReservation>(elem)) {
91 elem = d->processTaxiReservation(elem.value<TaxiReservation>());
92 }
93
94 // "reservationFor" types
95 else if (JsonLd::isA<LodgingBusiness>(elem)) {
96 elem = d->processPlace(elem.value<LodgingBusiness>());
97 } else if (JsonLd::isA<FoodEstablishment>(elem)) {
98 elem = d->processPlace(elem.value<FoodEstablishment>());
99 } else if (JsonLd::isA<Event>(elem)) {
100 elem = d->processEvent(elem.value<Event>());
101 } else if (JsonLd::isA<LocalBusiness>(elem)) {
102 elem = d->processPlace(elem.value<LocalBusiness>());
103 }
104
105 // non-reservation types
106 else if (JsonLd::isA<ProgramMembership>(elem)) {
107 elem = d->processProgramMembership(elem.value<ProgramMembership>());
108 } else if (JsonLd::isA<Ticket>(elem)) {
109 elem = d->processTicket(elem.value<Ticket>());
110 }
111
112 d->mergeOrAppend(elem);
113 }
114}
115
116[[nodiscard]] static QVariant mergeTicket(QVariant lhs, const QVariant &rhs)
117{
118 const auto rhsTicket = JsonLdDocument::readProperty(rhs, "reservedTicket");
119 const auto lhsTicket = JsonLdDocument::readProperty(lhs, "reservedTicket");
120 JsonLdDocument::writeProperty(lhs, "reservedTicket", MergeUtil::merge(lhsTicket, rhsTicket));
121 return lhs;
122}
123
125 if (!d->m_resultFinalized) {
126 // fold elements we have reservations for into those reservations
127 for (auto it = d->m_data.begin(); it != d->m_data.end();) {
128 if (JsonLd::isA<Reservation>(*it)) {
129 ++it;
130 continue;
131 }
132
133 bool merged = false;
134 for (auto it2 = d->m_data.begin(); it2 != d->m_data.end(); ++it2) {
135 const auto resFor = JsonLdDocument::readProperty(*it2, "reservationFor");
136 if (MergeUtil::isSame(resFor, *it)) {
137 JsonLdDocument::writeProperty(*it2, "reservationFor", MergeUtil::merge(resFor, *it));
138 merged = true;
139 }
140 }
141
142 if (merged) {
143 it = d->m_data.erase(it);
144 } else {
145 ++it;
146 }
147 }
148
149 // search for "triangular" patterns, ie. a location change element that has a matching departure
150 // and matching arrival to two different other location change elements (A->C vs A->B + B->C).
151 // we remove those, as the fine-granular results are better
152 if (d->m_data.size() >= 3) {
153 for (auto it = d->m_data.begin(); it != d->m_data.end();) {
154 auto depIt = it;
155 auto arrIt = it;
156 for (auto it2 = d->m_data.begin(); it2 != d->m_data.end(); ++it2) {
157 if (it == it2) {
158 continue;
159 }
160 if (MergeUtil::hasSameDeparture(*it, *it2)) {
161 depIt = it2;
162 }
163 if (MergeUtil::hasSameArrival(*it, *it2)) {
164 arrIt = it2;
165 }
166 }
167
168 if (depIt != it && arrIt != it && depIt != arrIt) {
169 (*depIt) = mergeTicket(*depIt, *it);
170 (*arrIt) = mergeTicket(*arrIt, *it);
171 it = d->m_data.erase(it);
172 } else {
173 ++it;
174 }
175 }
176 }
177
178 d->m_resultFinalized = true;
179 }
180
181 std::stable_sort(d->m_data.begin(), d->m_data.end(), SortUtil::isBefore);
182 return d->m_data;
183}
184
186{
187 d->m_contextDate = dt;
188}
189
190void ExtractorPostprocessor::setValidationEnabled([[maybe_unused]] bool validate)
191{
192}
193
194void ExtractorPostprocessorPrivate::mergeOrAppend(const QVariant &elem)
195{
196 const auto it = std::find_if(m_data.begin(), m_data.end(), [elem](const QVariant &other) {
197 return MergeUtil::isSame(elem, other);
198 });
199
200 if (it == m_data.end()) {
201 m_data.push_back(elem);
202 } else {
203 *it = MergeUtil::merge(*it, elem);
204 }
205}
206
207QVariant ExtractorPostprocessorPrivate::processFlightReservation(FlightReservation res) const
208{
209 // expand ticketToken for IATA BCBP data
210 const auto bcbp = res.reservedTicket().value<Ticket>().ticketTokenData().toString();
211 if (!bcbp.isEmpty()) {
212 const auto bcbpData = IataBcbpParser::parse(bcbp, m_contextDate);
213 if (bcbpData.size() == 1) {
214 res = JsonLdDocument::apply(bcbpData.at(0), res).value<FlightReservation>();
215 // standardize on the BCBP booking reference, not some secondary one we might have in structured data for example
216 res.setReservationNumber(bcbpData.at(0).value<FlightReservation>().reservationNumber());
217 } else {
218 for (const auto &data : bcbpData) {
219 if (MergeUtil::isSame(res, data)) {
221 break;
222 }
223 }
224 }
225 }
226
227 res.setBoardingGroup(StringUtil::simplifiedNoPlaceholder(res.boardingGroup()));
228 if (res.reservationFor().isValid()) {
229 FlightPostProcessor p;
230 res.setReservationFor(p.processFlight(res.reservationFor().value<Flight>()));
231 }
232 return processReservation(res);
233}
234
235TrainReservation ExtractorPostprocessorPrivate::processTrainReservation(TrainReservation res) const
236{
237 if (res.reservationFor().isValid()) {
238 res.setReservationFor(processTrainTrip(res.reservationFor().value<TrainTrip>()));
239 }
240 return processReservation(res);
241}
242
243TrainTrip ExtractorPostprocessorPrivate::processTrainTrip(TrainTrip trip) const
244{
245 trip.setArrivalPlatform(trip.arrivalPlatform().trimmed());
246 trip.setDeparturePlatform(trip.departurePlatform().trimmed());
247 trip.setDepartureStation(processStation(trip.departureStation()));
248 trip.setArrivalStation(processStation(trip.arrivalStation()));
249 trip.setDepartureTime(processTripTime(trip.departureTime(), trip.departureDay(), trip.departureStation()));
250 trip.setArrivalTime(processTripTime(trip.arrivalTime(), trip.departureDay(), trip.arrivalStation()));
251 trip.setTrainNumber(trip.trainNumber().simplified());
252 trip.setTrainName(trip.trainName().simplified());
253
254 // arrival less than a day before departure is an indication of the extractor failing to detect day rollover
255 const auto duration = trip.departureTime().secsTo(trip.arrivalTime());
256 if (duration < 0 && duration > -3600*24 && trip.departureTime().timeSpec() == trip.arrivalTime().timeSpec()) {
257 trip.setArrivalTime(trip.arrivalTime().addDays(1));
258 }
259
260 return trip;
261}
262
263template <typename T>
264static void applyStationData(const KnowledgeDb::TrainStation &record, T &station)
265{
266 if (!station.geo().isValid() && record.coordinate.isValid()) {
268 geo.setLatitude(record.coordinate.latitude);
269 geo.setLongitude(record.coordinate.longitude);
270 station.setGeo(geo);
271 }
272 auto addr = station.address();
273 if (addr.addressCountry().isEmpty() && record.country.isValid()) {
274 addr.setAddressCountry(record.country.toString());
275 station.setAddress(addr);
276 }
277}
278
279template <typename T>
280static void applyStationCountry(const QString &isoCode, T &station)
281{
282 auto addr = station.address();
283 if (addr.addressCountry().isEmpty()) {
284 addr.setAddressCountry(isoCode.toUpper());
285 station.setAddress(addr);
286 }
287}
288
289template<typename T>
290T ExtractorPostprocessorPrivate::processStation(T station)
291{
292 const auto id = station.identifier();
293 if (id.isEmpty()) { // empty -> null cleanup, to have more compact json-ld output
294 station.setIdentifier(QString());
295 } else if (id.startsWith("sncf:"_L1) && id.size() == 10) {
297 applyStationData(record, station);
298 applyStationCountry(id.mid(5, 2).toUpper(), station);
299 } else if (id.startsWith("ibnr:"_L1) && id.size() == 12) {
300 const auto record = KnowledgeDb::stationForIbnr(KnowledgeDb::IBNR{id.mid(5).toUInt()});
301 applyStationData(record, station);
302 const auto country = KnowledgeDb::countryIdForUicCode(QStringView(id).mid(5, 2).toUShort()) .toString();
303 applyStationCountry(country, station);
304 } else if (id.startsWith("uic:"_L1) && id.size() == 11) {
305 const auto record = KnowledgeDb::stationForUic( KnowledgeDb::UICStation{id.mid(4).toUInt()});
306 applyStationData(record, station);
307 const auto country = KnowledgeDb::countryIdForUicCode(QStringView(id).mid(4, 2).toUShort()) .toString();
308 applyStationCountry(country, station);
309 } else if (id.startsWith("ir:"_L1) && id.size() > 4) {
310 const auto record = KnowledgeDb::stationForIndianRailwaysStationCode(id.mid(3));
311 applyStationData(record, station);
312 } else if (id.startsWith("benerail:"_L1) && id.size() == 14) {
314 applyStationData(record, station);
315 applyStationCountry(id.mid(9, 2).toUpper(), station);
316 } else if (id.startsWith("vrfi:"_L1) && id.size() >= 7 && id.size() <= 9) {
318 applyStationData(record, station);
319 } else if (id.startsWith("iata:"_L1) && id.size() == 8) {
320 const auto iataCode = KnowledgeDb::IataCode(QStringView(id).mid(5));
321 const auto record = KnowledgeDb::stationForIataCode(iataCode);
322 applyStationData(record, station);
323 // fall back to the airport with the matching IATA code for the country
324 // information we cannot use the coordinate though, as that points to the
325 // actual airport, not the station
326 applyStationCountry(KnowledgeDb::countryForAirport(iataCode).toString(), station);
327 } else if (id.startsWith("amtrak:"_L1) && id.size() == 10) {
328 const auto record = KnowledgeDb::stationForAmtrakStationCode(KnowledgeDb::AmtrakStationCode(QStringView(id).mid(7)));
329 applyStationData(record, station);
330 } else if (id.startsWith("via:"_L1) && id.size() == 8) {
331 const auto record = KnowledgeDb::stationForViaRailStationCode(KnowledgeDb::ViaRailStationCode(QStringView(id).mid(4)));
332 applyStationData(record, station);
333 } else if (id.startsWith("uk:"_L1) && id.size() == 6) {
334 const auto record = KnowledgeDb::stationForUkRailwayStationCode(KnowledgeDb::UKRailwayStationCode(QStringView(id).mid(3)));
335 applyStationData(record, station);
336 }
337
338 return processPlace(station);
339}
340
341template <typename T>
342QDateTime ExtractorPostprocessorPrivate::processTripTime(QDateTime dt, QDate departureDay, const T& place) const
343{
344 if (!dt.isValid()) {
345 return dt;
346 }
347
348 if (dt.date().year() <= 1970 && departureDay.isValid()) { // we just have the time, but not the day
349 dt.setDate(departureDay);
350 }
351 return processTimeForLocation(dt, place);
352}
353
354BusReservation ExtractorPostprocessorPrivate::processBusReservation(BusReservation res) const
355{
356 if (res.reservationFor().isValid()) {
357 res.setReservationFor(processBusTrip(res.reservationFor().value<BusTrip>()));
358 }
359 return processReservation(res);
360}
361
362BusTrip ExtractorPostprocessorPrivate::processBusTrip(BusTrip trip) const
363{
364 trip.setDepartureBusStop(processStation(trip.departureBusStop()));
365 trip.setArrivalBusStop(processStation(trip.arrivalBusStop()));
366 trip.setDepartureTime(processTripTime(trip.departureTime(), trip.departureDay(), trip.departureBusStop()));
367 trip.setArrivalTime(processTripTime(trip.arrivalTime(), trip.departureDay(), trip.arrivalBusStop()));
368 trip.setBusNumber(trip.busNumber().simplified());
369 trip.setBusName(trip.busName().simplified());
370 return trip;
371}
372
373BoatReservation ExtractorPostprocessorPrivate::processBoatReservation(BoatReservation res) const
374{
375 if (res.reservationFor().isValid()) {
376 res.setReservationFor(processBoatTrip(res.reservationFor().value<BoatTrip>()));
377 }
378 return processReservation(res);
379}
380
381BoatTrip ExtractorPostprocessorPrivate::processBoatTrip(BoatTrip trip) const
382{
383 trip.setDepartureBoatTerminal(processPlace(trip.departureBoatTerminal()));
384 trip.setArrivalBoatTerminal(processPlace(trip.arrivalBoatTerminal()));
385 trip.setDepartureTime(processTimeForLocation(trip.departureTime(), trip.departureBoatTerminal()));
386 trip.setArrivalTime(processTimeForLocation(trip.arrivalTime(), trip.arrivalBoatTerminal()));
387
388 // arrival less than a day before departure is an indication of the extractor failing to detect day rollover
389 const auto duration = trip.departureTime().secsTo(trip.arrivalTime());
390 if (duration < 0 && duration > -3600*24) {
391 trip.setArrivalTime(trip.arrivalTime().addDays(1));
392 }
393
394 return trip;
395}
396
397LodgingReservation ExtractorPostprocessorPrivate::processLodgingReservation(LodgingReservation res) const
398{
399 if (res.reservationFor().isValid()) {
400 res.setReservationFor(processPlace(res.reservationFor().value<LodgingBusiness>()));
401 res.setCheckinTime(processTimeForLocation(res.checkinTime(), res.reservationFor().value<LodgingBusiness>()));
402 res.setCheckoutTime(processTimeForLocation(res.checkoutTime(), res.reservationFor().value<LodgingBusiness>()));
403 }
404 return processReservation(res);
405}
406
407TaxiReservation ExtractorPostprocessorPrivate::processTaxiReservation(TaxiReservation res) const
408{
409 res.setPickupLocation(processPlace(res.pickupLocation()));
410 res.setPickupTime(processTimeForLocation(res.pickupTime(), res.pickupLocation()));
411 return processReservation(res);
412}
413
414RentalCarReservation ExtractorPostprocessorPrivate::processRentalCarReservation(RentalCarReservation res) const
415{
416 if (res.reservationFor().isValid()) {
417 res.setReservationFor(processRentalCar(res.reservationFor().value<RentalCar>()));
418 }
419 res.setPickupLocation(processPlace(res.pickupLocation()));
420 res.setDropoffLocation(processPlace(res.dropoffLocation()));
421 res.setPickupTime(processTimeForLocation(res.pickupTime(), res.pickupLocation()));
422 res.setDropoffTime(processTimeForLocation(res.dropoffTime(), res.dropoffLocation()));
423 return processReservation(res);
424}
425
426RentalCar ExtractorPostprocessorPrivate::processRentalCar(RentalCar car) const
427{
428 car.setName(car.name().trimmed());
429 return car;
430}
431
432FoodEstablishmentReservation ExtractorPostprocessorPrivate::processFoodEstablishmentReservation(FoodEstablishmentReservation res) const
433{
434 if (res.reservationFor().isValid()) {
435 res.setReservationFor(processPlace(res.reservationFor().value<FoodEstablishment>()));
436 res.setStartTime(processTimeForLocation(res.startTime(), res.reservationFor().value<FoodEstablishment>()));
437 res.setEndTime(processTimeForLocation(res.endTime(), res.reservationFor().value<FoodEstablishment>()));
438 }
439 return processReservation(res);
440}
441
442TouristAttractionVisit ExtractorPostprocessorPrivate::processTouristAttractionVisit(TouristAttractionVisit visit) const
443{
444 visit.setTouristAttraction(processPlace(visit.touristAttraction()));
445 visit.setArrivalTime(processTimeForLocation(visit.arrivalTime(), visit.touristAttraction()));
446 visit.setDepartureTime(processTimeForLocation(visit.departureTime(), visit.touristAttraction()));
447 return visit;
448}
449
450EventReservation ExtractorPostprocessorPrivate::processEventReservation(EventReservation res) const
451{
452 if (res.reservationFor().isValid()) {
453 res.setReservationFor(processEvent(res.reservationFor().value<Event>()));
454 }
455 return processReservation(res);
456}
457
458KItinerary::Event ExtractorPostprocessorPrivate::processEvent(KItinerary::Event event) const
459{
460 event.setName(StringUtil::clean(event.name()));
461
462 // normalize location to be a Place
463 if (JsonLd::isA<PostalAddress>(event.location())) {
464 Place place;
465 place.setAddress(event.location().value<PostalAddress>());
466 event.setLocation(place);
467 }
468
469 if (JsonLd::isA<Place>(event.location())) {
470 event.setLocation(processPlace(event.location().value<Place>()));
471
472 // try to obtain timezones if we have a location
473 event.setStartDate(processTimeForLocation(event.startDate(), event.location().value<Place>()));
474 event.setEndDate(processTimeForLocation(event.endDate(), event.location().value<Place>()));
475 event.setDoorTime(processTimeForLocation(event.doorTime(), event.location().value<Place>()));
476 }
477
478 return event;
479}
480
481static QString processCurrency(const QString &currency)
482{
483 if (currency.size() != 3 || !std::all_of(currency.begin(), currency.end(), [](QChar c) { return c.isUpper(); })) {
484 return {};
485 }
486 return currency;
487}
488
489Ticket ExtractorPostprocessorPrivate::processTicket(Ticket ticket) const
490{
491 ticket.setName(StringUtil::clean(ticket.name()));
492 ticket.setTicketNumber(ticket.ticketNumber().simplified());
493 ticket.setUnderName(processPerson(ticket.underName()));
494 ticket.setTicketedSeat(processSeat(ticket.ticketedSeat()));
495 ticket.setPriceCurrency(processCurrency(ticket.priceCurrency()));
496 return ticket;
497}
498
499ProgramMembership ExtractorPostprocessorPrivate::processProgramMembership(ProgramMembership program) const
500{
501 // remove empty dummy entries found eg. in ERA FCB data
502 if (const auto name = program.programName(); std::none_of(name.begin(), name.end(), [](QChar c) { return c.isLetter(); })) {
503 program.setProgramName(QString());
504 }
505
506 program.setProgramName(program.programName().simplified());
507 // avoid emitting spurious empty ProgramMembership objects caused by empty elements in JSON-LD/Microdata input
508 if (program.programName().isEmpty() && !program.programName().isNull()) {
509 program.setProgramName(QString());
510 }
511 program.setMember(processPerson(program.member()));
512 return program;
513}
514
515Seat ExtractorPostprocessorPrivate::processSeat(Seat seat) const
516{
517 seat.setSeatSection(seat.seatSection().simplified());
518 seat.setSeatRow(seat.seatRow().simplified());
519 seat.setSeatNumber(seat.seatNumber().simplified());
520 seat.setSeatingType(seat.seatingType().simplified());
521 return seat;
522}
523
524template <typename T>
525T ExtractorPostprocessorPrivate::processReservation(T res) const
526{
527 res.setUnderName(processPerson(res.underName().template value<Person>()));
528 res.setPotentialAction(processActions(res.potentialAction()));
529 res.setReservationNumber(res.reservationNumber().trimmed());
530 res.setProgramMembershipUsed(processProgramMembership(res.programMembershipUsed()));
531 res.setPriceCurrency(processCurrency(res.priceCurrency()));
532
533 if (JsonLd::isA<Ticket>(res.reservedTicket())) {
534 res.setReservedTicket(processTicket(res.reservedTicket().template value<Ticket>()));
535 }
536 return res;
537}
538
/** Salutations and titles that simplifyNamePart() strips from the start or end of a name. */
static constexpr const char* name_prefixes[] = {
    "DR",
    "MR",
    "MRS",
    "MS",
};
542
543static bool isSeparator(QChar c)
544{
545 return c == QLatin1Char(' ') || c == QLatin1Char('/');
546}
547
548static QString simplifyNamePart(QString n)
549{
550 n = n.simplified();
551
552 for (auto prefix : name_prefixes) {
553 const int prefixLen = std::strlen(prefix);
554 if (n.size() > prefixLen + 2 &&
555 n.startsWith(QLatin1StringView(prefix, prefixLen),
557 isSeparator(n[prefixLen])) {
558 return n.mid(prefixLen + 1);
559 }
560 if (n.size() > prefixLen + 2 &&
561 n.endsWith(QLatin1StringView(prefix, prefixLen),
563 isSeparator(n[n.size() - prefixLen - 1])) {
564 return n.left(n.size() - prefixLen - 1);
565 }
566 }
567
568 return n;
569}
570
571KItinerary::Person ExtractorPostprocessorPrivate::processPerson(KItinerary::Person person) const
572{
573 person.setName(simplifyNamePart(person.name()));
574 person.setFamilyName(simplifyNamePart(person.familyName()));
575 person.setGivenName(simplifyNamePart(person.givenName()));
576
577 // fill name with name parts, if it's empty
578 if ((person.name().isEmpty() || person.name() == person.familyName() || person.name() == person.givenName())
579 && !person.familyName().isEmpty() && !person.givenName().isEmpty())
580 {
581 person.setName(person.givenName() + QLatin1Char(' ') + person.familyName());
582 }
583
584 return person;
585}
586
587PostalAddress ExtractorPostprocessorPrivate::processAddress(PostalAddress addr, const QString &phoneNumber, const GeoCoordinates &geo)
588{
589 addr.setAddressCountry(addr.addressCountry().simplified());
590
591 // convert to ISO 3166-1 alpha-2 country codes
592 if (addr.addressCountry().size() > 2) {
593 QString alpha2Code;
594
595 // try ISO 3166-1 alpha-3, we get that e.g. from Flixbus
596 if (addr.addressCountry().size() == 3) {
597 alpha2Code = KCountry::fromAlpha3(addr.addressCountry()).alpha2();
598 }
599 if (alpha2Code.isEmpty()) {
600 alpha2Code = KCountry::fromName(addr.addressCountry()).alpha2();
601 }
602 if (!alpha2Code.isEmpty()) {
603 addr.setAddressCountry(alpha2Code);
604 }
605 }
606
607 // upper case country codes
608 if (addr.addressCountry().size() == 2) {
609 addr.setAddressCountry(addr.addressCountry().toUpper());
610 }
611
612 // normalize strings
613 addr.setStreetAddress(addr.streetAddress().simplified());
614 addr.setPostalCode(addr.postalCode().simplified());
615 addr.setAddressLocality(addr.addressLocality().simplified());
616 addr.setAddressRegion(addr.addressRegion().simplified());
617
618#if HAVE_PHONENUMBER
619 // recover country from phone number, if we have that
620 if (!phoneNumber.isEmpty() && addr.addressCountry().size() != 2) {
621 const auto phoneStr = phoneNumber.toStdString();
622 const auto util = i18n::phonenumbers::PhoneNumberUtil::GetInstance();
623 i18n::phonenumbers::PhoneNumber number;
624 if (util->ParseAndKeepRawInput(phoneStr, "ZZ", &number) == i18n::phonenumbers::PhoneNumberUtil::NO_PARSING_ERROR) {
625 std::string isoCode;
626 util->GetRegionCodeForNumber(number, &isoCode);
627 if (!isoCode.empty() && isoCode != "ZZ") {
628 addr.setAddressCountry(QString::fromStdString(isoCode));
629 }
630 }
631 }
632#endif
633
634 if (geo.isValid() && addr.addressCountry().size() != 2) {
635 const auto country = KCountry::fromLocation(geo.latitude(), geo.longitude());
636 if (country.isValid()) {
637 addr.setAddressCountry(country.alpha2());
638 }
639 }
640
641 AddressParser addrParser;
642 addrParser.setFallbackCountry(KCountry::fromQLocale(QLocale().territory()).alpha2());
643 addrParser.parse(addr);
644 addr = addrParser.result();
645 return addr;
646}
647
648QString ExtractorPostprocessorPrivate::processPhoneNumber(const QString &phoneNumber, const PostalAddress &addr)
649{
650#if HAVE_PHONENUMBER
651 // or complete the phone number if we know the country
652 if (!phoneNumber.isEmpty() && addr.addressCountry().size() == 2) {
653 auto phoneStr = phoneNumber.toStdString();
654 const auto isoCode = addr.addressCountry().toStdString();
655 const auto util = i18n::phonenumbers::PhoneNumberUtil::GetInstance();
656 i18n::phonenumbers::PhoneNumber number;
657 if (util->ParseAndKeepRawInput(phoneStr, isoCode, &number) == i18n::phonenumbers::PhoneNumberUtil::NO_PARSING_ERROR) {
658 if (number.country_code_source() == i18n::phonenumbers::PhoneNumber_CountryCodeSource_FROM_DEFAULT_COUNTRY) {
659 util->Format(number, i18n::phonenumbers::PhoneNumberUtil::INTERNATIONAL, &phoneStr);
660 return QString::fromStdString(phoneStr);
661 }
662 }
663 }
664#else
665 Q_UNUSED(addr)
666#endif
667 return phoneNumber.simplified();
668}
669
670QVariantList ExtractorPostprocessorPrivate::processActions(QVariantList actions) const
671{
672 // remove non-actions and actions with invalid URLs
673 QUrl viewUrl;
674 for (auto it = actions.begin(); it != actions.end();) {
675 if (!JsonLd::canConvert<Action>(*it)) {
676 it = actions.erase(it);
677 continue;
678 }
679
680 const auto action = JsonLd::convert<Action>(*it);
681 if (!action.target().isValid()) {
682 it = actions.erase(it);
683 continue;
684 }
685
686 if (JsonLd::isA<ViewAction>(*it)) {
687 viewUrl = action.target();
688 }
689 ++it;
690 }
691
692 // normalize the order, so JSON comparison still yields correct results
693 std::sort(actions.begin(), actions.end(), [](const QVariant &lhs, const QVariant &rhs) {
694 return strcmp(lhs.typeName(), rhs.typeName()) < 0;
695 });
696
697 // remove actions that don't actually have their own target, or duplicates
698 QUrl prevUrl;
699 const char* prevType = nullptr;
700 for (auto it = actions.begin(); it != actions.end();) {
701 const auto action = JsonLd::convert<Action>(*it);
702 const auto isDuplicate = action.target() == prevUrl && (prevType ? strcmp(prevType, (*it).typeName()) == 0 : false);
703 if ((JsonLd::isA<ViewAction>(*it) || action.target() != viewUrl) && !isDuplicate) {
704 prevUrl = action.target();
705 prevType = (*it).typeName();
706 ++it;
707 } else {
708 it = actions.erase(it);
709 }
710 }
711
712 return actions;
713}
714
715template <typename T>
716QDateTime ExtractorPostprocessorPrivate::processTimeForLocation(QDateTime dt, const T &place) const
717{
718 if (!dt.isValid() ) {
719 return dt;
720 }
721 if ((dt.timeSpec() == Qt::TimeZone && dt.timeZone() != QTimeZone::utc())) {
722 if (KnowledgeDb::isPlausibleTimeZone(dt.timeZone(), place.geo().latitude(), place.geo().longitude(), place.address().addressCountry(), place.address().addressRegion())) {
723 return dt;
724 }
725 // drop timezones where we are sure they don't match the location
727 }
728
729 const auto tz = KnowledgeDb::timezoneForLocation(place.geo().latitude(), place.geo().longitude(), place.address().addressCountry(), place.address().addressRegion());
730 if (!tz.isValid()) {
731 return dt;
732 }
733
734 // prefer our timezone over externally provided UTC offset, if they match
735 if (dt.timeSpec() == Qt::OffsetFromUTC && tz.offsetFromUtc(dt) != dt.offsetFromUtc()) {
736 qCDebug(Log) << "UTC offset clashes with expected timezone!" << dt << dt.offsetFromUtc() << tz.id() << tz.offsetFromUtc(dt);
737 return dt;
738 }
739
740 if (dt.timeSpec() == Qt::OffsetFromUTC || dt.timeSpec() == Qt::LocalTime) {
741 dt.setTimeZone(tz);
742 } else if (dt.timeSpec() == Qt::UTC || (dt.timeSpec() == Qt::TimeZone && dt.timeZone() == QTimeZone::utc())) {
743 dt = dt.toTimeZone(tz);
744 }
745 return dt;
746}
static KCountry fromLocation(float latitude, float longitude)
static KCountry fromName(QStringView name)
static KCountry fromQLocale(QLocale::Country country)
QString alpha2() const
static KCountry fromAlpha3(const char *alpha3Code)
A boat or ferry reservation.
A boat or ferry trip.
Definition boattrip.h:23
A bus reservation.
A bus trip.
Definition bustrip.h:22
QDate departureDay
The scheduled day of departure.
Definition bustrip.h:40
An event reservation.
An event.
Definition event.h:21
Post-process extracted data to filter out garbage and augment data from other sources.
void setContextDate(const QDateTime &dt)
The date the reservation(s) processed here have been made, if known.
QList< QVariant > result() const
This returns the final result of all previously executed processing steps followed by sorting and fil...
void setValidationEnabled(bool validate)
Enable or disable validation.
void process(const QList< QVariant > &data)
This will normalize and augment the given data elements and merge them with already added data elemen...
A flight reservation.
Definition reservation.h:90
A flight.
Definition flight.h:25
Food-related business (such as a restaurant, or a bakery).
Geographic coordinates.
Definition place.h:23
static void writeProperty(QVariant &obj, const char *name, const QVariant &value)
Set property name on object obj to value value.
static QVariant apply(const QVariant &lhs, const QVariant &rhs)
Apply all properties of rhs on to lhs.
static QVariant readProperty(const QVariant &obj, const char *name)
Read property name on object obj.
constexpr bool isValid() const
Returns true if this is a valid identifier.
Definition alphaid.h:56
QString toString() const
Returns a string representation of this identifier.
Definition alphaid.h:75
A hotel reservation.
Definition reservation.h:77
static QVariant merge(const QVariant &lhs, const QVariant &rhs)
Merge the two given objects.
static bool hasSameArrival(const QVariant &lhs, const QVariant &rhs)
Checks whether two transport reservation elements refer to the same arrival.
static bool isSame(const QVariant &lhs, const QVariant &rhs)
Checks if two Reservation or Trip values refer to the same booking element.
static bool hasSameDeparture(const QVariant &lhs, const QVariant &rhs)
Checks whether two transport reservation elements refer to the same departure.
Base class for places.
Definition place.h:69
Postal address.
Definition place.h:46
QString addressCountry
The country this address is in, as ISO 3166-1 alpha 2 code.
Definition place.h:53
A frequent traveler, bonus points or discount scheme program membership.
A Rental Car reservation.
A car rental.
Definition rentalcar.h:22
A reserved seat.
Definition ticket.h:23
A Taxi reservation.
A booked ticket.
Definition ticket.h:41
A train reservation.
A train trip.
Definition traintrip.h:24
QDate departureDay
The scheduled day of departure.
Definition traintrip.h:42
AKONADI_CALENDAR_EXPORT KCalendarCore::Event::Ptr event(const Akonadi::Item &item)
char * toString(const EngineQuery &query)
KIOCORE_EXPORT QString number(KIO::filesize_t size)
QList< QVariant > parse(const QString &message, const QDateTime &externalIssueDateTime=QDateTime())
Parses the bar coded boarding pass message message into a list of FlightReservation instances.
bool isA(const QVariant &value)
Returns true if value is of type T.
Definition datatypes.h:24
bool canConvert(const QVariant &value)
Checks if the given value can be up-cast to T.
Definition datatypes.h:31
T convert(const QVariant &value)
Up-cast value to T.
Definition datatypes.h:47
AlphaId< UnalignedNumber< 3 >, 4 > ViaRailStationCode
Via Rail station code.
TrainStation stationForViaRailStationCode(ViaRailStationCode code)
Lookup train station data by Via Rail station code.
CountryId countryIdForUicCode(uint16_t uicCountryCode)
Look up country ISO code from a UIC country code.
Definition countrydb.cpp:82
TrainStation stationForUkRailwayStationCode(UKRailwayStationCode code)
Lookup train station data by UK railway station code.
AlphaId< uint16_t, 3 > IataCode
IATA airport code.
Definition iatacode.h:17
TrainStation stationForIbnr(IBNR ibnr)
Lookup train station data by IBNR.
TrainStation stationForAmtrakStationCode(AmtrakStationCode code)
Lookup train station data by Amtrak station code.
AlphaId< uint16_t, 3 > UKRailwayStationCode
UK railway station code.
TrainStation stationForUic(UICStation uic)
Lookup train station data by UIC station id.
TrainStation stationForIndianRailwaysStationCode(const QString &code)
Lookup train station data by Indian Railways station code.
TrainStation stationForSncfStationId(SncfStationId sncfId)
Lookup train station data by SNCF station id.
KnowledgeDb::CountryId countryForAirport(IataCode iataCode)
Returns the country the airport with IATA code iataCode is in.
Definition airportdb.cpp:50
TrainStation stationForVRStationCode(VRStationCode vrStation)
Lookup train station data by VR (Finland) station code.
TrainStation stationForIataCode(IataCode iataCode)
Lookup train station data by IATA location code.
TrainStation stationForBenerailId(BenerailStationId id)
Lookup train station data by Benerail station identifier.
AlphaId< uint16_t, 3 > AmtrakStationCode
Amtrak station codes.
GeoCoordinates geo(const QVariant &location)
Returns the geo coordinates of a given location.
bool isBefore(const QVariant &lhs, const QVariant &rhs)
Sorting function for top-level reservation/visit/event elements.
Definition sortutil.cpp:169
QString clean(const QString &s)
Cleans up extra white spaces and XML entities from s.
QString simplifiedNoPlaceholder(const QString &s)
Same as QString::simplified() and dropping everything that just contains punctuation or dash character...
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
QStringView country(QStringView ifopt)
bool isValid(int year, int month, int day)
int year() const const
QDateTime addDays(qint64 ndays) const const
QDate date() const const
bool isValid() const const
int offsetFromUtc() const const
qint64 secsTo(const QDateTime &other) const const
void setDate(QDate date)
void setTimeZone(const QTimeZone &toZone)
Qt::TimeSpec timeSpec() const const
QTimeZone timeZone() const const
QDateTime toTimeZone(const QTimeZone &timeZone) const const
iterator begin()
iterator end()
bool endsWith(QChar c, Qt::CaseSensitivity cs) const const
QString fromStdString(const std::string &str)
bool isEmpty() const const
bool isNull() const const
QString left(qsizetype n) const const
QString mid(qsizetype position, qsizetype n) const const
QString simplified() const const
qsizetype size() const const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const const
std::string toStdString() const const
QString toUpper() const const
QString trimmed() const const
CaseInsensitive
TimeZone
QTimeZone utc()
bool isValid() const const
T value() const const
Train station entry in the station table.
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Feb 28 2025 11:55:14 by doxygen 1.13.2 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.