KItinerary

extractorpostprocessor.cpp
1/*
2 SPDX-FileCopyrightText: 2017 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "config-kitinerary.h"
8#include "extractorpostprocessor.h"
9#include "extractorpostprocessor_p.h"
10#include "extractorvalidator.h"
11#include "flightpostprocessor_p.h"
12#include "stringutil.h"
13
14#include "iata/iatabcbpparser.h"
15#include "jsonlddocument.h"
16#include "logging.h"
17#include "mergeutil.h"
18#include "sortutil.h"
19#include "text/addressparser_p.h"
20
21#include "knowledgedb/airportdb.h"
22#include "knowledgedb/timezonedb_p.h"
23#include "knowledgedb/trainstationdb.h"
24
25#include <KItinerary/Action>
26#include <KItinerary/BoatTrip>
27#include <KItinerary/BusTrip>
28#include <KItinerary/Event>
29#include <KItinerary/Flight>
30#include <KItinerary/Organization>
31#include <KItinerary/Person>
32#include <KItinerary/Place>
33#include <KItinerary/ProgramMembership>
34#include <KItinerary/RentalCar>
35#include <KItinerary/Reservation>
36#include <KItinerary/Taxi>
37#include <KItinerary/Ticket>
38#include <KItinerary/TrainTrip>
39#include <KItinerary/Visit>
40
41#include <KCountry>
42
43#include <QDebug>
44#include <QJsonArray>
45#include <QJsonDocument>
46#include <QTimeZone>
47#include <QUrl>
48
49#if HAVE_PHONENUMBER
50#include <phonenumbers/phonenumberutil.h>
51#endif
52
53#include <algorithm>
54#include <cstring>
55
56using namespace Qt::Literals::StringLiterals;
57using namespace KItinerary;
58
59ExtractorPostprocessor::ExtractorPostprocessor()
60 : d(new ExtractorPostprocessorPrivate)
61{
62}
63
64ExtractorPostprocessor::ExtractorPostprocessor(ExtractorPostprocessor &&) noexcept = default;
66
67void ExtractorPostprocessor::process(const QList<QVariant> &data) {
68 d->m_resultFinalized = false;
69 d->m_data.reserve(d->m_data.size() + data.size());
70 for (auto elem : data) {
71 // reservation types
73 elem = d->processFlightReservation(elem.value<FlightReservation>());
74 } else if (JsonLd::isA<TrainReservation>(elem)) {
75 elem = d->processTrainReservation(elem.value<TrainReservation>());
76 } else if (JsonLd::isA<LodgingReservation>(elem)) {
77 elem = d->processLodgingReservation(elem.value<LodgingReservation>());
79 elem = d->processFoodEstablishmentReservation(elem.value<FoodEstablishmentReservation>());
80 } else if (JsonLd::isA<TouristAttractionVisit>(elem)) {
81 elem = d->processTouristAttractionVisit(elem.value<TouristAttractionVisit>());
82 } else if (JsonLd::isA<BusReservation>(elem)) {
83 elem = d->processBusReservation(elem.value<BusReservation>());
84 } else if (JsonLd::isA<BoatReservation>(elem)) {
85 elem = d->processBoatReservation(elem.value<BoatReservation>());
86 } else if (JsonLd::isA<EventReservation>(elem)) {
87 elem = d->processEventReservation(elem.value<EventReservation>());
88 } else if (JsonLd::isA<RentalCarReservation>(elem)) {
89 elem = d->processRentalCarReservation(elem.value<RentalCarReservation>());
90 } else if (JsonLd::isA<TaxiReservation>(elem)) {
91 elem = d->processTaxiReservation(elem.value<TaxiReservation>());
92 }
93
94 // "reservationFor" types
95 else if (JsonLd::isA<LodgingBusiness>(elem)) {
96 elem = d->processPlace(elem.value<LodgingBusiness>());
97 } else if (JsonLd::isA<FoodEstablishment>(elem)) {
98 elem = d->processPlace(elem.value<FoodEstablishment>());
99 } else if (JsonLd::isA<Event>(elem)) {
100 elem = d->processEvent(elem.value<Event>());
101 } else if (JsonLd::isA<LocalBusiness>(elem)) {
102 elem = d->processPlace(elem.value<LocalBusiness>());
103 }
104
105 // non-reservation types
106 else if (JsonLd::isA<ProgramMembership>(elem)) {
107 elem = d->processProgramMembership(elem.value<ProgramMembership>());
108 } else if (JsonLd::isA<Ticket>(elem)) {
109 elem = d->processTicket(elem.value<Ticket>());
110 }
111
112 d->mergeOrAppend(elem);
113 }
114}
115
117 if (!d->m_resultFinalized) {
118 // fold elements we have reservations for into those reservations
119 for (auto it = d->m_data.begin(); it != d->m_data.end();) {
120 if (JsonLd::isA<Reservation>(*it)) {
121 ++it;
122 continue;
123 }
124
125 bool merged = false;
126 for (auto it2 = d->m_data.begin(); it2 != d->m_data.end(); ++it2) {
127 const auto resFor = JsonLdDocument::readProperty(*it2, "reservationFor");
128 if (MergeUtil::isSame(resFor, *it)) {
129 JsonLdDocument::writeProperty(*it2, "reservationFor", MergeUtil::merge(resFor, *it));
130 merged = true;
131 }
132 }
133
134 if (merged) {
135 it = d->m_data.erase(it);
136 } else {
137 ++it;
138 }
139 }
140
141 // search for "triangular" patterns, ie. a location change element that has a matching departure
142 // and matching arrival to two different other location change elements (A->C vs A->B + B->C).
143 // we remove those, as the fine-granular results are better
144 if (d->m_data.size() >= 3) {
145 for (auto it = d->m_data.begin(); it != d->m_data.end();) {
146 auto depIt = it;
147 auto arrIt = it;
148 for (auto it2 = d->m_data.begin(); it2 != d->m_data.end(); ++it2) {
149 if (it == it2) {
150 continue;
151 }
152 if (MergeUtil::hasSameDeparture(*it, *it2)) {
153 depIt = it2;
154 }
155 if (MergeUtil::hasSameArrival(*it, *it2)) {
156 arrIt = it2;
157 }
158 }
159
160 if (depIt != it && arrIt != it && depIt != arrIt) {
161 it = d->m_data.erase(it);
162 } else {
163 ++it;
164 }
165 }
166 }
167
168 d->m_resultFinalized = true;
169 }
170
171 std::stable_sort(d->m_data.begin(), d->m_data.end(), SortUtil::isBefore);
172 return d->m_data;
173}
174
176{
177 d->m_contextDate = dt;
178}
179
180void ExtractorPostprocessor::setValidationEnabled([[maybe_unused]] bool validate)
181{
182}
183
184void ExtractorPostprocessorPrivate::mergeOrAppend(const QVariant &elem)
185{
186 const auto it = std::find_if(m_data.begin(), m_data.end(), [elem](const QVariant &other) {
187 return MergeUtil::isSame(elem, other);
188 });
189
190 if (it == m_data.end()) {
191 m_data.push_back(elem);
192 } else {
193 *it = MergeUtil::merge(*it, elem);
194 }
195}
196
197QVariant ExtractorPostprocessorPrivate::processFlightReservation(FlightReservation res) const
198{
199 // expand ticketToken for IATA BCBP data
200 const auto bcbp = res.reservedTicket().value<Ticket>().ticketTokenData().toString();
201 if (!bcbp.isEmpty()) {
202 const auto bcbpData = IataBcbpParser::parse(bcbp, m_contextDate);
203 if (bcbpData.size() == 1) {
204 res = JsonLdDocument::apply(bcbpData.at(0), res).value<FlightReservation>();
205 // standardize on the BCBP booking reference, not some secondary one we might have in structured data for example
206 res.setReservationNumber(bcbpData.at(0).value<FlightReservation>().reservationNumber());
207 } else {
208 for (const auto &data : bcbpData) {
209 if (MergeUtil::isSame(res, data)) {
211 break;
212 }
213 }
214 }
215 }
216
217 if (res.reservationFor().isValid()) {
218 FlightPostProcessor p;
219 res.setReservationFor(p.processFlight(res.reservationFor().value<Flight>()));
220 }
221 return processReservation(res);
222}
223
224TrainReservation ExtractorPostprocessorPrivate::processTrainReservation(TrainReservation res) const
225{
226 if (res.reservationFor().isValid()) {
227 res.setReservationFor(processTrainTrip(res.reservationFor().value<TrainTrip>()));
228 }
229 return processReservation(res);
230}
231
232TrainTrip ExtractorPostprocessorPrivate::processTrainTrip(TrainTrip trip) const
233{
234 trip.setArrivalPlatform(trip.arrivalPlatform().trimmed());
235 trip.setDeparturePlatform(trip.departurePlatform().trimmed());
236 trip.setDepartureStation(processTrainStation(trip.departureStation()));
237 trip.setArrivalStation(processTrainStation(trip.arrivalStation()));
238 trip.setDepartureTime(processTrainTripTime(trip.departureTime(), trip.departureDay(), trip.departureStation()));
239 trip.setArrivalTime(processTrainTripTime(trip.arrivalTime(), trip.departureDay(), trip.arrivalStation()));
240 trip.setTrainNumber(trip.trainNumber().simplified());
241 trip.setTrainName(trip.trainName().simplified());
242
243 // arrival less than a day before departure is an indication of the extractor failing to detect day rollover
244 const auto duration = trip.departureTime().secsTo(trip.arrivalTime());
245 if (duration < 0 && duration > -3600*24 && trip.departureTime().timeSpec() == trip.arrivalTime().timeSpec()) {
246 trip.setArrivalTime(trip.arrivalTime().addDays(1));
247 }
248
249 return trip;
250}
251
252static void applyStationData(const KnowledgeDb::TrainStation &record, TrainStation &station)
253{
254 if (!station.geo().isValid() && record.coordinate.isValid()) {
256 geo.setLatitude(record.coordinate.latitude);
257 geo.setLongitude(record.coordinate.longitude);
258 station.setGeo(geo);
259 }
260 auto addr = station.address();
261 if (addr.addressCountry().isEmpty() && record.country.isValid()) {
262 addr.setAddressCountry(record.country.toString());
263 station.setAddress(addr);
264 }
265}
266
267static void applyStationCountry(const QString &isoCode, TrainStation &station)
268{
269 auto addr = station.address();
270 if (addr.addressCountry().isEmpty()) {
271 addr.setAddressCountry(isoCode.toUpper());
272 station.setAddress(addr);
273 }
274}
275
276TrainStation ExtractorPostprocessorPrivate::processTrainStation(TrainStation station) const
277{
278 const auto id = station.identifier();
279 if (id.isEmpty()) { // empty -> null cleanup, to have more compact json-ld output
280 station.setIdentifier(QString());
281 } else if (id.startsWith("sncf:"_L1) && id.size() == 10) {
283 applyStationData(record, station);
284 applyStationCountry(id.mid(5, 2).toUpper(), station);
285 } else if (id.startsWith("ibnr:"_L1) && id.size() == 12) {
286 const auto record = KnowledgeDb::stationForIbnr(KnowledgeDb::IBNR{id.mid(5).toUInt()});
287 applyStationData(record, station);
288 const auto country = KnowledgeDb::countryIdForUicCode(QStringView(id).mid(5, 2).toUShort()) .toString();
289 applyStationCountry(country, station);
290 } else if (id.startsWith("uic:"_L1) && id.size() == 11) {
291 const auto record = KnowledgeDb::stationForUic( KnowledgeDb::UICStation{id.mid(4).toUInt()});
292 applyStationData(record, station);
293 const auto country = KnowledgeDb::countryIdForUicCode(QStringView(id).mid(4, 2).toUShort()) .toString();
294 applyStationCountry(country, station);
295 } else if (id.startsWith("ir:"_L1) && id.size() > 4) {
296 const auto record = KnowledgeDb::stationForIndianRailwaysStationCode(id.mid(3));
297 applyStationData(record, station);
298 } else if (id.startsWith("benerail:"_L1) && id.size() == 14) {
300 applyStationData(record, station);
301 applyStationCountry(id.mid(9, 2).toUpper(), station);
302 } else if (id.startsWith("vrfi:"_L1) && id.size() >= 7 && id.size() <= 9) {
304 applyStationData(record, station);
305 } else if (id.startsWith("iata:"_L1) && id.size() == 8) {
306 const auto iataCode = KnowledgeDb::IataCode(QStringView(id).mid(5));
307 const auto record = KnowledgeDb::stationForIataCode(iataCode);
308 applyStationData(record, station);
309 // fall back to the airport with the matching IATA code for the country
310 // information we cannot use the coordinate though, as that points to the
311 // actual airport, not the station
312 applyStationCountry(KnowledgeDb::countryForAirport(iataCode).toString(), station);
313 } else if (id.startsWith("amtrak:"_L1) && id.size() == 10) {
315 applyStationData(record, station);
316 } else if (id.startsWith("via:"_L1) && id.size() == 8) {
318 applyStationData(record, station);
319 } else if (id.startsWith("uk:"_L1) && id.size() == 6) {
321 applyStationData(record, station);
322 }
323
324 return processPlace(station);
325}
326
327QDateTime ExtractorPostprocessorPrivate::processTrainTripTime(QDateTime dt, QDate departureDay, const TrainStation& station) const
328{
329 if (!dt.isValid()) {
330 return dt;
331 }
332
333 if (dt.date().year() <= 1970 && departureDay.isValid()) { // we just have the time, but not the day
334 dt.setDate(departureDay);
335 }
336 return processTimeForLocation(dt, station);
337}
338
339BusReservation ExtractorPostprocessorPrivate::processBusReservation(BusReservation res) const
340{
341 if (res.reservationFor().isValid()) {
342 res.setReservationFor(processBusTrip(res.reservationFor().value<BusTrip>()));
343 }
344 return processReservation(res);
345}
346
347BusTrip ExtractorPostprocessorPrivate::processBusTrip(BusTrip trip) const
348{
349 trip.setDepartureBusStop(processPlace(trip.departureBusStop()));
350 trip.setArrivalBusStop(processPlace(trip.arrivalBusStop()));
351 trip.setDepartureTime(processTimeForLocation(trip.departureTime(), trip.departureBusStop()));
352 trip.setArrivalTime(processTimeForLocation(trip.arrivalTime(), trip.arrivalBusStop()));
353 trip.setBusNumber(trip.busNumber().simplified());
354 trip.setBusName(trip.busName().simplified());
355 return trip;
356}
357
358BoatReservation ExtractorPostprocessorPrivate::processBoatReservation(BoatReservation res) const
359{
360 if (res.reservationFor().isValid()) {
361 res.setReservationFor(processBoatTrip(res.reservationFor().value<BoatTrip>()));
362 }
363 return processReservation(res);
364}
365
366BoatTrip ExtractorPostprocessorPrivate::processBoatTrip(BoatTrip trip) const
367{
368 trip.setDepartureBoatTerminal(processPlace(trip.departureBoatTerminal()));
369 trip.setArrivalBoatTerminal(processPlace(trip.arrivalBoatTerminal()));
370 trip.setDepartureTime(processTimeForLocation(trip.departureTime(), trip.departureBoatTerminal()));
371 trip.setArrivalTime(processTimeForLocation(trip.arrivalTime(), trip.arrivalBoatTerminal()));
372
373 // arrival less than a day before departure is an indication of the extractor failing to detect day rollover
374 const auto duration = trip.departureTime().secsTo(trip.arrivalTime());
375 if (duration < 0 && duration > -3600*24) {
376 trip.setArrivalTime(trip.arrivalTime().addDays(1));
377 }
378
379 return trip;
380}
381
382LodgingReservation ExtractorPostprocessorPrivate::processLodgingReservation(LodgingReservation res) const
383{
384 if (res.reservationFor().isValid()) {
385 res.setReservationFor(processPlace(res.reservationFor().value<LodgingBusiness>()));
386 res.setCheckinTime(processTimeForLocation(res.checkinTime(), res.reservationFor().value<LodgingBusiness>()));
387 res.setCheckoutTime(processTimeForLocation(res.checkoutTime(), res.reservationFor().value<LodgingBusiness>()));
388 }
389 return processReservation(res);
390}
391
392TaxiReservation ExtractorPostprocessorPrivate::processTaxiReservation(TaxiReservation res) const
393{
394 res.setPickupLocation(processPlace(res.pickupLocation()));
395 res.setPickupTime(processTimeForLocation(res.pickupTime(), res.pickupLocation()));
396 return processReservation(res);
397}
398
399RentalCarReservation ExtractorPostprocessorPrivate::processRentalCarReservation(RentalCarReservation res) const
400{
401 if (res.reservationFor().isValid()) {
402 res.setReservationFor(processRentalCar(res.reservationFor().value<RentalCar>()));
403 }
404 res.setPickupLocation(processPlace(res.pickupLocation()));
405 res.setDropoffLocation(processPlace(res.dropoffLocation()));
406 res.setPickupTime(processTimeForLocation(res.pickupTime(), res.pickupLocation()));
407 res.setDropoffTime(processTimeForLocation(res.dropoffTime(), res.dropoffLocation()));
408 return processReservation(res);
409}
410
411RentalCar ExtractorPostprocessorPrivate::processRentalCar(RentalCar car) const
412{
413 car.setName(car.name().trimmed());
414 return car;
415}
416
417FoodEstablishmentReservation ExtractorPostprocessorPrivate::processFoodEstablishmentReservation(FoodEstablishmentReservation res) const
418{
419 if (res.reservationFor().isValid()) {
420 res.setReservationFor(processPlace(res.reservationFor().value<FoodEstablishment>()));
421 res.setStartTime(processTimeForLocation(res.startTime(), res.reservationFor().value<FoodEstablishment>()));
422 res.setEndTime(processTimeForLocation(res.endTime(), res.reservationFor().value<FoodEstablishment>()));
423 }
424 return processReservation(res);
425}
426
427TouristAttractionVisit ExtractorPostprocessorPrivate::processTouristAttractionVisit(TouristAttractionVisit visit) const
428{
429 visit.setTouristAttraction(processPlace(visit.touristAttraction()));
430 visit.setArrivalTime(processTimeForLocation(visit.arrivalTime(), visit.touristAttraction()));
431 visit.setDepartureTime(processTimeForLocation(visit.departureTime(), visit.touristAttraction()));
432 return visit;
433}
434
435EventReservation ExtractorPostprocessorPrivate::processEventReservation(EventReservation res) const
436{
437 if (res.reservationFor().isValid()) {
438 res.setReservationFor(processEvent(res.reservationFor().value<Event>()));
439 }
440 return processReservation(res);
441}
442
443KItinerary::Event ExtractorPostprocessorPrivate::processEvent(KItinerary::Event event) const
444{
445 event.setName(StringUtil::clean(event.name()));
446
447 // normalize location to be a Place
448 if (JsonLd::isA<PostalAddress>(event.location())) {
449 Place place;
450 place.setAddress(event.location().value<PostalAddress>());
451 event.setLocation(place);
452 }
453
454 if (JsonLd::isA<Place>(event.location())) {
455 event.setLocation(processPlace(event.location().value<Place>()));
456
457 // try to obtain timezones if we have a location
458 event.setStartDate(processTimeForLocation(event.startDate(), event.location().value<Place>()));
459 event.setEndDate(processTimeForLocation(event.endDate(), event.location().value<Place>()));
460 event.setDoorTime(processTimeForLocation(event.doorTime(), event.location().value<Place>()));
461 }
462
463 return event;
464}
465
466static QString processCurrency(const QString &currency)
467{
468 if (currency.size() != 3 || !std::all_of(currency.begin(), currency.end(), [](QChar c) { return c.isUpper(); })) {
469 return {};
470 }
471 return currency;
472}
473
474Ticket ExtractorPostprocessorPrivate::processTicket(Ticket ticket) const
475{
476 ticket.setName(StringUtil::clean(ticket.name()));
477 ticket.setTicketNumber(ticket.ticketNumber().simplified());
478 ticket.setUnderName(processPerson(ticket.underName()));
479 ticket.setTicketedSeat(processSeat(ticket.ticketedSeat()));
480 ticket.setPriceCurrency(processCurrency(ticket.priceCurrency()));
481 return ticket;
482}
483
484ProgramMembership ExtractorPostprocessorPrivate::processProgramMembership(ProgramMembership program) const
485{
486 // remove empty dummy entries found eg. in ERA FCB data
487 if (const auto name = program.programName(); std::none_of(name.begin(), name.end(), [](QChar c) { return c.isLetter(); })) {
488 program.setProgramName(QString());
489 }
490
491 program.setProgramName(program.programName().simplified());
492 // avoid emitting spurious empty ProgramMembership objects caused by empty elements in JSON-LD/Microdata input
493 if (program.programName().isEmpty() && !program.programName().isNull()) {
494 program.setProgramName(QString());
495 }
496 program.setMember(processPerson(program.member()));
497 return program;
498}
499
500Seat ExtractorPostprocessorPrivate::processSeat(Seat seat) const
501{
502 seat.setSeatSection(seat.seatSection().simplified());
503 seat.setSeatRow(seat.seatRow().simplified());
504 seat.setSeatNumber(seat.seatNumber().simplified());
505 seat.setSeatingType(seat.seatingType().simplified());
506 return seat;
507}
508
509template <typename T>
510T ExtractorPostprocessorPrivate::processReservation(T res) const
511{
512 res.setUnderName(processPerson(res.underName().template value<Person>()));
513 res.setPotentialAction(processActions(res.potentialAction()));
514 res.setReservationNumber(res.reservationNumber().trimmed());
515 res.setProgramMembershipUsed(processProgramMembership(res.programMembershipUsed()));
516 res.setPriceCurrency(processCurrency(res.priceCurrency()));
517
518 if (JsonLd::isA<Ticket>(res.reservedTicket())) {
519 res.setReservedTicket(processTicket(res.reservedTicket().template value<Ticket>()));
520 }
521 return res;
522}
523
/** Honorifics stripped from the beginning or end of name parts. */
static constexpr const char* name_prefixes[] = {
    "DR",
    "MR",
    "MRS",
    "MS",
};
527
528static bool isSeparator(QChar c)
529{
530 return c == QLatin1Char(' ') || c == QLatin1Char('/');
531}
532
533static QString simplifyNamePart(QString n)
534{
535 n = n.simplified();
536
537 for (auto prefix : name_prefixes) {
538 const int prefixLen = std::strlen(prefix);
539 if (n.size() > prefixLen + 2 &&
540 n.startsWith(QLatin1StringView(prefix, prefixLen),
542 isSeparator(n[prefixLen])) {
543 return n.mid(prefixLen + 1);
544 }
545 if (n.size() > prefixLen + 2 &&
546 n.endsWith(QLatin1StringView(prefix, prefixLen),
548 isSeparator(n[n.size() - prefixLen - 1])) {
549 return n.left(n.size() - prefixLen - 1);
550 }
551 }
552
553 return n;
554}
555
556KItinerary::Person ExtractorPostprocessorPrivate::processPerson(KItinerary::Person person) const
557{
558 person.setName(simplifyNamePart(person.name()));
559 person.setFamilyName(simplifyNamePart(person.familyName()));
560 person.setGivenName(simplifyNamePart(person.givenName()));
561
562 // fill name with name parts, if it's empty
563 if ((person.name().isEmpty() || person.name() == person.familyName() || person.name() == person.givenName())
564 && !person.familyName().isEmpty() && !person.givenName().isEmpty())
565 {
566 person.setName(person.givenName() + QLatin1Char(' ') + person.familyName());
567 }
568
569 return person;
570}
571
572PostalAddress ExtractorPostprocessorPrivate::processAddress(PostalAddress addr, const QString &phoneNumber, const GeoCoordinates &geo)
573{
574 addr.setAddressCountry(addr.addressCountry().simplified());
575
576 // convert to ISO 3166-1 alpha-2 country codes
577 if (addr.addressCountry().size() > 2) {
578 QString alpha2Code;
579
580 // try ISO 3166-1 alpha-3, we get that e.g. from Flixbus
581 if (addr.addressCountry().size() == 3) {
582 alpha2Code = KCountry::fromAlpha3(addr.addressCountry()).alpha2();
583 }
584 if (alpha2Code.isEmpty()) {
585 alpha2Code = KCountry::fromName(addr.addressCountry()).alpha2();
586 }
587 if (!alpha2Code.isEmpty()) {
588 addr.setAddressCountry(alpha2Code);
589 }
590 }
591
592 // upper case country codes
593 if (addr.addressCountry().size() == 2) {
594 addr.setAddressCountry(addr.addressCountry().toUpper());
595 }
596
597 // normalize strings
598 addr.setStreetAddress(addr.streetAddress().simplified());
599 addr.setPostalCode(addr.postalCode().simplified());
600 addr.setAddressLocality(addr.addressLocality().simplified());
601 addr.setAddressRegion(addr.addressRegion().simplified());
602
603#if HAVE_PHONENUMBER
604 // recover country from phone number, if we have that
605 if (!phoneNumber.isEmpty() && addr.addressCountry().size() != 2) {
606 const auto phoneStr = phoneNumber.toStdString();
607 const auto util = i18n::phonenumbers::PhoneNumberUtil::GetInstance();
608 i18n::phonenumbers::PhoneNumber number;
609 if (util->ParseAndKeepRawInput(phoneStr, "ZZ", &number) == i18n::phonenumbers::PhoneNumberUtil::NO_PARSING_ERROR) {
610 std::string isoCode;
611 util->GetRegionCodeForNumber(number, &isoCode);
612 if (!isoCode.empty() && isoCode != "ZZ") {
613 addr.setAddressCountry(QString::fromStdString(isoCode));
614 }
615 }
616 }
617#endif
618
619 if (geo.isValid() && addr.addressCountry().size() != 2) {
620 const auto country = KCountry::fromLocation(geo.latitude(), geo.longitude());
621 if (country.isValid()) {
622 addr.setAddressCountry(country.alpha2());
623 }
624 }
625
626 AddressParser addrParser;
627 addrParser.setFallbackCountry(KCountry::fromQLocale(QLocale().territory()).alpha2());
628 addrParser.parse(addr);
629 addr = addrParser.result();
630 return addr;
631}
632
633QString ExtractorPostprocessorPrivate::processPhoneNumber(const QString &phoneNumber, const PostalAddress &addr)
634{
635#if HAVE_PHONENUMBER
636 // or complete the phone number if we know the country
637 if (!phoneNumber.isEmpty() && addr.addressCountry().size() == 2) {
638 auto phoneStr = phoneNumber.toStdString();
639 const auto isoCode = addr.addressCountry().toStdString();
640 const auto util = i18n::phonenumbers::PhoneNumberUtil::GetInstance();
641 i18n::phonenumbers::PhoneNumber number;
642 if (util->ParseAndKeepRawInput(phoneStr, isoCode, &number) == i18n::phonenumbers::PhoneNumberUtil::NO_PARSING_ERROR) {
643 if (number.country_code_source() == i18n::phonenumbers::PhoneNumber_CountryCodeSource_FROM_DEFAULT_COUNTRY) {
644 util->Format(number, i18n::phonenumbers::PhoneNumberUtil::INTERNATIONAL, &phoneStr);
645 return QString::fromStdString(phoneStr);
646 }
647 }
648 }
649#else
650 Q_UNUSED(addr)
651#endif
652 return phoneNumber.simplified();
653}
654
655QVariantList ExtractorPostprocessorPrivate::processActions(QVariantList actions) const
656{
657 // remove non-actions and actions with invalid URLs
658 QUrl viewUrl;
659 for (auto it = actions.begin(); it != actions.end();) {
660 if (!JsonLd::canConvert<Action>(*it)) {
661 it = actions.erase(it);
662 continue;
663 }
664
665 const auto action = JsonLd::convert<Action>(*it);
666 if (!action.target().isValid()) {
667 it = actions.erase(it);
668 continue;
669 }
670
671 if (JsonLd::isA<ViewAction>(*it)) {
672 viewUrl = action.target();
673 }
674 ++it;
675 }
676
677 // normalize the order, so JSON comparison still yields correct results
678 std::sort(actions.begin(), actions.end(), [](const QVariant &lhs, const QVariant &rhs) {
679 return strcmp(lhs.typeName(), rhs.typeName()) < 0;
680 });
681
682 // remove actions that don't actually have their own target, or duplicates
683 QUrl prevUrl;
684 const char* prevType = nullptr;
685 for (auto it = actions.begin(); it != actions.end();) {
686 const auto action = JsonLd::convert<Action>(*it);
687 const auto isDuplicate = action.target() == prevUrl && (prevType ? strcmp(prevType, (*it).typeName()) == 0 : false);
688 if ((JsonLd::isA<ViewAction>(*it) || action.target() != viewUrl) && !isDuplicate) {
689 prevUrl = action.target();
690 prevType = (*it).typeName();
691 ++it;
692 } else {
693 it = actions.erase(it);
694 }
695 }
696
697 return actions;
698}
699
700template <typename T>
701QDateTime ExtractorPostprocessorPrivate::processTimeForLocation(QDateTime dt, const T &place) const
702{
703 if (!dt.isValid() ) {
704 return dt;
705 }
706 if ((dt.timeSpec() == Qt::TimeZone && dt.timeZone() != QTimeZone::utc())) {
707 if (KnowledgeDb::isPlausibleTimeZone(dt.timeZone(), place.geo().latitude(), place.geo().longitude(), place.address().addressCountry(), place.address().addressRegion())) {
708 return dt;
709 }
710 // drop timezones where we are sure they don't match the location
712 }
713
714 const auto tz = KnowledgeDb::timezoneForLocation(place.geo().latitude(), place.geo().longitude(), place.address().addressCountry(), place.address().addressRegion());
715 if (!tz.isValid()) {
716 return dt;
717 }
718
719 // prefer our timezone over externally provided UTC offset, if they match
720 if (dt.timeSpec() == Qt::OffsetFromUTC && tz.offsetFromUtc(dt) != dt.offsetFromUtc()) {
721 qCDebug(Log) << "UTC offset clashes with expected timezone!" << dt << dt.offsetFromUtc() << tz.id() << tz.offsetFromUtc(dt);
722 return dt;
723 }
724
725 if (dt.timeSpec() == Qt::OffsetFromUTC || dt.timeSpec() == Qt::LocalTime) {
726 dt.setTimeZone(tz);
727 } else if (dt.timeSpec() == Qt::UTC || (dt.timeSpec() == Qt::TimeZone && dt.timeZone() == QTimeZone::utc())) {
728 dt = dt.toTimeZone(tz);
729 }
730 return dt;
731}
static KCountry fromLocation(float latitude, float longitude)
bool isValid() const
static KCountry fromName(QStringView name)
static KCountry fromQLocale(QLocale::Country country)
QString alpha2() const
static KCountry fromAlpha3(const char *alpha3Code)
A boat or ferry reservation.
A boat or ferry trip.
Definition boattrip.h:23
A bus reservation.
A bus trip.
Definition bustrip.h:22
An event reservation.
An event.
Definition event.h:21
Post-process extracted data to filter out garbage and augment data from other sources.
void setContextDate(const QDateTime &dt)
The date the reservation(s) processed here have been made, if known.
QList< QVariant > result() const
This returns the final result of all previously executed processing steps followed by sorting and fil...
void setValidationEnabled(bool validate)
Enable or disable validation.
A flight reservation.
Definition reservation.h:90
A flight.
Definition flight.h:25
Food-related business (such as a restaurant, or a bakery).
Geographic coordinates.
Definition place.h:23
static void writeProperty(QVariant &obj, const char *name, const QVariant &value)
Set property name on object obj to value value.
static QVariant apply(const QVariant &lhs, const QVariant &rhs)
Apply all properties of rhs on to lhs.
static QVariant readProperty(const QVariant &obj, const char *name)
Read property name on object obj.
constexpr bool isValid() const
Returns true if this is a valid identifier.
Definition alphaid.h:56
QString toString() const
Returns a string representation of this identifier.
Definition alphaid.h:75
A hotel reservation.
Definition reservation.h:77
static QVariant merge(const QVariant &lhs, const QVariant &rhs)
Merge the two given objects.
static bool hasSameArrival(const QVariant &lhs, const QVariant &rhs)
Checks whether two transport reservation elements refer to the same arrival.
static bool isSame(const QVariant &lhs, const QVariant &rhs)
Checks if two Reservation or Trip values refer to the same booking element.
static bool hasSameDeparture(const QVariant &lhs, const QVariant &rhs)
Checks whether two transport reservation elements refer to the same departure.
A person.
Definition person.h:20
Base class for places.
Definition place.h:69
QString identifier
Identifier.
Definition place.h:85
Postal address.
Definition place.h:46
QString addressCountry
The country this address is in, as ISO 3166-1 alpha 2 code.
Definition place.h:53
A frequent traveler, bonus points or discount scheme program membership.
A Rental Car reservation.
A car rental.
Definition rentalcar.h:22
A reserved seat.
Definition ticket.h:23
A Taxi reservation.
A booked ticket.
Definition ticket.h:41
A train reservation.
Train station.
Definition place.h:126
A train trip.
Definition traintrip.h:24
QDate departureDay
The scheduled day of departure.
Definition traintrip.h:42
AKONADI_CALENDAR_EXPORT KCalendarCore::Event::Ptr event(const Akonadi::Item &item)
char * toString(const EngineQuery &query)
KIOCORE_EXPORT QString number(KIO::filesize_t size)
QList< QVariant > parse(const QString &message, const QDateTime &externalIssueDateTime=QDateTime())
Parses the bar coded boarding pass message message into a list of FlightReservation instances.
bool isA(const QVariant &value)
Returns true if value is of type T.
Definition datatypes.h:24
bool canConvert(const QVariant &value)
Checks if the given value can be up-cast to T.
Definition datatypes.h:31
T convert(const QVariant &value)
Up-cast value to T.
Definition datatypes.h:47
TrainStation stationForViaRailStationCode(ViaRailStationCode code)
Lookup train station data by Via Rail station code.
CountryId countryIdForUicCode(uint16_t uicCountryCode)
Look up country ISO code from a UIC country code.
Definition countrydb.cpp:82
TrainStation stationForUkRailwayStationCode(UKRailwayStationCode code)
Lookup train station data by UK railway station code.
AlphaId< uint16_t, 3 > IataCode
IATA airport code.
Definition iatacode.h:17
TrainStation stationForIbnr(IBNR ibnr)
Lookup train station data by IBNR.
TrainStation stationForAmtrakStationCode(AmtrakStationCode code)
Lookup train station data by Amtrak station code.
TrainStation stationForUic(UICStation uic)
Lookup train station data by UIC station id.
TrainStation stationForIndianRailwaysStationCode(const QString &code)
Lookup train station data by Indian Railways station code.
TrainStation stationForSncfStationId(SncfStationId sncfId)
Lookup train station data by SNCF station id.
KnowledgeDb::CountryId countryForAirport(IataCode iataCode)
Returns the country the airport with IATA code iataCode is in.
Definition airportdb.cpp:50
TrainStation stationForVRStationCode(VRStationCode vrStation)
Lookup train station data by VR (Finland) station code.
TrainStation stationForIataCode(IataCode iataCode)
Lookup train station data by IATA location code.
TrainStation stationForBenerailId(BenerailStationId id)
Lookup train station data by Benerail station identifier.
GeoCoordinates geo(const QVariant &location)
Returns the geo coordinates of a given location.
bool isBefore(const QVariant &lhs, const QVariant &rhs)
Sorting function for top-level reservation/visit/event elements.
Definition sortutil.cpp:169
QString clean(const QString &s)
Cleans up extra white spaces and XML entities from s.
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
KI18NLOCALEDATA_EXPORT KCountry country(const char *ianaId)
bool isValid(int year, int month, int day)
int year() const const
QDateTime addDays(qint64 ndays) const const
QDate date() const const
bool isValid() const const
int offsetFromUtc() const const
qint64 secsTo(const QDateTime &other) const const
void setDate(QDate date)
void setTimeZone(const QTimeZone &toZone)
Qt::TimeSpec timeSpec() const const
QTimeZone timeZone() const const
QDateTime toTimeZone(const QTimeZone &timeZone) const const
iterator begin()
iterator end()
bool endsWith(QChar c, Qt::CaseSensitivity cs) const const
QString fromStdString(const std::string &str)
bool isEmpty() const const
bool isNull() const const
QString left(qsizetype n) const const
QString mid(qsizetype position, qsizetype n) const const
QString simplified() const const
qsizetype size() const const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const const
std::string toStdString() const const
QString toUpper() const const
QString trimmed() const const
CaseInsensitive
TimeZone
QTimeZone utc()
bool isValid() const const
T value() const const
Train station entry in the station table.
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:50:00 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.