KItinerary

extractorpostprocessor.cpp
1/*
2 SPDX-FileCopyrightText: 2017 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "config-kitinerary.h"
8#include "extractorpostprocessor.h"
9#include "extractorpostprocessor_p.h"
10#include "extractorvalidator.h"
11#include "flightpostprocessor_p.h"
12#include "stringutil.h"
13
14#include "iata/iatabcbpparser.h"
15#include "jsonlddocument.h"
16#include "logging.h"
17#include "mergeutil.h"
18#include "sortutil.h"
19#include "text/addressparser_p.h"
20
21#include "knowledgedb/airportdb.h"
22#include "knowledgedb/timezonedb_p.h"
23#include "knowledgedb/trainstationdb.h"
24
25#include <KItinerary/Action>
26#include <KItinerary/BoatTrip>
27#include <KItinerary/BusTrip>
28#include <KItinerary/Event>
29#include <KItinerary/Flight>
30#include <KItinerary/Organization>
31#include <KItinerary/Person>
32#include <KItinerary/Place>
33#include <KItinerary/ProgramMembership>
34#include <KItinerary/RentalCar>
35#include <KItinerary/Reservation>
36#include <KItinerary/Taxi>
37#include <KItinerary/Ticket>
38#include <KItinerary/TrainTrip>
39#include <KItinerary/Visit>
40
41#include <KCountry>
42
43#include <QDebug>
44#include <QJsonArray>
45#include <QJsonDocument>
46#include <QTimeZone>
47#include <QUrl>
48
49#if HAVE_PHONENUMBER
50#include <phonenumbers/phonenumberutil.h>
51#endif
52
53#include <algorithm>
54#include <cmath>
55#include <cstring>
56
57using namespace Qt::Literals::StringLiterals;
58using namespace KItinerary;
59
60ExtractorPostprocessor::ExtractorPostprocessor()
61 : d(new ExtractorPostprocessorPrivate)
62{
63}
64
65ExtractorPostprocessor::ExtractorPostprocessor(ExtractorPostprocessor &&) noexcept = default;
67
68void ExtractorPostprocessor::process(const QList<QVariant> &data) {
69 d->m_resultFinalized = false;
70 d->m_data.reserve(d->m_data.size() + data.size());
71 for (auto elem : data) {
72 // reservation types
74 elem = d->processFlightReservation(elem.value<FlightReservation>());
75 } else if (JsonLd::isA<TrainReservation>(elem)) {
76 elem = d->processTrainReservation(elem.value<TrainReservation>());
77 } else if (JsonLd::isA<LodgingReservation>(elem)) {
78 elem = d->processLodgingReservation(elem.value<LodgingReservation>());
80 elem = d->processFoodEstablishmentReservation(elem.value<FoodEstablishmentReservation>());
81 } else if (JsonLd::isA<TouristAttractionVisit>(elem)) {
82 elem = d->processTouristAttractionVisit(elem.value<TouristAttractionVisit>());
83 } else if (JsonLd::isA<BusReservation>(elem)) {
84 elem = d->processBusReservation(elem.value<BusReservation>());
85 } else if (JsonLd::isA<BoatReservation>(elem)) {
86 elem = d->processBoatReservation(elem.value<BoatReservation>());
87 } else if (JsonLd::isA<EventReservation>(elem)) {
88 elem = d->processEventReservation(elem.value<EventReservation>());
89 } else if (JsonLd::isA<RentalCarReservation>(elem)) {
90 elem = d->processRentalCarReservation(elem.value<RentalCarReservation>());
91 } else if (JsonLd::isA<TaxiReservation>(elem)) {
92 elem = d->processTaxiReservation(elem.value<TaxiReservation>());
93 }
94
95 // "reservationFor" types
96 else if (JsonLd::isA<LodgingBusiness>(elem)) {
97 elem = d->processPlace(elem.value<LodgingBusiness>());
98 } else if (JsonLd::isA<FoodEstablishment>(elem)) {
99 elem = d->processPlace(elem.value<FoodEstablishment>());
100 } else if (JsonLd::isA<Event>(elem)) {
101 elem = d->processEvent(elem.value<Event>());
102 } else if (JsonLd::isA<LocalBusiness>(elem)) {
103 elem = d->processPlace(elem.value<LocalBusiness>());
104 }
105
106 // non-reservation types
107 else if (JsonLd::isA<ProgramMembership>(elem)) {
108 elem = d->processProgramMembership(elem.value<ProgramMembership>());
109 } else if (JsonLd::isA<Ticket>(elem)) {
110 elem = d->processTicket(elem.value<Ticket>());
111 }
112
113 d->mergeOrAppend(elem);
114 }
115}
116
117[[nodiscard]] static QVariant mergeTicket(QVariant lhs, const QVariant &rhs)
118{
119 const auto rhsTicket = JsonLdDocument::readProperty(rhs, "reservedTicket");
120 const auto lhsTicket = JsonLdDocument::readProperty(lhs, "reservedTicket");
121 JsonLdDocument::writeProperty(lhs, "reservedTicket", MergeUtil::merge(lhsTicket, rhsTicket));
122 return lhs;
123}
124
126 if (!d->m_resultFinalized) {
127 // fold elements we have reservations for into those reservations
128 for (auto it = d->m_data.begin(); it != d->m_data.end();) {
129 if (JsonLd::isA<Reservation>(*it)) {
130 ++it;
131 continue;
132 }
133
134 bool merged = false;
135 for (auto it2 = d->m_data.begin(); it2 != d->m_data.end(); ++it2) {
136 const auto resFor = JsonLdDocument::readProperty(*it2, "reservationFor");
137 if (MergeUtil::isSame(resFor, *it)) {
138 JsonLdDocument::writeProperty(*it2, "reservationFor", MergeUtil::merge(resFor, *it));
139 merged = true;
140 }
141 }
142
143 if (merged) {
144 it = d->m_data.erase(it);
145 } else {
146 ++it;
147 }
148 }
149
150 // search for "triangular" patterns, ie. a location change element that has a matching departure
151 // and matching arrival to two different other location change elements (A->C vs A->B + B->C).
152 // we remove those, as the fine-granular results are better
153 if (d->m_data.size() >= 3) {
154 for (auto it = d->m_data.begin(); it != d->m_data.end();) {
155 auto depIt = it;
156 auto arrIt = it;
157 for (auto it2 = d->m_data.begin(); it2 != d->m_data.end(); ++it2) {
158 if (it == it2) {
159 continue;
160 }
161 if (MergeUtil::hasSameDeparture(*it, *it2)) {
162 depIt = it2;
163 }
164 if (MergeUtil::hasSameArrival(*it, *it2)) {
165 arrIt = it2;
166 }
167 }
168
169 if (depIt != it && arrIt != it && depIt != arrIt) {
170 (*depIt) = mergeTicket(*depIt, *it);
171 (*arrIt) = mergeTicket(*arrIt, *it);
172 it = d->m_data.erase(it);
173 } else {
174 ++it;
175 }
176 }
177 }
178
179 d->m_resultFinalized = true;
180 }
181
182 std::stable_sort(d->m_data.begin(), d->m_data.end(), SortUtil::isBefore);
183 return d->m_data;
184}
185
187{
188 d->m_contextDate = dt;
189}
190
191void ExtractorPostprocessor::setValidationEnabled([[maybe_unused]] bool validate)
192{
193}
194
195void ExtractorPostprocessorPrivate::mergeOrAppend(const QVariant &elem)
196{
197 const auto it = std::find_if(m_data.begin(), m_data.end(), [elem](const QVariant &other) {
198 return MergeUtil::isSame(elem, other);
199 });
200
201 if (it == m_data.end()) {
202 m_data.push_back(elem);
203 } else {
204 *it = MergeUtil::merge(*it, elem);
205 }
206}
207
208QVariant ExtractorPostprocessorPrivate::processFlightReservation(FlightReservation res) const
209{
210 // expand ticketToken for IATA BCBP data
211 const auto bcbp = res.reservedTicket().value<Ticket>().ticketTokenData().toString();
212 if (!bcbp.isEmpty()) {
213 const auto bcbpData = IataBcbpParser::parse(bcbp, m_contextDate);
214 if (bcbpData.size() == 1) {
215 res = JsonLdDocument::apply(bcbpData.at(0), res).value<FlightReservation>();
216 // standardize on the BCBP booking reference, not some secondary one we might have in structured data for example
217 res.setReservationNumber(bcbpData.at(0).value<FlightReservation>().reservationNumber());
218 } else {
219 for (const auto &data : bcbpData) {
220 if (MergeUtil::isSame(res, data)) {
222 break;
223 }
224 }
225 }
226 }
227
228 res.setBoardingGroup(StringUtil::simplifiedNoPlaceholder(res.boardingGroup()));
229 if (res.reservationFor().isValid()) {
230 FlightPostProcessor p;
231 res.setReservationFor(p.processFlight(res.reservationFor().value<Flight>()));
232 }
233 return processReservation(res);
234}
235
236TrainReservation ExtractorPostprocessorPrivate::processTrainReservation(TrainReservation res) const
237{
238 if (res.reservationFor().isValid()) {
239 res.setReservationFor(processTrainTrip(res.reservationFor().value<TrainTrip>()));
240 }
241 return processReservation(res);
242}
243
244TrainTrip ExtractorPostprocessorPrivate::processTrainTrip(TrainTrip trip) const
245{
246 trip.setArrivalPlatform(trip.arrivalPlatform().trimmed());
247 trip.setDeparturePlatform(trip.departurePlatform().trimmed());
248 trip.setDepartureStation(processStation(trip.departureStation()));
249 trip.setArrivalStation(processStation(trip.arrivalStation()));
250 trip.setDepartureTime(processTripTime(trip.departureTime(), trip.departureDay(), trip.departureStation()));
251 trip.setArrivalTime(processTripTime(trip.arrivalTime(), trip.departureDay(), trip.arrivalStation()));
252 trip.setTrainNumber(trip.trainNumber().simplified());
253 trip.setTrainName(trip.trainName().simplified());
254
255 // arrival less than a day before departure is an indication of the extractor failing to detect day rollover
256 const auto duration = trip.departureTime().secsTo(trip.arrivalTime());
257 if (duration < 0 && duration > -3600*24 && trip.departureTime().timeSpec() == trip.arrivalTime().timeSpec()) {
258 trip.setArrivalTime(trip.arrivalTime().addDays(1));
259 }
260
261 return trip;
262}
263
264template <typename T>
265static void applyStationData(const KnowledgeDb::TrainStation &record, T &station)
266{
267 if (!station.geo().isValid() && record.coordinate.isValid()) {
269 geo.setLatitude(record.coordinate.latitude);
270 geo.setLongitude(record.coordinate.longitude);
271 station.setGeo(geo);
272 }
273 auto addr = station.address();
274 if (addr.addressCountry().isEmpty() && record.country.isValid()) {
275 addr.setAddressCountry(record.country.toString());
276 station.setAddress(addr);
277 }
278}
279
280template <typename T>
281static void applyStationCountry(const QString &isoCode, T &station)
282{
283 auto addr = station.address();
284 if (addr.addressCountry().isEmpty()) {
285 addr.setAddressCountry(isoCode.toUpper());
286 station.setAddress(addr);
287 }
288}
289
290template<typename T>
291T ExtractorPostprocessorPrivate::processStation(T station)
292{
293 const auto id = station.identifier();
294 if (id.isEmpty()) { // empty -> null cleanup, to have more compact json-ld output
295 station.setIdentifier(QString());
296 } else if (id.startsWith("sncf:"_L1) && id.size() == 10) {
298 applyStationData(record, station);
299 applyStationCountry(id.mid(5, 2).toUpper(), station);
300 } else if (id.startsWith("ibnr:"_L1) && id.size() == 12) {
301 const auto record = KnowledgeDb::stationForIbnr(KnowledgeDb::IBNR{id.mid(5).toUInt()});
302 applyStationData(record, station);
303 const auto country = KnowledgeDb::countryIdForUicCode(QStringView(id).mid(5, 2).toUShort()) .toString();
304 applyStationCountry(country, station);
305 } else if (id.startsWith("uic:"_L1) && id.size() == 11) {
306 const auto record = KnowledgeDb::stationForUic( KnowledgeDb::UICStation{id.mid(4).toUInt()});
307 applyStationData(record, station);
308 const auto country = KnowledgeDb::countryIdForUicCode(QStringView(id).mid(4, 2).toUShort()) .toString();
309 applyStationCountry(country, station);
310 } else if (id.startsWith("ir:"_L1) && id.size() > 4) {
311 const auto record = KnowledgeDb::stationForIndianRailwaysStationCode(id.mid(3));
312 applyStationData(record, station);
313 } else if (id.startsWith("benerail:"_L1) && id.size() == 14) {
315 applyStationData(record, station);
316 applyStationCountry(id.mid(9, 2).toUpper(), station);
317 } else if (id.startsWith("vrfi:"_L1) && id.size() >= 7 && id.size() <= 9) {
319 applyStationData(record, station);
320 } else if (id.startsWith("iata:"_L1) && id.size() == 8) {
321 const auto iataCode = KnowledgeDb::IataCode(QStringView(id).mid(5));
322 const auto record = KnowledgeDb::stationForIataCode(iataCode);
323 applyStationData(record, station);
324 // fall back to the airport with the matching IATA code for the country
325 // information we cannot use the coordinate though, as that points to the
326 // actual airport, not the station
327 applyStationCountry(KnowledgeDb::countryForAirport(iataCode).toString(), station);
328 } else if (id.startsWith("amtrak:"_L1) && id.size() == 10) {
329 const auto record = KnowledgeDb::stationForAmtrakStationCode(KnowledgeDb::AmtrakStationCode(QStringView(id).mid(7)));
330 applyStationData(record, station);
331 } else if (id.startsWith("via:"_L1) && id.size() == 8) {
332 const auto record = KnowledgeDb::stationForViaRailStationCode(KnowledgeDb::ViaRailStationCode(QStringView(id).mid(4)));
333 applyStationData(record, station);
334 } else if (id.startsWith("uk:"_L1) && id.size() == 6) {
335 const auto record = KnowledgeDb::stationForUkRailwayStationCode(KnowledgeDb::UKRailwayStationCode(QStringView(id).mid(3)));
336 applyStationData(record, station);
337 }
338
339 return processPlace(station);
340}
341
342template <typename T>
343QDateTime ExtractorPostprocessorPrivate::processTripTime(QDateTime dt, QDate departureDay, const T& place) const
344{
345 if (!dt.isValid()) {
346 return dt;
347 }
348
349 if (dt.date().year() <= 1970 && departureDay.isValid()) { // we just have the time, but not the day
350 dt.setDate(departureDay);
351 }
352 return processTimeForLocation(dt, place);
353}
354
355BusReservation ExtractorPostprocessorPrivate::processBusReservation(BusReservation res) const
356{
357 if (res.reservationFor().isValid()) {
358 res.setReservationFor(processBusTrip(res.reservationFor().value<BusTrip>()));
359 }
360 return processReservation(res);
361}
362
363BusTrip ExtractorPostprocessorPrivate::processBusTrip(BusTrip trip) const
364{
365 trip.setDepartureBusStop(processStation(trip.departureBusStop()));
366 trip.setArrivalBusStop(processStation(trip.arrivalBusStop()));
367 trip.setDepartureTime(processTripTime(trip.departureTime(), trip.departureDay(), trip.departureBusStop()));
368 trip.setArrivalTime(processTripTime(trip.arrivalTime(), trip.departureDay(), trip.arrivalBusStop()));
369 trip.setBusNumber(trip.busNumber().simplified());
370 trip.setBusName(trip.busName().simplified());
371 return trip;
372}
373
374BoatReservation ExtractorPostprocessorPrivate::processBoatReservation(BoatReservation res) const
375{
376 if (res.reservationFor().isValid()) {
377 res.setReservationFor(processBoatTrip(res.reservationFor().value<BoatTrip>()));
378 }
379 return processReservation(res);
380}
381
382BoatTrip ExtractorPostprocessorPrivate::processBoatTrip(BoatTrip trip) const
383{
384 trip.setDepartureBoatTerminal(processPlace(trip.departureBoatTerminal()));
385 trip.setArrivalBoatTerminal(processPlace(trip.arrivalBoatTerminal()));
386 trip.setDepartureTime(processTimeForLocation(trip.departureTime(), trip.departureBoatTerminal()));
387 trip.setArrivalTime(processTimeForLocation(trip.arrivalTime(), trip.arrivalBoatTerminal()));
388
389 // arrival less than a day before departure is an indication of the extractor failing to detect day rollover
390 const auto duration = trip.departureTime().secsTo(trip.arrivalTime());
391 if (duration < 0 && duration > -3600*24) {
392 trip.setArrivalTime(trip.arrivalTime().addDays(1));
393 }
394
395 return trip;
396}
397
398LodgingReservation ExtractorPostprocessorPrivate::processLodgingReservation(LodgingReservation res) const
399{
400 if (res.reservationFor().isValid()) {
401 res.setReservationFor(processPlace(res.reservationFor().value<LodgingBusiness>()));
402 res.setCheckinTime(processTimeForLocation(res.checkinTime(), res.reservationFor().value<LodgingBusiness>()));
403 res.setCheckoutTime(processTimeForLocation(res.checkoutTime(), res.reservationFor().value<LodgingBusiness>()));
404 }
405 return processReservation(res);
406}
407
408TaxiReservation ExtractorPostprocessorPrivate::processTaxiReservation(TaxiReservation res) const
409{
410 res.setPickupLocation(processPlace(res.pickupLocation()));
411 res.setPickupTime(processTimeForLocation(res.pickupTime(), res.pickupLocation()));
412 return processReservation(res);
413}
414
415RentalCarReservation ExtractorPostprocessorPrivate::processRentalCarReservation(RentalCarReservation res) const
416{
417 if (res.reservationFor().isValid()) {
418 res.setReservationFor(processRentalCar(res.reservationFor().value<RentalCar>()));
419 }
420 res.setPickupLocation(processPlace(res.pickupLocation()));
421 res.setDropoffLocation(processPlace(res.dropoffLocation()));
422 res.setPickupTime(processTimeForLocation(res.pickupTime(), res.pickupLocation()));
423 res.setDropoffTime(processTimeForLocation(res.dropoffTime(), res.dropoffLocation()));
424 return processReservation(res);
425}
426
427RentalCar ExtractorPostprocessorPrivate::processRentalCar(RentalCar car) const
428{
429 car.setName(car.name().trimmed());
430 return car;
431}
432
433FoodEstablishmentReservation ExtractorPostprocessorPrivate::processFoodEstablishmentReservation(FoodEstablishmentReservation res) const
434{
435 if (res.reservationFor().isValid()) {
436 res.setReservationFor(processPlace(res.reservationFor().value<FoodEstablishment>()));
437 res.setStartTime(processTimeForLocation(res.startTime(), res.reservationFor().value<FoodEstablishment>()));
438 res.setEndTime(processTimeForLocation(res.endTime(), res.reservationFor().value<FoodEstablishment>()));
439 }
440 return processReservation(res);
441}
442
443TouristAttractionVisit ExtractorPostprocessorPrivate::processTouristAttractionVisit(TouristAttractionVisit visit) const
444{
445 visit.setTouristAttraction(processPlace(visit.touristAttraction()));
446 visit.setArrivalTime(processTimeForLocation(visit.arrivalTime(), visit.touristAttraction()));
447 visit.setDepartureTime(processTimeForLocation(visit.departureTime(), visit.touristAttraction()));
448 return visit;
449}
450
451EventReservation ExtractorPostprocessorPrivate::processEventReservation(EventReservation res) const
452{
453 if (res.reservationFor().isValid()) {
454 res.setReservationFor(processEvent(res.reservationFor().value<Event>()));
455 }
456 return processReservation(res);
457}
458
459KItinerary::Event ExtractorPostprocessorPrivate::processEvent(KItinerary::Event event) const
460{
461 event.setName(StringUtil::clean(event.name()));
462
463 // normalize location to be a Place
464 if (JsonLd::isA<PostalAddress>(event.location())) {
465 Place place;
466 place.setAddress(event.location().value<PostalAddress>());
467 event.setLocation(place);
468 }
469
470 if (JsonLd::isA<Place>(event.location())) {
471 event.setLocation(processPlace(event.location().value<Place>()));
472
473 // try to obtain timezones if we have a location
474 event.setStartDate(processTimeForLocation(event.startDate(), event.location().value<Place>()));
475 event.setEndDate(processTimeForLocation(event.endDate(), event.location().value<Place>()));
476 event.setDoorTime(processTimeForLocation(event.doorTime(), event.location().value<Place>()));
477 }
478
479 return event;
480}
481
482static QString processCurrency(const QString &currency)
483{
484 if (currency.size() != 3 || !std::all_of(currency.begin(), currency.end(), [](QChar c) { return c.isUpper(); })) {
485 return {};
486 }
487 return currency;
488}
489
490Ticket ExtractorPostprocessorPrivate::processTicket(Ticket ticket) const
491{
492 ticket.setName(StringUtil::clean(ticket.name()));
493 ticket.setTicketNumber(ticket.ticketNumber().simplified());
494 ticket.setUnderName(processPerson(ticket.underName()));
495 ticket.setTicketedSeat(processSeat(ticket.ticketedSeat()));
496 ticket.setPriceCurrency(processCurrency(ticket.priceCurrency()));
497 return ticket;
498}
499
500ProgramMembership ExtractorPostprocessorPrivate::processProgramMembership(ProgramMembership program) const
501{
502 // remove empty dummy entries found eg. in ERA FCB data
503 if (const auto name = program.programName(); std::none_of(name.begin(), name.end(), [](QChar c) { return c.isLetter(); })) {
504 program.setProgramName(QString());
505 }
506
507 program.setProgramName(program.programName().simplified());
508 // avoid emitting spurious empty ProgramMembership objects caused by empty elements in JSON-LD/Microdata input
509 if (program.programName().isEmpty() && !program.programName().isNull()) {
510 program.setProgramName(QString());
511 }
512 program.setMember(processPerson(program.member()));
513 return program;
514}
515
516Seat ExtractorPostprocessorPrivate::processSeat(Seat seat) const
517{
518 seat.setSeatSection(seat.seatSection().simplified());
519 seat.setSeatRow(seat.seatRow().simplified());
520 seat.setSeatNumber(seat.seatNumber().simplified());
521 seat.setSeatingType(seat.seatingType().simplified());
522 return seat;
523}
524
525template <typename T>
526T ExtractorPostprocessorPrivate::processReservation(T res) const
527{
528 res.setUnderName(processPerson(res.underName().template value<Person>()));
529 res.setPotentialAction(processActions(res.potentialAction()));
530 res.setReservationNumber(res.reservationNumber().trimmed());
531 res.setProgramMembershipUsed(processProgramMembership(res.programMembershipUsed()));
532 res.setPriceCurrency(processCurrency(res.priceCurrency()));
533
534 if (JsonLd::isA<Ticket>(res.reservedTicket())) {
535 // move information that can exist in Ticket and Reservation up to the latter
536 auto ticket = processTicket(res.reservedTicket().template value<Ticket>());
537 if (res.underName().isNull() && !ticket.name().isEmpty()) {
538 res.setUnderName(ticket.underName());
539 ticket.setUnderName({});
540 } else if (ticket.underName() == res.underName().template value<Person>()) {
541 ticket.setUnderName({});
542 }
543
544 if ((!res.priceCurrency().isEmpty() && res.priceCurrency() == ticket.priceCurrency())
545 && (!std::isnan(res.totalPrice()) && res.totalPrice() == ticket.totalPrice())) {
546 ticket.setPriceCurrency({});
547 ticket.setTotalPrice(NAN);
548 }
549
550 if (ticket.ticketNumber() == res.reservationNumber()) {
551 ticket.setTicketNumber({});
552 }
553
554 res.setReservedTicket(processTicket(ticket));
555 }
556 return res;
557}
558
// Salutations/titles (upper case) that simplifyNamePart() strips from the start or end of a name part.
static constexpr const char* name_prefixes[] = {
    "DR",
    "MR",
    "MRS",
    "MS",
};
562
563static bool isSeparator(QChar c)
564{
565 return c == QLatin1Char(' ') || c == QLatin1Char('/');
566}
567
568static QString simplifyNamePart(QString n)
569{
570 n = n.simplified();
571
572 for (auto prefix : name_prefixes) {
573 const int prefixLen = std::strlen(prefix);
574 if (n.size() > prefixLen + 2 &&
575 n.startsWith(QLatin1StringView(prefix, prefixLen),
577 isSeparator(n[prefixLen])) {
578 return n.mid(prefixLen + 1);
579 }
580 if (n.size() > prefixLen + 2 &&
581 n.endsWith(QLatin1StringView(prefix, prefixLen),
583 isSeparator(n[n.size() - prefixLen - 1])) {
584 return n.left(n.size() - prefixLen - 1);
585 }
586 }
587
588 return n;
589}
590
591KItinerary::Person ExtractorPostprocessorPrivate::processPerson(KItinerary::Person person) const
592{
593 person.setName(simplifyNamePart(person.name()));
594 person.setFamilyName(simplifyNamePart(person.familyName()));
595 person.setGivenName(simplifyNamePart(person.givenName()));
596
597 // fill name with name parts, if it's empty
598 if ((person.name().isEmpty() || person.name() == person.familyName() || person.name() == person.givenName())
599 && !person.familyName().isEmpty() && !person.givenName().isEmpty())
600 {
601 person.setName(person.givenName() + QLatin1Char(' ') + person.familyName());
602 }
603
604 return person;
605}
606
607PostalAddress ExtractorPostprocessorPrivate::processAddress(PostalAddress addr, const QString &phoneNumber, const GeoCoordinates &geo)
608{
609 addr.setAddressCountry(addr.addressCountry().simplified());
610
611 // convert to ISO 3166-1 alpha-2 country codes
612 if (addr.addressCountry().size() > 2) {
613 QString alpha2Code;
614
615 // try ISO 3166-1 alpha-3, we get that e.g. from Flixbus
616 if (addr.addressCountry().size() == 3) {
617 alpha2Code = KCountry::fromAlpha3(addr.addressCountry()).alpha2();
618 }
619 if (alpha2Code.isEmpty()) {
620 alpha2Code = KCountry::fromName(addr.addressCountry()).alpha2();
621 }
622 if (!alpha2Code.isEmpty()) {
623 addr.setAddressCountry(alpha2Code);
624 }
625 }
626
627 // upper case country codes
628 if (addr.addressCountry().size() == 2) {
629 addr.setAddressCountry(addr.addressCountry().toUpper());
630 }
631
632 // normalize strings
633 addr.setStreetAddress(addr.streetAddress().simplified());
634 addr.setPostalCode(addr.postalCode().simplified());
635 addr.setAddressLocality(addr.addressLocality().simplified());
636 addr.setAddressRegion(addr.addressRegion().simplified());
637
638#if HAVE_PHONENUMBER
639 // recover country from phone number, if we have that
640 if (!phoneNumber.isEmpty() && addr.addressCountry().size() != 2) {
641 const auto phoneStr = phoneNumber.toStdString();
642 const auto util = i18n::phonenumbers::PhoneNumberUtil::GetInstance();
643 i18n::phonenumbers::PhoneNumber number;
644 if (util->ParseAndKeepRawInput(phoneStr, "ZZ", &number) == i18n::phonenumbers::PhoneNumberUtil::NO_PARSING_ERROR) {
645 std::string isoCode;
646 util->GetRegionCodeForNumber(number, &isoCode);
647 if (!isoCode.empty() && isoCode != "ZZ") {
648 addr.setAddressCountry(QString::fromStdString(isoCode));
649 }
650 }
651 }
652#endif
653
654 if (geo.isValid() && addr.addressCountry().size() != 2) {
655 const auto country = KCountry::fromLocation(geo.latitude(), geo.longitude());
656 if (country.isValid()) {
657 addr.setAddressCountry(country.alpha2());
658 }
659 }
660
661 AddressParser addrParser;
662 addrParser.setFallbackCountry(KCountry::fromQLocale(QLocale().territory()).alpha2());
663 addrParser.parse(addr);
664 addr = addrParser.result();
665 return addr;
666}
667
668QString ExtractorPostprocessorPrivate::processPhoneNumber(const QString &phoneNumber, const PostalAddress &addr)
669{
670#if HAVE_PHONENUMBER
671 // or complete the phone number if we know the country
672 if (!phoneNumber.isEmpty() && addr.addressCountry().size() == 2) {
673 auto phoneStr = phoneNumber.toStdString();
674 const auto isoCode = addr.addressCountry().toStdString();
675 const auto util = i18n::phonenumbers::PhoneNumberUtil::GetInstance();
676 i18n::phonenumbers::PhoneNumber number;
677 if (util->ParseAndKeepRawInput(phoneStr, isoCode, &number) == i18n::phonenumbers::PhoneNumberUtil::NO_PARSING_ERROR) {
678 if (number.country_code_source() == i18n::phonenumbers::PhoneNumber_CountryCodeSource_FROM_DEFAULT_COUNTRY) {
679 util->Format(number, i18n::phonenumbers::PhoneNumberUtil::INTERNATIONAL, &phoneStr);
680 return QString::fromStdString(phoneStr);
681 }
682 }
683 }
684#else
685 Q_UNUSED(addr)
686#endif
687 return phoneNumber.simplified();
688}
689
690QVariantList ExtractorPostprocessorPrivate::processActions(QVariantList actions) const
691{
692 // remove non-actions and actions with invalid URLs
693 QUrl viewUrl;
694 for (auto it = actions.begin(); it != actions.end();) {
695 if (!JsonLd::canConvert<Action>(*it)) {
696 it = actions.erase(it);
697 continue;
698 }
699
700 const auto action = JsonLd::convert<Action>(*it);
701 if (!action.target().isValid()) {
702 it = actions.erase(it);
703 continue;
704 }
705
706 if (JsonLd::isA<ViewAction>(*it)) {
707 viewUrl = action.target();
708 }
709 ++it;
710 }
711
712 // normalize the order, so JSON comparison still yields correct results
713 std::sort(actions.begin(), actions.end(), [](const QVariant &lhs, const QVariant &rhs) {
714 return strcmp(lhs.typeName(), rhs.typeName()) < 0;
715 });
716
717 // remove actions that don't actually have their own target, or duplicates
718 QUrl prevUrl;
719 const char* prevType = nullptr;
720 for (auto it = actions.begin(); it != actions.end();) {
721 const auto action = JsonLd::convert<Action>(*it);
722 const auto isDuplicate = action.target() == prevUrl && (prevType ? strcmp(prevType, (*it).typeName()) == 0 : false);
723 if ((JsonLd::isA<ViewAction>(*it) || action.target() != viewUrl) && !isDuplicate) {
724 prevUrl = action.target();
725 prevType = (*it).typeName();
726 ++it;
727 } else {
728 it = actions.erase(it);
729 }
730 }
731
732 return actions;
733}
734
735template <typename T>
736QDateTime ExtractorPostprocessorPrivate::processTimeForLocation(QDateTime dt, const T &place) const
737{
738 if (!dt.isValid() ) {
739 return dt;
740 }
741 if ((dt.timeSpec() == Qt::TimeZone && dt.timeZone() != QTimeZone::utc())) {
742 if (KnowledgeDb::isPlausibleTimeZone(dt.timeZone(), place.geo().latitude(), place.geo().longitude(), place.address().addressCountry(), place.address().addressRegion())) {
743 return dt;
744 }
745 // drop timezones where we are sure they don't match the location
747 }
748
749 const auto tz = KnowledgeDb::timezoneForLocation(place.geo().latitude(), place.geo().longitude(), place.address().addressCountry(), place.address().addressRegion());
750 if (!tz.isValid()) {
751 return dt;
752 }
753
754 // prefer our timezone over externally provided UTC offset, if they match
755 if (dt.timeSpec() == Qt::OffsetFromUTC && tz.offsetFromUtc(dt) != dt.offsetFromUtc()) {
756 qCDebug(Log) << "UTC offset clashes with expected timezone!" << dt << dt.offsetFromUtc() << tz.id() << tz.offsetFromUtc(dt);
757 return dt;
758 }
759
760 if (dt.timeSpec() == Qt::OffsetFromUTC || dt.timeSpec() == Qt::LocalTime) {
761 dt.setTimeZone(tz);
762 } else if (dt.timeSpec() == Qt::UTC || (dt.timeSpec() == Qt::TimeZone && dt.timeZone() == QTimeZone::utc())) {
763 dt = dt.toTimeZone(tz);
764 }
765 return dt;
766}
static KCountry fromLocation(float latitude, float longitude)
bool isValid() const
static KCountry fromName(QStringView name)
static KCountry fromQLocale(QLocale::Country country)
QString alpha2() const
static KCountry fromAlpha3(const char *alpha3Code)
A boat or ferry reservation.
A boat or ferry trip.
Definition boattrip.h:23
A bus reservation.
A bus trip.
Definition bustrip.h:22
QDate departureDay
The scheduled day of departure.
Definition bustrip.h:40
An event reservation.
An event.
Definition event.h:21
Post-process extracted data to filter out garbage and augment data from other sources.
void setContextDate(const QDateTime &dt)
The date the reservation(s) processed here have been made, if known.
QList< QVariant > result() const
This returns the final result of all previously executed processing steps followed by sorting and fil...
void setValidationEnabled(bool validate)
Enable or disable validation.
void process(const QList< QVariant > &data)
This will normalize and augment the given data elements and merge them with already added data elemen...
A flight reservation.
Definition reservation.h:90
A flight.
Definition flight.h:25
Food-related business (such as a restaurant, or a bakery).
Geographic coordinates.
Definition place.h:23
static void writeProperty(QVariant &obj, const char *name, const QVariant &value)
Set property name on object obj to value value.
static QVariant apply(const QVariant &lhs, const QVariant &rhs)
Apply all properties of rhs on to lhs.
static QVariant readProperty(const QVariant &obj, const char *name)
Read property name on object obj.
constexpr bool isValid() const
Returns true if this is a valid identifier.
Definition alphaid.h:56
QString toString() const
Returns a string representation of this identifier.
Definition alphaid.h:75
A hotel reservation.
Definition reservation.h:77
static QVariant merge(const QVariant &lhs, const QVariant &rhs)
Merge the two given objects.
static bool hasSameArrival(const QVariant &lhs, const QVariant &rhs)
Checks whether two transport reservation elements refer to the same arrival.
static bool isSame(const QVariant &lhs, const QVariant &rhs)
Checks if two Reservation or Trip values refer to the same booking element.
static bool hasSameDeparture(const QVariant &lhs, const QVariant &rhs)
Checks whether two transport reservation elements refer to the same departure.
Base class for places.
Definition place.h:69
Postal address.
Definition place.h:46
QString addressCountry
The country this address is in, as ISO 3166-1 alpha 2 code.
Definition place.h:53
A frequent traveler, bonus points or discount scheme program membership.
A Rental Car reservation.
A car rental.
Definition rentalcar.h:22
A reserved seat.
Definition ticket.h:23
A Taxi reservation.
A booked ticket.
Definition ticket.h:41
A train reservation.
A train trip.
Definition traintrip.h:24
QDate departureDay
The scheduled day of departure.
Definition traintrip.h:42
AKONADI_CALENDAR_EXPORT KCalendarCore::Event::Ptr event(const Akonadi::Item &item)
char * toString(const EngineQuery &query)
KIOCORE_EXPORT QString number(KIO::filesize_t size)
QList< QVariant > parse(const QString &message, const QDateTime &externalIssueDateTime=QDateTime())
Parses the bar coded boarding pass message message into a list of FlightReservation instances.
bool isA(const QVariant &value)
Returns true if value is of type T.
Definition datatypes.h:24
bool canConvert(const QVariant &value)
Checks if the given value can be up-cast to T.
Definition datatypes.h:31
T convert(const QVariant &value)
Up-cast value to T.
Definition datatypes.h:47
AlphaId< UnalignedNumber< 3 >, 4 > ViaRailStationCode
Via Rail station code.
TrainStation stationForViaRailStationCode(ViaRailStationCode code)
Lookup train station data by Via Rail station code.
CountryId countryIdForUicCode(uint16_t uicCountryCode)
Look up country ISO code from a UIC country code.
Definition countrydb.cpp:82
TrainStation stationForUkRailwayStationCode(UKRailwayStationCode code)
Lookup train station data by UK railway station code.
AlphaId< uint16_t, 3 > IataCode
IATA airport code.
Definition iatacode.h:17
TrainStation stationForIbnr(IBNR ibnr)
Lookup train station data by IBNR.
TrainStation stationForAmtrakStationCode(AmtrakStationCode code)
Lookup train station data by Amtrak station code.
AlphaId< uint16_t, 3 > UKRailwayStationCode
UK railway station code.
TrainStation stationForUic(UICStation uic)
Lookup train station data by UIC station id.
TrainStation stationForIndianRailwaysStationCode(const QString &code)
Lookup train station data by Indian Railways station code.
TrainStation stationForSncfStationId(SncfStationId sncfId)
Lookup train station data by SNCF station id.
KnowledgeDb::CountryId countryForAirport(IataCode iataCode)
Returns the country the airport with IATA code iataCode is in.
Definition airportdb.cpp:50
TrainStation stationForVRStationCode(VRStationCode vrStation)
Lookup train station data by VR (Finland) station code.
TrainStation stationForIataCode(IataCode iataCode)
Lookup train station data by IATA location code.
TrainStation stationForBenerailId(BenerailStationId id)
Lookup train station data by Benerail station identifier.
AlphaId< uint16_t, 3 > AmtrakStationCode
Amtrak station codes.
GeoCoordinates geo(const QVariant &location)
Returns the geo coordinates of a given location.
bool isBefore(const QVariant &lhs, const QVariant &rhs)
Sorting function for top-level reservation/visit/event elements.
Definition sortutil.cpp:169
QString clean(const QString &s)
Cleans up extra white spaces and XML entities from s.
QString simplifiedNoPlaceholder(const QString &s)
Same as QString::simplified() and dropping everything that just contains punctuation or dash characters.
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
KI18NLOCALEDATA_EXPORT KCountry country(const char *ianaId)
bool isValid(int year, int month, int day)
int year() const
QDateTime addDays(qint64 ndays) const
QDate date() const
bool isValid() const
int offsetFromUtc() const
qint64 secsTo(const QDateTime &other) const
void setDate(QDate date)
void setTimeZone(const QTimeZone &toZone)
Qt::TimeSpec timeSpec() const
QTimeZone timeZone() const
QDateTime toTimeZone(const QTimeZone &timeZone) const
bool endsWith(QChar c, Qt::CaseSensitivity cs) const
QString fromStdString(const std::string &str)
bool isEmpty() const
bool isNull() const
QString left(qsizetype n) const
QString mid(qsizetype position, qsizetype n) const
QString simplified() const
qsizetype size() const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const
std::string toStdString() const
QString toUpper() const
QString trimmed() const
CaseInsensitive
TimeZone
QTimeZone utc()
bool isValid() const
T value() const
Train station entry in the station table.
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Apr 11 2025 11:58:38 by doxygen 1.13.2 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.