KItinerary

genericboardingpassextractor.cpp
1/*
2 SPDX-FileCopyrightText: 2021 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "genericboardingpassextractor.h"
8#include "flightutil_p.h"
9#include "locationutil.h"
10#include "logging.h"
11#include "stringutil.h"
12
13#include "knowledgedb/airportdb.h"
14#include "knowledgedb/airportnametokenizer_p.h"
15#include "pdf/pdfdocument.h"
16#include "text/terminalfinder_p.h"
17#include "text/timefinder_p.h"
18
19#include <KItinerary/ExtractorDocumentNode>
20#include <KItinerary/ExtractorResult>
21#include <KItinerary/Flight>
22#include <KItinerary/Reservation>
23
24#include <QDebug>
25#include <QTimeZone>
26
27#include <chrono>
28#include <unordered_map>
29
30using namespace KItinerary;
31
// Plausibility windows for the gaps between the times printed on a boarding pass.
// Each MIN/MAX pair is the inclusive range accepted by the matching isPlausible*() check.

// boarding start -> departure
inline constexpr std::chrono::minutes BOARDING_TO_DEPARTURE_MIN{20};
inline constexpr std::chrono::minutes BOARDING_TO_DEPARTURE_MAX{75};

// check-in (or baggage drop) close -> boarding start
inline constexpr std::chrono::minutes CHECKIN_TO_BOARDING_MIN{0};
inline constexpr std::chrono::minutes CHECKIN_TO_BOARDING_MAX{35};

// boarding start -> gate close
inline constexpr std::chrono::minutes BOARDING_TO_GATE_CLOSE_MIN{15};
inline constexpr std::chrono::minutes BOARDING_TO_GATE_CLOSE_MAX{30};

// gate close -> departure
inline constexpr std::chrono::minutes GATE_CLOSE_TO_DEPARTURE_MIN{10};
inline constexpr std::chrono::minutes GATE_CLOSE_TO_DEPARTURE_MAX{15};

// anything shorter than this is not considered a flight
inline constexpr std::chrono::minutes MINIMUM_FLIGHT_TIME{60};
41
42GenericBoardingPassExtractor::GenericBoardingPassExtractor()
43{
44 m_filter.setMimeType(QStringLiteral("internal/iata-bcbp"));
45 m_filter.setScope(ExtractorFilter::Descendants);
46}
47
48GenericBoardingPassExtractor::~GenericBoardingPassExtractor() = default;
49
51{
52 return QStringLiteral("<Generic PDF Boarding Pass>");
53}
54
56{
57 return node.content<PdfDocument*>() && m_filter.matches(node);
58}
59
60static void mergeOrAppend(QStringList &l, QStringView s)
61{
62 for (auto &n : l) {
63 if (n.compare(s, Qt::CaseInsensitive) == 0) {
65 return;
66 }
67 }
68 l.push_back(s.toString());
69}
70
71[[nodiscard]] static int airportDistance(KnowledgeDb::IataCode from, KnowledgeDb::IataCode to)
72{
73 const auto fromCoord = KnowledgeDb::coordinateForAirport(from);
74 const auto toCoord = KnowledgeDb::coordinateForAirport(to);
75 if (!fromCoord.isValid() || !toCoord.isValid()) {
76 return std::numeric_limits<int>::max();
77 }
78 return LocationUtil::distance({fromCoord.latitude, fromCoord.longitude}, {toCoord.latitude, toCoord.longitude});
79}
80
81[[nodiscard]] static bool isPlausibleBoardingTime(const QDateTime &boarding, const QDateTime &departure)
82{
83 const std::chrono::seconds boardingToDep(boarding.secsTo(departure));
84 return boardingToDep >= BOARDING_TO_DEPARTURE_MIN && boardingToDep <= BOARDING_TO_DEPARTURE_MAX;
85}
86
87[[nodiscard]] static std::chrono::seconds flightDuration(const QDateTime &fromTime, const QDateTime &toTime, KnowledgeDb::IataCode from, KnowledgeDb::IataCode to)
88{
89 // times are local, so convert them to the right timezone first
90 auto fromDt = fromTime;
92 auto toDt = toTime;
94 return std::chrono::seconds(fromDt.secsTo(toDt));
95}
96
97[[nodiscard]] static bool isPlausibleFlightTime(const QDateTime &fromTime, const QDateTime &toTime, KnowledgeDb::IataCode from, KnowledgeDb::IataCode to)
98{
99 const auto duration = flightDuration(fromTime, toTime, from, to);
100 if (duration < MINIMUM_FLIGHT_TIME) {
101 return false;
102 }
103
104 const auto distance = airportDistance(from, to);
105 return FlightUtil::isPlausibleDistanceForDuration(distance, duration);
106}
107
108[[nodiscard]] static bool isPlausibleCheckinClose(const QDateTime &checkinClose, const QDateTime &boarding)
109{
110 const std::chrono::seconds d(checkinClose.secsTo(boarding));
111 return d >= CHECKIN_TO_BOARDING_MIN && d <= CHECKIN_TO_BOARDING_MAX;
112}
113
114[[nodiscard]] static bool isPlausibleGateClose(const QDateTime &boarding, const QDateTime &gateClose, const QDateTime &departure)
115{
116 const std::chrono::seconds gateOpen(boarding.secsTo(gateClose));
117 const std::chrono::seconds gateCloseToDep(gateClose.secsTo(departure));
118
119 return gateOpen >= BOARDING_TO_GATE_CLOSE_MIN && gateOpen <= BOARDING_TO_GATE_CLOSE_MAX
120 && gateCloseToDep >= GATE_CLOSE_TO_DEPARTURE_MIN && gateCloseToDep <= GATE_CLOSE_TO_DEPARTURE_MAX;
121}
122
123[[nodiscard]] static bool conflictIfSet(const QDateTime &lhs, const QDateTime &rhs)
124{
125 return lhs.isValid() && rhs.isValid() && lhs != rhs;
126}
127
128static void applyFlightTimes(QList<QVariant> &result, const QDateTime &boarding, const QDateTime &dep, const QDateTime &arr)
129{
130 for (auto &res : result) {
131 auto flightRes = res.value<FlightReservation>();
132 auto flight = flightRes.reservationFor().value<Flight>();
133
134 // check if already set times match, otherwise discard the entire set
135 if (conflictIfSet(flight.boardingTime(), boarding) || conflictIfSet(flight.departureTime(), dep) || conflictIfSet(flight.arrivalTime(), arr)) {
136 continue;
137 }
138
139 // apply not yet set times
140 if (!flight.boardingTime().isValid() && boarding.isValid()) {
141 flight.setBoardingTime(boarding);
142 }
143 if (!flight.departureTime().isValid() && dep.isValid()) {
144 flight.setDepartureTime(dep);
145 }
146 if (!flight.arrivalTime().isValid() && arr.isValid()) {
147 flight.setArrivalTime(arr);
148 }
149 flightRes.setReservationFor(flight);
150 res = flightRes;
151 }
152}
153
155{
156 static TerminalFinder terminalFinder(u"^", u"(?=\\b|\\s|$)");
157
158 QList<QVariant> fullResult;
159
160 const auto pdf = node.content<PdfDocument*>();
161
162 std::vector<ExtractorDocumentNode> bcbpNodes;
163 m_filter.allMatches(node, bcbpNodes);
164 bcbpNodes.erase(std::remove_if(bcbpNodes.begin(), bcbpNodes.end(), [](const auto &node) {
165 return node.location().userType() != QMetaType::Int || node.result().isEmpty();
166 }), bcbpNodes.end());
167 std::sort(bcbpNodes.begin(), bcbpNodes.end(), [](const auto &lhs, const auto &rhs) { return lhs.location().toInt() < rhs.location().toInt(); });
168
169 for (auto it = bcbpNodes.begin(); it != bcbpNodes.end(); ++it) {
170 QDate departureDay;
171 KnowledgeDb::IataCode from, to;
172 QList<QVariant> result;
173
174 // 1 determine which airports we need to look for on the same page
175 const auto pageNum = (*it).location().toInt();
176 std::unordered_map<KnowledgeDb::IataCode, QStringList> airportNames;
177 std::unordered_map<KnowledgeDb::IataCode, QString> terminalNames;
178 for (auto it2 = it; it2 != bcbpNodes.end() && (*it2).location().toInt() == pageNum; ++it2) {
179 const auto flightReservations = (*it).result().result();
180 for (const auto &flightRes : flightReservations) {
181 const auto flight = flightRes.value<FlightReservation>().reservationFor().value<Flight>();
182 if (!flight.departureAirport().iataCode().isEmpty()) {
183 from = KnowledgeDb::IataCode{flight.departureAirport().iataCode()};
184 airportNames[from] = QStringList();
185 terminalNames[from] = QString();
186 }
187 if (!flight.arrivalAirport().iataCode().isEmpty()) {
188 to = KnowledgeDb::IataCode{flight.arrivalAirport().iataCode()};
189 airportNames[to] = QStringList();
190 terminalNames[to] = QString();
191 }
192 departureDay = flight.departureDay();
193 }
194 }
195
196 // 2 tokenize the page and scan for airport names
197 const auto page = pdf->page(pageNum);
198 qCDebug(Log) << "scanning page" << pageNum << "for airport names";
199 const auto pageText = page.text();
200 AirportNameTokenizer tokenizer(pageText);
201 while (tokenizer.hasNext()) {
202 const auto s = tokenizer.next();
203 if (s.compare(QLatin1StringView("international"),
204 Qt::CaseInsensitive) == 0) {
205 qCDebug(Log) << " ignoring" << s;
206 continue;
207 }
208
209 // IATA code of one of the airports
210 if (const auto code = KnowledgeDb::IataCode(s); !s.isNull() && airportNames.find(KnowledgeDb::IataCode{s}) != airportNames.end()) {
211 // also look for terminal information after the IATA code itself
212 const auto offset = s.size() + s.data() - pageText.data();
213 const auto res = terminalFinder.find(QStringView(pageText).mid(offset));
214 if (res.hasResult() && res.name != s.toString()) {
215 terminalNames[code] = res.name;
216 }
217
218 qCDebug(Log) << " found own IATA code" << s;
219 continue;
220 }
221
222 const auto iataCodes = KnowledgeDb::iataCodesFromName(s);
223 for (const auto code : iataCodes) {
224 auto it2 = airportNames.find(code);
225 if (it2 != airportNames.end()) {
226 qCDebug(Log) << " found candidate:" << s << iataCodes;
227 mergeOrAppend((*it2).second, s);
228
229 // look for a following terminal name at the position after s
230 const auto offset = s.size() + s.data() - pageText.data();
231 const auto res = terminalFinder.find(QStringView(pageText).mid(offset));
232 if (res.hasResult() && res.name != code.toString()) {
233 terminalNames[(*it2).first] = res.name;
234 }
235 }
236 }
237 }
238
239 // 3 augment the results with what we found
240 const auto flightReservations = (*it).result().result();
241 for (const auto &res : flightReservations) {
242 auto flightRes = res.value<FlightReservation>();
243 auto flight = flightRes.reservationFor().value<Flight>();
244 auto airport = flight.departureAirport();
245 airport.setName(airportNames[KnowledgeDb::IataCode{airport.iataCode()}].join(QLatin1Char(' ')));
246 flight.setDepartureAirport(airport);
247 flight.setDepartureTerminal(terminalNames[KnowledgeDb::IataCode{airport.iataCode()}]);
248 airport = flight.arrivalAirport();
249 airport.setName(airportNames[KnowledgeDb::IataCode{airport.iataCode()}].join(QLatin1Char(' ')));
250 flight.setArrivalAirport(airport);
251 flight.setArrivalTerminal(terminalNames[KnowledgeDb::IataCode{airport.iataCode()}]);
252 flightRes.setReservationFor(flight);
253 result.push_back(std::move(flightRes));
254 }
255
256 // 4 if there's only a single leg on this page, try to see if we can determine times
257 if (airportNames.size() == 2) {
258 TimeFinder timeFinder;
259 timeFinder.find(pageText);
260 std::vector<QDateTime> times;
261 for (const auto &res : timeFinder.results()) {
262 switch (res.dateTime.userType()) {
263 case QMetaType::QTime:
264 times.push_back(QDateTime(departureDay, res.dateTime.toTime()));
265 break;
267 if (res.dateTime.toDateTime().date() == departureDay) {
268 times.push_back(res.dateTime.toDateTime());
269 }
270 break;
271 case QMetaType::QDate:
272 default:
273 break;
274 }
275 }
276 std::sort(times.begin(), times.end());
277 times.erase(std::unique(times.begin(), times.end()), times.end());
278 qCDebug(Log) << times;
279 if (times.size() == 2) {
280 // boarding/departure only, and on the same day
281 if (isPlausibleBoardingTime(times[0], times[1]) && !isPlausibleFlightTime(times[0], times[1], from, to)) {
282 applyFlightTimes(result, times[0], times[1], {});
283 }
284 } else if (times.size() == 3) {
285 // boarding/departure/arrival on the same day
286 if (isPlausibleBoardingTime(times[0], times[1]) && isPlausibleFlightTime(times[1], times[2], from, to)) {
287 applyFlightTimes(result, times[0], times[1], times[2]);
288 // boarding/departure on the same day, arrival on the next day
289 } else if (isPlausibleBoardingTime(times[1], times[2]) && isPlausibleFlightTime(times[2], times[0].addDays(1), from, to)) {
290 applyFlightTimes(result, times[1], times[2], times[0].addDays(1));
291 }
292 // TODO handle boarding before midnight
293 // departure/arrival/duration
294 else if (isPlausibleFlightTime(times[1], times[2], from, to) && flightDuration(times[1], times[2], from, to) == std::chrono::minutes(times[0].time().hour() * 60 + times[0].time().minute())) {
295 applyFlightTimes(result, {}, times[1], times[2]);
296 }
297 } else if (times.size() == 4) {
298 // baggage drop or checkin close/boarding/departure/arrival
299 if (isPlausibleCheckinClose(times[0], times[1]) && isPlausibleBoardingTime(times[1], times[2]) && isPlausibleFlightTime(times[2], times[3], from, to)) {
300 applyFlightTimes(result, times[1], times[2], times[3]);
301 // boarding/gate close/departure/arrival
302 } else if (isPlausibleBoardingTime(times[0], times[2]) && isPlausibleGateClose(times[0], times[1], times[2]) && isPlausibleFlightTime(times[2], times[3], from, to)) {
303 applyFlightTimes(result, times[0], times[2], times[3]);
304 }
305 // TODO across midnight variants
306 }
307 }
308
309 fullResult += result;
310 }
311
312 return fullResult;
313}
A node in the extracted document object tree.
QJSValue content
The decoded content of this node.
Semantic data extraction engine.
void allMatches(const ExtractorDocumentNode &node, std::vector< ExtractorDocumentNode > &matches) const
Checks whether this filter applies to node.
bool matches(const QString &data) const
Check if data matches this filter.
@ Descendants
match any direct or indirect child nodes
Generic extraction result.
A flight reservation.
Definition reservation.h:90
A flight.
Definition flight.h:25
ExtractorResult extract(const ExtractorDocumentNode &node, const ExtractorEngine *engine) const override
Extract data from node.
bool canHandle(const KItinerary::ExtractorDocumentNode &node) const override
Fast check whether this extractor is applicable for node.
QString name() const override
Identifier for this extractor.
PDF document for extraction.
Definition pdfdocument.h:92
QTimeZone timezoneForAirport(IataCode iataCode)
Returns the timezone the airport with IATA code iataCode is in.
Definition airportdb.cpp:40
Coordinate coordinateForAirport(IataCode iataCode)
Returns the geographical coordinates the airport with IATA code iataCode is in.
Definition airportdb.cpp:30
std::vector< IataCode > iataCodesFromName(QStringView name)
Returns all possible IATA code candidates for the given airport name.
int distance(const GeoCoordinates &coord1, const GeoCoordinates &coord2)
Computes the distance between two geo coordinates in meters.
QStringView betterString(QStringView lhs, QStringView rhs)
Assuming both sides are describing the same thing, this tries to find the "better" string.
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
KOSM_EXPORT double distance(const std::vector< const OSM::Node * > &path, Coordinate coord)
bool isValid() const
qint64 secsTo(const QDateTime &other) const
void setTimeZone(const QTimeZone &toZone)
void push_back(parameter_type value)
int compare(QChar ch) const
const_pointer data() const
bool isNull() const
qsizetype size() const
QString toString() const
CaseInsensitive
T value() const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Mon Nov 18 2024 12:09:58 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.