KItinerary

genericboardingpassextractor.cpp
/*
    SPDX-FileCopyrightText: 2021 Volker Krause <vkrause@kde.org>

    SPDX-License-Identifier: LGPL-2.0-or-later
*/
6
7#include "genericboardingpassextractor.h"
8#include "flightutil_p.h"
9#include "locationutil.h"
10#include "logging.h"
11#include "stringutil.h"
12
13#include "knowledgedb/airportdb.h"
14#include "knowledgedb/airportnametokenizer_p.h"
15#include "pdf/pdfdocument.h"
16#include "text/terminalfinder_p.h"
17#include "text/timefinder_p.h"
18
19#include <KItinerary/ExtractorDocumentNode>
20#include <KItinerary/ExtractorResult>
21#include <KItinerary/Flight>
22#include <KItinerary/Reservation>
23
24#include <QDebug>
25#include <QTimeZone>
26
27#include <chrono>
28#include <unordered_map>
29
30using namespace KItinerary;
31
// Plausibility windows for the times printed on a boarding pass.
// Each MIN/MAX pair bounds the accepted gap between two consecutive events;
// the checks below treat both bounds as inclusive.

// start of boarding -> departure
constexpr inline auto BOARDING_TO_DEPARTURE_MIN = std::chrono::minutes(20);
constexpr inline auto BOARDING_TO_DEPARTURE_MAX = std::chrono::minutes(75);
// check-in (or baggage drop) close -> start of boarding
constexpr inline auto CHECKIN_TO_BOARDING_MIN = std::chrono::minutes(0);
constexpr inline auto CHECKIN_TO_BOARDING_MAX = std::chrono::minutes(35);
// start of boarding -> gate close
constexpr inline auto BOARDING_TO_GATE_CLOSE_MIN = std::chrono::minutes(15);
constexpr inline auto BOARDING_TO_GATE_CLOSE_MAX = std::chrono::minutes(30);
// gate close -> departure
constexpr inline auto GATE_CLOSE_TO_DEPARTURE_MIN = std::chrono::minutes(10);
constexpr inline auto GATE_CLOSE_TO_DEPARTURE_MAX = std::chrono::minutes(15);
// anything shorter than this is not accepted as a flight
constexpr inline auto MINIMUM_FLIGHT_TIME = std::chrono::minutes(60);
41
42GenericBoardingPassExtractor::GenericBoardingPassExtractor()
43{
44 m_filter.setMimeType(QStringLiteral("internal/iata-bcbp"));
45 m_filter.setScope(ExtractorFilter::Descendants);
46}
47
48GenericBoardingPassExtractor::~GenericBoardingPassExtractor() = default;
49
51{
52 return QStringLiteral("<Generic PDF Boarding Pass>");
53}
54
56{
57 return node.content<PdfDocument*>() && m_filter.matches(node);
58}
59
60static void mergeOrAppend(QStringList &l, QStringView s)
61{
62 for (auto &n : l) {
63 if (n.compare(s, Qt::CaseInsensitive) == 0) {
65 return;
66 }
67 }
68 l.push_back(s.toString());
69}
70
71[[nodiscard]] static int airportDistance(KnowledgeDb::IataCode from, KnowledgeDb::IataCode to)
72{
73 const auto fromCoord = KnowledgeDb::coordinateForAirport(from);
74 const auto toCoord = KnowledgeDb::coordinateForAirport(to);
75 if (!fromCoord.isValid() || !toCoord.isValid()) {
76 return std::numeric_limits<int>::max();
77 }
78 return LocationUtil::distance({fromCoord.latitude, fromCoord.longitude}, {toCoord.latitude, toCoord.longitude});
79}
80
81[[nodiscard]] static bool isPlausibleBoardingTime(const QDateTime &boarding, const QDateTime &departure)
82{
83 const std::chrono::seconds boardingToDep(boarding.secsTo(departure));
84 return boardingToDep >= BOARDING_TO_DEPARTURE_MIN && boardingToDep <= BOARDING_TO_DEPARTURE_MAX;
85}
86
87[[nodiscard]] static std::chrono::seconds flightDuration(const QDateTime &fromTime, const QDateTime &toTime, KnowledgeDb::IataCode from, KnowledgeDb::IataCode to)
88{
89 // times are local, so convert them to the right timezone first
90 auto fromDt = fromTime;
92 auto toDt = toTime;
94 return std::chrono::seconds(fromDt.secsTo(toDt));
95}
96
97[[nodiscard]] static bool isPlausibleFlightTime(const QDateTime &fromTime, const QDateTime &toTime, KnowledgeDb::IataCode from, KnowledgeDb::IataCode to)
98{
99 const auto duration = flightDuration(fromTime, toTime, from, to);
100 if (duration < MINIMUM_FLIGHT_TIME) {
101 return false;
102 }
103
104 const auto distance = airportDistance(from, to);
105 return FlightUtil::isPlausibleDistanceForDuration(distance, duration);
106}
107
108[[nodiscard]] static bool isPlausibleCheckinClose(const QDateTime &checkinClose, const QDateTime &boarding)
109{
110 const std::chrono::seconds d(checkinClose.secsTo(boarding));
111 return d >= CHECKIN_TO_BOARDING_MIN && d <= CHECKIN_TO_BOARDING_MAX;
112}
113
114[[nodiscard]] static bool isPlausibleGateClose(const QDateTime &boarding, const QDateTime &gateClose, const QDateTime &departure)
115{
116 const std::chrono::seconds gateOpen(boarding.secsTo(gateClose));
117 const std::chrono::seconds gateCloseToDep(gateClose.secsTo(departure));
118
119 return gateOpen >= BOARDING_TO_GATE_CLOSE_MIN && gateOpen <= BOARDING_TO_GATE_CLOSE_MAX
120 && gateCloseToDep >= GATE_CLOSE_TO_DEPARTURE_MIN && gateCloseToDep <= GATE_CLOSE_TO_DEPARTURE_MAX;
121}
122
123[[nodiscard]] static bool conflictIfSet(const QDateTime &lhs, const QDateTime &rhs)
124{
125 return lhs.isValid() && rhs.isValid() && lhs != rhs;
126}
127
128static void applyFlightTimes(QList<QVariant> &result, const QDateTime &boarding, const QDateTime &dep, const QDateTime &arr)
129{
130 for (auto &res : result) {
131 auto flightRes = res.value<FlightReservation>();
132 auto flight = flightRes.reservationFor().value<Flight>();
133
134 // check if already set times match, otherwise discard the entire set
135 if (conflictIfSet(flight.boardingTime(), boarding) || conflictIfSet(flight.departureTime(), dep) || conflictIfSet(flight.arrivalTime(), arr)) {
136 continue;
137 }
138
139 // apply not yet set times
140 if (!flight.boardingTime().isValid() && boarding.isValid()) {
141 flight.setBoardingTime(boarding);
142 }
143 if (!flight.departureTime().isValid() && dep.isValid()) {
144 flight.setDepartureTime(dep);
145 }
146 if (!flight.arrivalTime().isValid() && arr.isValid()) {
147 flight.setArrivalTime(arr);
148 }
149 flightRes.setReservationFor(flight);
150 res = flightRes;
151 }
152}
153
155{
156 static TerminalFinder terminalFinder(u"^", u"(?=\\b|\\s|$)");
157
158 QList<QVariant> fullResult;
159
160 const auto pdf = node.content<PdfDocument*>();
161
162 std::vector<ExtractorDocumentNode> bcbpNodes;
163 m_filter.allMatches(node, bcbpNodes);
164 bcbpNodes.erase(std::remove_if(bcbpNodes.begin(), bcbpNodes.end(), [](const auto &node) {
165 return node.location().userType() != QMetaType::Int || node.result().isEmpty();
166 }), bcbpNodes.end());
167 std::sort(bcbpNodes.begin(), bcbpNodes.end(), [](const auto &lhs, const auto &rhs) { return lhs.location().toInt() < rhs.location().toInt(); });
168
169 for (auto it = bcbpNodes.begin(); it != bcbpNodes.end(); ++it) {
170 QDate departureDay;
171 KnowledgeDb::IataCode from, to;
172 QList<QVariant> result;
173
174 // 1 determine which airports we need to look for on the same page
175 const auto pageNum = (*it).location().toInt();
176 std::unordered_map<KnowledgeDb::IataCode, QStringList> airportNames;
177 std::unordered_map<KnowledgeDb::IataCode, QString> terminalNames;
178 for (auto it2 = it; it2 != bcbpNodes.end() && (*it2).location().toInt() == pageNum; ++it2) {
179 const auto flightReservations = (*it).result().result();
180 for (const auto &flightRes : flightReservations) {
181 const auto flight = flightRes.value<FlightReservation>().reservationFor().value<Flight>();
182 if (!flight.departureAirport().iataCode().isEmpty()) {
183 from = KnowledgeDb::IataCode{flight.departureAirport().iataCode()};
184 airportNames[from] = QStringList();
185 terminalNames[from] = QString();
186 }
187 if (!flight.arrivalAirport().iataCode().isEmpty()) {
188 to = KnowledgeDb::IataCode{flight.arrivalAirport().iataCode()};
189 airportNames[to] = QStringList();
190 terminalNames[to] = QString();
191 }
192 departureDay = flight.departureDay();
193 }
194 }
195
196 // 2 tokenize the page and scan for airport names
197 const auto page = pdf->page(pageNum);
198 qCDebug(Log) << "scanning page" << pageNum << "for airport names";
199 const auto pageText = page.text();
200 AirportNameTokenizer tokenizer(pageText);
201 while (tokenizer.hasNext()) {
202 const auto s = tokenizer.next();
203 if (s.compare(QLatin1StringView("international"),
204 Qt::CaseInsensitive) == 0) {
205 qCDebug(Log) << " ignoring" << s;
206 continue;
207 }
208
209 // IATA code of one of the airports
210 if (const auto code = KnowledgeDb::IataCode(s); !s.isNull() && airportNames.find(KnowledgeDb::IataCode{s}) != airportNames.end()) {
211 // also look for terminal information after the IATA code itself
212 const auto offset = s.size() + s.data() - pageText.data();
213 const auto res = terminalFinder.find(QStringView(pageText).mid(offset));
214 if (res.hasResult() && res.name != s.toString()) {
215 terminalNames[code] = res.name;
216 }
217
218 qCDebug(Log) << " found own IATA code" << s;
219 continue;
220 }
221
222 const auto iataCodes = KnowledgeDb::iataCodesFromName(s);
223 for (const auto code : iataCodes) {
224 auto it2 = airportNames.find(code);
225 if (it2 != airportNames.end()) {
226 qCDebug(Log) << " found candidate:" << s << iataCodes;
227 mergeOrAppend((*it2).second, s);
228
229 // look for a following terminal name at the position after s
230 const auto offset = s.size() + s.data() - pageText.data();
231 const auto res = terminalFinder.find(QStringView(pageText).mid(offset));
232 if (res.hasResult() && res.name != code.toString()) {
233 terminalNames[(*it2).first] = res.name;
234 }
235 }
236 }
237 }
238
239 // 3 augment the results with what we found
240 const auto flightReservations = (*it).result().result();
241 for (const auto &res : flightReservations) {
242 auto flightRes = res.value<FlightReservation>();
243 auto flight = flightRes.reservationFor().value<Flight>();
244 auto airport = flight.departureAirport();
245 if (airport.name().isEmpty()) {
246 airport.setName(airportNames[KnowledgeDb::IataCode{airport.iataCode()}].join(QLatin1Char(' ')));
247 }
248 flight.setDepartureAirport(airport);
249 if (flight.departureTerminal().isEmpty()) {
250 flight.setDepartureTerminal(terminalNames[KnowledgeDb::IataCode{airport.iataCode()}]);
251 }
252 airport = flight.arrivalAirport();
253 if (airport.name().isEmpty()) {
254 airport.setName(airportNames[KnowledgeDb::IataCode{airport.iataCode()}].join(QLatin1Char(' ')));
255 }
256 flight.setArrivalAirport(airport);
257 if (flight.arrivalTerminal().isEmpty()) {
258 flight.setArrivalTerminal(terminalNames[KnowledgeDb::IataCode{airport.iataCode()}]);
259 }
260 flightRes.setReservationFor(flight);
261 result.push_back(std::move(flightRes));
262 }
263
264 // 4 if there's only a single leg on this page, try to see if we can determine times
265 if (airportNames.size() == 2) {
266 TimeFinder timeFinder;
267 timeFinder.find(pageText);
268 std::vector<QDateTime> times;
269 for (const auto &res : timeFinder.results()) {
270 switch (res.dateTime.userType()) {
271 case QMetaType::QTime:
272 times.push_back(QDateTime(departureDay, res.dateTime.toTime()));
273 break;
275 if (res.dateTime.toDateTime().date() == departureDay) {
276 times.push_back(res.dateTime.toDateTime());
277 }
278 break;
279 case QMetaType::QDate:
280 default:
281 break;
282 }
283 }
284 std::sort(times.begin(), times.end());
285 times.erase(std::unique(times.begin(), times.end()), times.end());
286 qCDebug(Log) << times;
287 if (times.size() == 2) {
288 // boarding/departure only, and on the same day
289 if (isPlausibleBoardingTime(times[0], times[1]) && !isPlausibleFlightTime(times[0], times[1], from, to)) {
290 applyFlightTimes(result, times[0], times[1], {});
291 }
292 } else if (times.size() == 3) {
293 // boarding/departure/arrival on the same day
294 if (isPlausibleBoardingTime(times[0], times[1]) && isPlausibleFlightTime(times[1], times[2], from, to)) {
295 applyFlightTimes(result, times[0], times[1], times[2]);
296 // boarding/departure on the same day, arrival on the next day
297 } else if (isPlausibleBoardingTime(times[1], times[2]) && isPlausibleFlightTime(times[2], times[0].addDays(1), from, to)) {
298 applyFlightTimes(result, times[1], times[2], times[0].addDays(1));
299 }
300 // TODO handle boarding before midnight
301 // departure/arrival/duration
302 else if (isPlausibleFlightTime(times[1], times[2], from, to) && flightDuration(times[1], times[2], from, to) == std::chrono::minutes(times[0].time().hour() * 60 + times[0].time().minute())) {
303 applyFlightTimes(result, {}, times[1], times[2]);
304 }
305 } else if (times.size() == 4) {
306 // baggage drop or checkin close/boarding/departure/arrival
307 if (isPlausibleCheckinClose(times[0], times[1]) && isPlausibleBoardingTime(times[1], times[2]) && isPlausibleFlightTime(times[2], times[3], from, to)) {
308 applyFlightTimes(result, times[1], times[2], times[3]);
309 // boarding/gate close/departure/arrival
310 } else if (isPlausibleBoardingTime(times[0], times[2]) && isPlausibleGateClose(times[0], times[1], times[2]) && isPlausibleFlightTime(times[2], times[3], from, to)) {
311 applyFlightTimes(result, times[0], times[2], times[3]);
312 }
313 // TODO across midnight variants
314 }
315 }
316
317 fullResult += result;
318 }
319
320 return fullResult;
321}
A node in the extracted document object tree.
QJSValue content
The decoded content of this node.
Semantic data extraction engine.
@ Descendants
match any direct or indirect child nodes
Generic extraction result.
A flight reservation.
Definition reservation.h:90
A flight.
Definition flight.h:25
ExtractorResult extract(const ExtractorDocumentNode &node, const ExtractorEngine *engine) const override
Extract data from node.
bool canHandle(const KItinerary::ExtractorDocumentNode &node) const override
Fast check whether this extractor is applicable for node.
QString name() const override
Identifier for this extractor.
PDF document for extraction.
Definition pdfdocument.h:92
QTimeZone timezoneForAirport(IataCode iataCode)
Returns the timezone the airport with IATA code iataCode is in.
Definition airportdb.cpp:40
Coordinate coordinateForAirport(IataCode iataCode)
Returns the geographical coordinates the airport with IATA code iataCode is in.
Definition airportdb.cpp:30
AlphaId< uint16_t, 3 > IataCode
IATA airport code.
Definition iatacode.h:17
std::vector< IataCode > iataCodesFromName(QStringView name)
Returns all possible IATA code candidates for the given airport name.
int distance(const GeoCoordinates &coord1, const GeoCoordinates &coord2)
Computes the distance between two geo coordinates in meters.
QStringView betterString(QStringView lhs, QStringView rhs)
Assuming both sides are describing the same thing, this tries to find the "better" string.
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
KOSM_EXPORT double distance(const std::vector< const OSM::Node * > &path, Coordinate coord)
bool isValid() const const
qint64 secsTo(const QDateTime &other) const const
void setTimeZone(const QTimeZone &toZone)
void push_back(parameter_type value)
int compare(QChar ch) const const
const_pointer data() const const
bool isNull() const const
qsizetype size() const const
QString toString() const const
CaseInsensitive
T value() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 24 2025 11:52:35 by doxygen 1.13.2 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.