KMime

parsers.cpp
1/*
2 kmime_parsers.cpp
3
4 KMime, the KDE Internet mail/usenet news message library.
5 SPDX-FileCopyrightText: 2001 the KMime authors.
6 See file AUTHORS for details
7
8 SPDX-License-Identifier: LGPL-2.0-or-later
9*/
10#include "parsers_p.h"
11#include "util_p.h"
12
13#include <QMimeDatabase>
14#include <QRegularExpression>
15
16#include <cctype>
17
18using namespace KMime::Parser;
19
20namespace KMime
21{
22namespace Parser
23{
24
25MultiPart::MultiPart(const QByteArray &src, const QByteArray &boundary)
26 : m_src(src)
27 , m_boundary(boundary)
28{
29}
30
31bool MultiPart::parse()
32{
33 QByteArray b = "--" + m_boundary;
34 QByteArray part;
35 qsizetype pos1 = 0;
36 qsizetype pos2 = 0;
37 auto blen = b.length();
38
39 m_parts.clear();
40
41 //find the first valid boundary
42 while (true) {
43 if ((pos1 = m_src.indexOf(b, pos1)) == -1 || pos1 == 0 ||
44 m_src[pos1 - 1] == '\n') { //valid boundary found or no boundary at all
45 break;
46 }
47 pos1 += blen; //boundary found but not valid => skip it;
48 }
49
50 if (pos1 > -1) {
51 pos1 += blen;
52 if (m_src[pos1] == '-' && m_src[pos1 + 1] == '-') {
53 // the only valid boundary is the end-boundary
54 // this message is *really* broken
55 pos1 = -1; //we give up
56 } else if ((pos1 - blen) > 1) { //preamble present
57 m_preamble = m_src.left(pos1 - blen - 1);
58 }
59 }
60
61 while (pos1 > -1 && pos2 > -1) {
62
63 //skip the rest of the line for the first boundary - the message-part starts here
64 if ((pos1 = m_src.indexOf('\n', pos1)) > -1) {
65 //now search the next linebreak
66 //now find the next valid boundary
67 pos2 = ++pos1; //pos1 and pos2 point now to the beginning of the next line after the boundary
68 while (true) {
69 if ((pos2 = m_src.indexOf(b, pos2)) == -1 ||
70 m_src[pos2 - 1] == '\n') { //valid boundary or no more boundaries found
71 break;
72 }
73 pos2 += blen; //boundary is invalid => skip it;
74 }
75
76 if (pos2 == -1) { // no more boundaries found
77 part = m_src.mid(pos1, m_src.length() - pos1); //take the rest of the string
78 m_parts.append(part);
79 pos1 = -1;
80 pos2 = -1; //break;
81 } else {
82 part = m_src.mid(pos1, pos2 - pos1 - 1); // pos2 - 1 (\n) is part of the boundary (see RFC 2046, section 5.1.1)
83 m_parts.append(part);
84 pos2 += blen; //pos2 points now to the first character after the boundary
85 if (m_src[pos2] == '-' && m_src[pos2 + 1] == '-') { //end-boundary
86 pos1 = pos2 + 2; //pos1 points now to the character directly after the end-boundary
87
88 if ((pos1 = m_src.indexOf('\n', pos1)) > -1) { //skip the rest of this line
89 //everything after the end-boundary is considered as the epilouge
90 m_epilouge = m_src.mid(pos1 + 1, m_src.length() - pos1 - 1);
91 }
92 pos1 = -1;
93 pos2 = -1; //break
94 } else {
95 pos1 = pos2; //the search continues ...
96 }
97 }
98 }
99 }
100
101 return !m_parts.isEmpty();
102}
103
104//=============================================================================
105
106NonMimeParser::NonMimeParser(const QByteArray &src) :
107 m_src(src), m_partNr(-1), m_totalNr(-1)
108{
109}
110
111NonMimeParser::~NonMimeParser() = default;
112
113//==============================================================================
114
115[[nodiscard]] static qsizetype findUuencodeBeginMarker(const QByteArray &s, qsizetype startPos)
116{
117 auto idx = startPos;
118 while (true) {
119 idx = s.indexOf("begin ", idx);
120 if (idx < 0 || idx + 9 >= s.size()) {
121 break;
122 }
123 if (std::isdigit(s[idx + 6]) && std::isdigit(s[idx + 7]) && std::isdigit(s[idx + 8])) {
124 return idx;
125 }
126 idx += 6;
127 }
128 return -1;
129}
130
131UUEncoded::UUEncoded(const QByteArray &src, const QByteArray &head) :
132 NonMimeParser(src), m_head(head)
133{}
134
135bool UUEncoded::parse()
136{
137 qsizetype currentPos = 0;
138 bool success = true;
139 bool firstIteration = true;
140
141 while (success) {
142 qsizetype beginPos = currentPos;
143 qsizetype uuStart = currentPos;
144 qsizetype endPos = 0;
145 int lineCount = 0;
146 int MCount = 0;
147 qsizetype pos = 0;
148 qsizetype len = 0;
149 bool containsBegin = false;
150 bool containsEnd = false;
151 QByteArray tmp;
152 QByteArray fileName;
153
154 if ((beginPos = findUuencodeBeginMarker(m_src, currentPos)) > -1 &&
155 (beginPos == 0 || m_src.at(beginPos - 1) == '\n')) {
156 containsBegin = true;
157 uuStart = m_src.indexOf('\n', beginPos);
158 if (uuStart == -1) { //no more line breaks found, we give up
159 success = false;
160 break;
161 } else {
162 uuStart++; //points now at the beginning of the next line
163 }
164 } else {
165 beginPos = currentPos;
166 }
167
168 if (!containsBegin || (endPos = m_src.indexOf("\nend", (uuStart > 0) ? uuStart - 1 : 0)) == -1) {
169 endPos = m_src.length(); //no end found
170 } else {
171 containsEnd = true;
172 }
173
174 if ((containsBegin && containsEnd) || firstIteration) {
175
176 //printf("beginPos=%d , uuStart=%d , endPos=%d\n", beginPos, uuStart, endPos);
177 //all lines in a uuencoded text start with 'M'
178 for (auto idx = uuStart; idx < endPos; idx++) {
179 if (m_src.at(idx) == '\n') {
180 lineCount++;
181 if (idx + 1 < endPos && m_src.at(idx + 1) == 'M') {
182 idx++;
183 MCount++;
184 }
185
186 // partial version of the check below, for things that can be evaluated
187 // while this loop is still counting
188 if ((lineCount - MCount) > 10) {
189 success = false;
190 break;
191 }
192 }
193 }
194
195 //printf("lineCount=%d , MCount=%d\n", lineCount, MCount);
196 if (MCount == 0 || (lineCount - MCount) > 10 || ((!containsBegin || !containsEnd) && (MCount < 15))) {
197 // harder check for split-articles
198 success = false;
199 break; //too many "non-M-Lines" found, we give up
200 }
201
202 const auto subject = KMime::extractHeader(m_head, "Subject");
203 if ((!containsBegin || !containsEnd) && !subject.isNull()) {
204 // message may be split up => parse subject
205 const QRegularExpression subjectRegex(QStringLiteral("[0-9]+/[0-9]+"));
206 const auto match = subjectRegex.match(QLatin1StringView(subject));
207 pos = match.capturedStart(0);
208 len = match.capturedLength(0);
209 if (pos != -1) {
210 tmp = subject.mid(pos, len);
211 pos = tmp.indexOf('/');
212 m_partNr = tmp.left(pos).toInt();
213 m_totalNr = tmp.right(tmp.length() - pos - 1).toInt();
214 } else {
215 success = false;
216 break; //no "part-numbers" found in the subject, we give up
217 }
218 }
219
220 //everything before "begin" is text
221 if (beginPos > 0) {
222 m_text.append(m_src.mid(currentPos, beginPos - currentPos));
223 }
224
225 if (containsBegin) {
226 //everything between "begin ### " and the next LF is considered as the filename
227 fileName = m_src.mid(beginPos + 10, uuStart - beginPos - 11);
228 } else {
229 fileName = "";
230 }
231 m_filenames.append(fileName);
232 //everything between "begin" and "end" is uuencoded
233 m_bins.append(m_src.mid(uuStart, endPos - uuStart + 1));
234 QMimeDatabase db;
235 m_mimeTypes.append(db.mimeTypeForFile(QString::fromUtf8(fileName), QMimeDatabase::MatchExtension).name().toUtf8());
236 firstIteration = false;
237
238 auto next = m_src.indexOf('\n', endPos + 1);
239 if (next == -1) { //no more line breaks found, we give up
240 success = false;
241 break;
242 } else {
243 next++; //points now at the beginning of the next line
244 }
245 currentPos = next;
246
247 } else {
248 success = false;
249 }
250 }
251
252 // append trailing text part of the article (only
253 if (!m_bins.isEmpty() || isPartial()) {
254 m_text.append(m_src.right(m_src.length() - currentPos));
255 return true;
256 }
257
258 return false;
259}
260
261//==============================================================================
262
263YENCEncoded::YENCEncoded(const QByteArray &src) :
264 NonMimeParser(src)
265{
266}
267
268bool YENCEncoded::yencMeta(QByteArray &src, const QByteArray &name, int *value)
269{
270 bool found = false;
271 QByteArray sought = name + '=';
272
273 auto iPos = src.indexOf(sought);
274 if (iPos > -1) {
275 auto pos1 = src.indexOf(' ', iPos);
276 auto pos2 = src.indexOf('\r', iPos);
277 auto pos3 = src.indexOf('\t', iPos);
278 auto pos4 = src.indexOf('\n', iPos);
279 if (pos2 >= 0 && (pos1 < 0 || pos1 > pos2)) {
280 pos1 = pos2;
281 }
282 if (pos3 >= 0 && (pos1 < 0 || pos1 > pos3)) {
283 pos1 = pos3;
284 }
285 if (pos4 >= 0 && (pos1 < 0 || pos1 > pos4)) {
286 pos1 = pos4;
287 }
288 iPos = src.lastIndexOf('=', pos1) + 1;
289 if (iPos < pos1) {
290 char c = src.at(iPos);
291 if (c >= '0' && c <= '9') {
292 found = true;
293 *value = src.mid(iPos, pos1 - iPos).toInt();
294 }
295 }
296 }
297 return found;
298}
299
300bool YENCEncoded::parse()
301{
302 qsizetype currentPos = 0;
303 bool success = true;
304 while (success) {
305 qsizetype beginPos = currentPos;
306 qsizetype yencStart = currentPos;
307 bool containsPart = false;
308 QByteArray fileName;
309
310 if ((beginPos = m_src.indexOf("=ybegin ", currentPos)) > -1 &&
311 (beginPos == 0 || m_src.at(beginPos - 1) == '\n')) {
312 yencStart = m_src.indexOf('\n', beginPos);
313 if (yencStart == -1) { // no more line breaks found, give up
314 success = false;
315 break;
316 } else {
317 yencStart++;
318 if (m_src.indexOf("=ypart", yencStart) == yencStart) {
319 containsPart = true;
320 yencStart = m_src.indexOf('\n', yencStart);
321 if (yencStart == -1) {
322 success = false;
323 break;
324 }
325 yencStart++;
326 }
327 }
328 // Try to identify yenc meta data
329
330 // Filenames can contain any embedded chars until end of line
331 QByteArray meta = m_src.mid(beginPos, yencStart - beginPos);
332 qsizetype namePos = meta.indexOf("name=");
333 if (namePos == -1) {
334 success = false;
335 break;
336 }
337 qsizetype eolPos = meta.indexOf('\r', namePos);
338 if (eolPos == -1) {
339 eolPos = meta.indexOf('\n', namePos);
340 }
341 if (eolPos == -1) {
342 success = false;
343 break;
344 }
345 fileName = meta.mid(namePos + 5, eolPos - (namePos + 5));
346
347 // Other metadata is integer
348 int yencLine;
349 if (!yencMeta(meta, "line", &yencLine)) {
350 success = false;
351 break;
352 }
353 int yencSize;
354 if (!yencMeta(meta, "size", &yencSize)) {
355 success = false;
356 break;
357 }
358
359 int partBegin;
360 int partEnd;
361 if (containsPart) {
362 if (!yencMeta(meta, "part", &m_partNr)) {
363 success = false;
364 break;
365 }
366 if (!yencMeta(meta, "begin", &partBegin) ||
367 !yencMeta(meta, "end", &partEnd)) {
368 success = false;
369 break;
370 }
371 if (!yencMeta(meta, "total", &m_totalNr)) {
372 m_totalNr = m_partNr + 1;
373 }
374 if (yencSize == partEnd - partBegin + 1) {
375 m_totalNr = 1;
376 } else {
377 yencSize = partEnd - partBegin + 1;
378 }
379 }
380
381 // We have a valid yenc header; now we extract the binary data
382 int totalSize = 0;
383 qsizetype pos = yencStart;
384 qsizetype len = m_src.length();
385 bool lineStart = true;
386 int lineLength = 0;
387 bool containsEnd = false;
388 QByteArray binary;
389 binary.resize(yencSize);
390 while (pos < len) {
391 int ch = m_src.at(pos);
392 if (ch < 0) {
393 ch += 256;
394 }
395 if (ch == '\r') {
396 if (lineLength != yencLine && totalSize != yencSize) {
397 break;
398 }
399 pos++;
400 } else if (ch == '\n') {
401 lineStart = true;
402 lineLength = 0;
403 pos++;
404 } else {
405 if (ch == '=') {
406 if (pos + 1 < len) {
407 ch = m_src.at(pos + 1);
408 if (lineStart && ch == 'y') {
409 containsEnd = true;
410 break;
411 }
412 pos += 2;
413 ch -= 64 + 42;
414 if (ch < 0) {
415 ch += 256;
416 }
417 if (totalSize >= yencSize) {
418 break;
419 }
420 binary[totalSize++] = ch;
421 lineLength++;
422 } else {
423 break;
424 }
425 } else {
426 ch -= 42;
427 if (ch < 0) {
428 ch += 256;
429 }
430 if (totalSize >= yencSize) {
431 break;
432 }
433 binary[totalSize++] = ch;
434 lineLength++;
435 pos++;
436 }
437 lineStart = false;
438 }
439 }
440
441 if (!containsEnd) {
442 success = false;
443 break;
444 }
445 if (totalSize != yencSize) {
446 success = false;
447 break;
448 }
449
450 // pos now points to =yend; get end data
451 eolPos = m_src.indexOf('\n', pos);
452 if (eolPos == -1) {
453 success = false;
454 break;
455 }
456 meta = m_src.mid(pos, eolPos - pos);
457 if (!yencMeta(meta, "size", &totalSize)) {
458 success = false;
459 break;
460 }
461 if (totalSize != yencSize) {
462 success = false;
463 break;
464 }
465
466 m_filenames.append(fileName);
467 QMimeDatabase db;
468 m_mimeTypes.append(db.mimeTypeForFile(QString::fromUtf8(fileName), QMimeDatabase::MatchExtension).name().toUtf8());
469 m_bins.append(binary);
470
471 //everything before "begin" is text
472 if (beginPos > 0) {
473 m_text.append(m_src.mid(currentPos, beginPos - currentPos));
474 }
475 currentPos = eolPos + 1;
476
477 } else {
478 success = false;
479 }
480 }
481
482 // append trailing text part of the article
483 if (!m_bins.isEmpty()) {
484 m_text.append(m_src.right(m_src.length() - currentPos));
485 return true;
486 }
487
488 return false;
489}
490
491} // namespace Parser
492
493} // namespace KMime
KCOREADDONS_EXPORT Result match(QStringView pattern, QStringView str)
QString name(StandardAction id)
QAction * next(const QObject *recvr, const char *slot, QObject *parent)
QByteArray & append(QByteArrayView data)
char at(qsizetype i) const const
qsizetype indexOf(QByteArrayView bv, qsizetype from) const const
qsizetype lastIndexOf(QByteArrayView bv) const const
QByteArray left(qsizetype len) const const
qsizetype length() const const
QByteArray mid(qsizetype pos, qsizetype len) const const
void resize(qsizetype newSize, char c)
QByteArray right(qsizetype len) const const
int toInt(bool *ok, int base) const const
QMimeType mimeTypeForFile(const QFileInfo &fileInfo, MatchMode mode) const const
QString fromUtf8(QByteArrayView str)
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:48:31 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.