KMime

headerparsing.cpp
1/* -*- c++ -*-
2 kmime_header_parsing.cpp
3
4 KMime, the KDE Internet mail/usenet news message library.
5 SPDX-FileCopyrightText: 2001-2002 Marc Mutz <mutz@kde.org>
6
7 SPDX-License-Identifier: LGPL-2.0-or-later
8*/
9
10#include "headerparsing.h"
11#include "headerparsing_p.h"
12
13#include "headerfactory_p.h"
14#include "headers.h"
15#include "headers_p.h"
16#include "util.h"
17#include "util_p.h"
18#include "codecs_p.h"
19#include "kmime_debug.h"
20#include "warning_p.h"
21
22#include <KCodecs>
23
24#include <QStringDecoder>
25#include <QTimeZone>
26
27#include <cassert>
28#include <cctype> // for isdigit
29
30using namespace KMime;
31using namespace KMime::Types;
32
33namespace KMime
34{
35
namespace Types
{
// Optimization to avoid allocating QStrings when the value isn't encoded.
//
// Holds one raw parameter value as produced by parseParameter():
//  - quoted-string values are unescaped into `qstring`;
//  - token values are stored as `view`, which parseToken() points directly
//    at the bytes of the parsed header.
// A default-constructed instance (both members empty) is used by
// parseParameter() when the value is missing or could not be parsed.
struct KMIME_EXPORT QStringOrQPair {
    QString qstring;
    QByteArrayView view;
};
} // namespace Types
44
45namespace HeaderParsing
46{
47
48// parse the encoded-word (scursor points to after the initial '=')
49bool parseEncodedWord(const char *&scursor, const char *const send,
50 QString &result, QByteArray &language,
51 QByteArray &usedCS, const QByteArray &defaultCS)
52{
53 // make sure the caller already did a bit of the work.
54 assert(*(scursor - 1) == '=');
55
56 //
57 // STEP 1:
58 // scan for the charset/language portion of the encoded-word
59 //
60
61 char ch = *scursor++;
62
63 if (ch != '?') {
64 // qCDebug(KMIME_LOG) << "first";
65 //KMIME_WARN_PREMATURE_END_OF( EncodedWord );
66 return false;
67 }
68
69 // remember start of charset (i.e. just after the initial "=?") and
70 // language (just after the first '*') fields:
71 const char *charsetStart = scursor;
72 const char *languageStart = nullptr;
73
74 // find delimiting '?' (and the '*' separating charset and language
75 // tags, if any):
76 for (; scursor != send ; scursor++) {
77 if (*scursor == '?') {
78 break;
79 } else if (*scursor == '*' && languageStart == nullptr) {
80 languageStart = scursor + 1;
81 }
82 }
83
84 // not found? can't be an encoded-word!
85 if (scursor == send || *scursor != '?') {
86 // qCDebug(KMIME_LOG) << "second";
87 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
88 return false;
89 }
90
91 // extract the language information, if any (if languageStart is 0,
92 // language will be null, too):
93 QByteArray maybeLanguage(languageStart, scursor - languageStart);
94 // extract charset information (keep in mind: the size given to the
95 // ctor is one off due to the \0 terminator):
96 QByteArray maybeCharset(charsetStart,
97 (languageStart ? languageStart - 1 : scursor) - charsetStart);
98
99 //
100 // STEP 2:
101 // scan for the encoding portion of the encoded-word
102 //
103
104 // remember start of encoding (just _after_ the second '?'):
105 scursor++;
106 const char *encodingStart = scursor;
107
108 // find next '?' (ending the encoding tag):
109 for (; scursor != send ; scursor++) {
110 if (*scursor == '?') {
111 break;
112 }
113 }
114
115 // not found? Can't be an encoded-word!
116 if (scursor == send || *scursor != '?') {
117 // qCDebug(KMIME_LOG) << "third";
118 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
119 return false;
120 }
121
122 // extract the encoding information:
123 QByteArray maybeEncoding(encodingStart, scursor - encodingStart);
124
125 // qCDebug(KMIME_LOG) << "parseEncodedWord: found charset == \"" << maybeCharset
126 // << "\"; language == \"" << maybeLanguage
127 // << "\"; encoding == \"" << maybeEncoding << "\"";
128
129 //
130 // STEP 3:
131 // scan for encoded-text portion of encoded-word
132 //
133
134 // remember start of encoded-text (just after the third '?'):
135 scursor++;
136 const char *encodedTextStart = scursor;
137
138 // find the '?=' sequence (ending the encoded-text):
139 for (; scursor != send ; scursor++) {
140 if (*scursor == '?') {
141 if (scursor + 1 != send) {
142 if (*(scursor + 1) != '=') { // We expect a '=' after the '?', but we got something else; ignore
143 KMIME_WARN << "Stray '?' in q-encoded word, ignoring this.";
144 continue;
145 } else { // yep, found a '?=' sequence
146 scursor += 2;
147 break;
148 }
149 } else { // The '?' is the last char, but we need a '=' after it!
150 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
151 return false;
152 }
153 }
154 }
155
156 if (*(scursor - 2) != '?' || *(scursor - 1) != '=' ||
157 scursor < encodedTextStart + 2) {
158 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
159 return false;
160 }
161
162 // set end sentinel for encoded-text:
163 const char *const encodedTextEnd = scursor - 2;
164
165 //
166 // STEP 4:
167 // setup decoders for the transfer encoding and the charset
168 //
169
170 // try if there's a codec for the encoding found:
171 KCodecs::Codec *codec = KCodecs::Codec::codecForName(maybeEncoding);
172 if (!codec) {
173 KMIME_WARN_UNKNOWN(Encoding, maybeEncoding);
174 return false;
175 }
176
177 // get an instance of a corresponding decoder:
178 KCodecs::Decoder *dec = codec->makeDecoder();
179 assert(dec);
180
181 // try if there's a (text)codec for the charset found:
182 QStringDecoder textCodec;
183 if (maybeCharset.isEmpty()) {
184 textCodec = QStringDecoder(defaultCS.constData());
185 if (!textCodec.isValid()) {
187 }
188 usedCS = cachedCharset(defaultCS);
189 } else {
190 textCodec = QStringDecoder(maybeCharset.constData());
191 if (textCodec.isValid()) { //no suitable codec found => use default charset
192 usedCS = cachedCharset(defaultCS);
193 } else {
195 usedCS = cachedCharset(maybeCharset);
196 }
197 }
198
199 if (!textCodec.isValid()) {
200 KMIME_WARN_UNKNOWN(Charset, maybeCharset);
201 delete dec;
202 return false;
203 };
204
205 // qCDebug(KMIME_LOG) << "mimeName(): \"" << textCodec->name() << "\"";
206
207 // allocate a temporary buffer to store the 8bit text:
208 const auto encodedTextLength = encodedTextEnd - encodedTextStart;
209 QByteArray buffer;
210 buffer.resize(codec->maxDecodedSizeFor(encodedTextLength));
211 char *bbegin = buffer.data();
212 char *bend = bbegin + buffer.length();
213
214 //
215 // STEP 5:
216 // do the actual decoding
217 //
218
219 if (!dec->decode(encodedTextStart, encodedTextEnd, bbegin, bend)) {
220 KMIME_WARN << codec->name() << "codec lies about its maxDecodedSizeFor("
221 << encodedTextLength << ")\nresult may be truncated";
222 }
223
224 result = textCodec.decode(QByteArrayView(buffer.data(), bbegin - buffer.data()));
225
226 // qCDebug(KMIME_LOG) << "result now: \"" << result << "\"";
227 // cleanup:
228 delete dec;
229 language = maybeLanguage;
230
231 return true;
232}
233
// Advances scursor past any run of SP, HTAB, CR and LF, stopping at the
// first other character or at send, whichever comes first.
static inline void eatWhiteSpace(const char *&scursor, const char *const send)
{
    for (; scursor != send; ++scursor) {
        switch (*scursor) {
        case ' ':
        case '\t':
        case '\r':
        case '\n':
            continue; // still whitespace: keep going
        default:
            return;   // first non-whitespace character: leave cursor on it
        }
    }
}
242
243bool parseAtom(const char*&scursor, const char *const send,
244 QByteArrayView &result, bool allow8Bit)
245{
246 bool success = false;
247 const char *start = scursor;
248
249 while (scursor != send) {
250 signed char ch = *scursor++;
251 if (ch > 0 && isAText(ch)) {
252 // AText: OK
253 success = true;
254 } else if (allow8Bit && ch < 0) {
255 // 8bit char: not OK, but be tolerant.
256 KMIME_WARN_8BIT(ch);
257 success = true;
258 } else {
259 // CTL or special - marking the end of the atom:
260 // re-set sursor to point to the offending
261 // char and return:
262 scursor--;
263 break;
264 }
265 }
266 result = QByteArrayView(start, scursor - start);
267 return success;
268}
269
270bool parseToken(const char*&scursor, const char *const send,
271 QByteArrayView &result, ParseTokenFlags flags)
272{
273 bool success = false;
274 const char *start = scursor;
275
276 while (scursor != send) {
277 signed char ch = *scursor++;
278 if (ch > 0 && isTText(ch)) {
279 // TText: OK
280 success = true;
281 } else if ((flags & ParseTokenAllow8Bit) && ch < 0) {
282 // 8bit char: not OK, but be tolerant.
283 KMIME_WARN_8BIT(ch);
284 success = true;
285 } else if ((flags & ParseTokenRelaxedTText) && ch == '/') {
286 success = true;
287 } else {
288 // CTL or tspecial - marking the end of the atom:
289 // re-set sursor to point to the offending
290 // char and return:
291 scursor--;
292 break;
293 }
294 }
295 result = QByteArrayView(start, scursor - start);
296 return success;
297}
298
299#define READ_ch_OR_FAIL if ( scursor == send ) { \
300 KMIME_WARN_PREMATURE_END_OF( GenericQuotedString ); \
301 return false; \
302 } else { \
303 ch = *scursor++; \
304 }
305
306// known issues:
307//
308// - doesn't handle quoted CRLF
309
310bool parseGenericQuotedString(const char *&scursor, const char *const send,
311 QString &result, bool isCRLF,
312 const char openChar, const char closeChar)
313{
314 // We are in a quoted-string or domain-literal or comment and the
315 // cursor points to the first char after the openChar.
316 // We will apply unfolding and quoted-pair removal.
317 // We return when we either encounter the end or unescaped openChar
318 // or closeChar.
319 assert(*(scursor - 1) == openChar || *(scursor - 1) == closeChar);
320
321 while (scursor != send) {
322 char ch = *scursor++;
323
324 if (ch == closeChar || ch == openChar) {
325 // end of quoted-string or another opening char:
326 // let caller decide what to do.
327 return true;
328 }
329
330 switch (ch) {
331 case '\\': // quoted-pair
332 // misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5
333 READ_ch_OR_FAIL;
334 KMIME_WARN_IF_8BIT(ch);
335 result += QLatin1Char(ch);
336 break;
337 case '\r':
338 // ###
339 // The case of lonely '\r' is easy to solve, as they're
340 // not part of Unix Line-ending conventions.
341 // But I see a problem if we are given Unix-native
342 // line-ending-mails, where we cannot determine anymore
343 // whether a given '\n' was part of a CRLF or was occurring
344 // on its own.
345 READ_ch_OR_FAIL;
346 if (ch != '\n') {
347 // CR on its own...
348 KMIME_WARN_LONE(CR);
349 result += QLatin1Char('\r');
350 scursor--; // points to after the '\r' again
351 } else {
352 // CRLF encountered.
353 // lookahead: check for folding
354 READ_ch_OR_FAIL;
355 if (ch == ' ' || ch == '\t') {
356 // correct folding;
357 // position cursor behind the CRLF WSP (unfolding)
358 // and add the WSP to the result
359 result += QLatin1Char(ch);
360 } else {
361 // this is the "shouldn't happen"-case. There is a CRLF
362 // inside a quoted-string without it being part of FWS.
363 // We take it verbatim.
364 KMIME_WARN_NON_FOLDING(CRLF);
365 result += QLatin1StringView("\r\n");
366 // the cursor is decremented again, we need not
367 // duplicate the whole switch here. "ch" could've been
368 // everything (incl. openChar or closeChar).
369 scursor--;
370 }
371 }
372 break;
373 case '\n':
374 // Note: CRLF has been handled above already!
375 // ### LF needs special treatment, depending on whether isCRLF
376 // is true (we can be sure a lonely '\n' was meant this way) or
377 // false ('\n' alone could have meant LF or CRLF in the original
378 // message. This parser assumes CRLF iff the LF is followed by
379 // either WSP (folding) or NULL (premature end of quoted-string;
380 // Should be fixed, since NULL is allowed as per rfc822).
381 READ_ch_OR_FAIL;
382 if (!isCRLF && (ch == ' ' || ch == '\t')) {
383 // folding
384 // correct folding
385 result += QLatin1Char(ch);
386 } else {
387 // non-folding
388 KMIME_WARN_LONE(LF);
389 result += QLatin1Char('\n');
390 // pos is decremented, so's we need not duplicate the whole
391 // switch here. ch could've been everything (incl. <">, "\").
392 scursor--;
393 }
394 break;
395 case '=': {
396 // ### Work around broken clients that send encoded words in quoted-strings
397 // For example, older KMail versions.
398 if (scursor == send) {
399 break;
400 }
401
402 const char *oldscursor = scursor;
403 QString tmp;
404 QByteArray lang;
405 QByteArray charset;
406 if (*scursor++ == '?') {
407 --scursor;
408 if (parseEncodedWord(scursor, send, tmp, lang, charset)) {
409 result += tmp;
410 //qDebug() << " tmp " << tmp;
411 if (scursor == send) {
412 break;
413 } else if (*scursor++ == ' ') { //Workaround Bug 362650 thunderbird add space for each new line
414 if (scursor == send) {
415 --scursor;
416 break;
417 } else if (*scursor++ == '=') {
418 if (scursor == send) {
419 --scursor;
420 --scursor;
421 break;
422 } else if (*scursor++ == '?') {
423 --scursor;
424 --scursor;
425 break;
426 }
427 } else {
428 --scursor;
429 --scursor;
430 }
431 } else {
432 --scursor;
433 }
434
435 break;
436 } else {
437 scursor = oldscursor;
438 }
439 } else {
440 scursor = oldscursor;
441 }
442 // fall through
443 [[fallthrough]];
444 }
445 default:
446 KMIME_WARN_IF_8BIT(ch);
447 result += QLatin1Char(ch);
448 }
449 }
450
451 return false;
452}
453
454// known issues:
455//
456// - doesn't handle encoded-word inside comments.
457
458bool parseComment(const char *&scursor, const char *const send,
459 QString &result, bool isCRLF, bool reallySave)
460{
461 int commentNestingDepth = 1;
462 const char *afterLastClosingParenPos = nullptr;
463 QString maybeCmnt;
464 const char *oldscursor = scursor;
465
466 assert(*(scursor - 1) == '(');
467
468 while (commentNestingDepth) {
469 QString cmntPart;
470 if (parseGenericQuotedString(scursor, send, cmntPart, isCRLF, '(', ')')) {
471 assert(*(scursor - 1) == ')' || *(scursor - 1) == '(');
472 // see the kdoc for the above function for the possible conditions
473 // we have to check:
474 switch (*(scursor - 1)) {
475 case ')':
476 if (reallySave) {
477 // add the chunk that's now surely inside the comment.
478 result += maybeCmnt;
479 result += cmntPart;
480 if (commentNestingDepth > 1) {
481 // don't add the outermost ')'...
482 result += QLatin1Char(')');
483 }
484 maybeCmnt.clear();
485 }
486 afterLastClosingParenPos = scursor;
487 --commentNestingDepth;
488 break;
489 case '(':
490 if (reallySave) {
491 // don't add to "result" yet, because we might find that we
492 // are already outside the (broken) comment...
493 maybeCmnt += cmntPart;
494 maybeCmnt += QLatin1Char('(');
495 }
496 ++commentNestingDepth;
497 break;
498 default: assert(0);
499 } // switch
500 } else {
501 // !parseGenericQuotedString, i.e. premature end
502 if (afterLastClosingParenPos) {
503 scursor = afterLastClosingParenPos;
504 } else {
505 scursor = oldscursor;
506 }
507 return false;
508 }
509 } // while
510
511 return true;
512}
513
514// known issues: none.
515
516bool parsePhrase(const char *&scursor, const char *const send,
517 QString &result, bool isCRLF)
518{
519 enum {
520 None, Phrase, Atom, EncodedWord, QuotedString
521 } found = None;
522
523 QString tmp;
524 QByteArray lang;
525 QByteArray charset;
526 QByteArrayView tmpAtom;
527 const char *successfullyParsed = nullptr;
528 // only used by the encoded-word branch
529 const char *oldscursor;
530 // used to suppress whitespace between adjacent encoded-words
531 // (rfc2047, 6.2):
532 bool lastWasEncodedWord = false;
533
534 while (scursor != send) {
535 char ch = *scursor++;
536 switch (ch) {
537 case '.': // broken, but allow for intorop's sake
538 if (found == None) {
539 --scursor;
540 return false;
541 } else {
542 if (scursor != send && (*scursor == ' ' || *scursor == '\t')) {
543 result += QLatin1StringView(". ");
544 } else {
545 result += QLatin1Char('.');
546 }
547 successfullyParsed = scursor;
548 }
549 break;
550 case '"': // quoted-string
551 tmp.clear();
552 if (parseGenericQuotedString(scursor, send, tmp, isCRLF, '"', '"')) {
553 successfullyParsed = scursor;
554 assert(*(scursor - 1) == '"');
555 switch (found) {
556 case None:
557 found = QuotedString;
558 break;
559 case Phrase:
560 case Atom:
561 case EncodedWord:
562 case QuotedString:
563 found = Phrase;
564 result += QLatin1Char(' '); // rfc822, 3.4.4
565 break;
566 default:
567 assert(0);
568 }
569 lastWasEncodedWord = false;
570 result += tmp;
571 } else {
572 // premature end of quoted string.
573 // What to do? Return leading '"' as special? Return as quoted-string?
574 // We do the latter if we already found something, else signal failure.
575 if (found == None) {
576 return false;
577 } else {
578 result += QLatin1Char(' '); // rfc822, 3.4.4
579 result += tmp;
580 return true;
581 }
582 }
583 break;
584 case '(': // comment
585 // parse it, but ignore content:
586 tmp.clear();
587 if (parseComment(scursor, send, tmp, isCRLF,
588 false /*don't bother with the content*/)) {
589 successfullyParsed = scursor;
590 lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2
591 } else {
592 if (found == None) {
593 return false;
594 } else {
595 scursor = successfullyParsed;
596 return true;
597 }
598 }
599 break;
600 case '=': // encoded-word
601 tmp.clear();
602 oldscursor = scursor;
603 lang.clear();
604 charset.clear();
605 if (parseEncodedWord(scursor, send, tmp, lang, charset)) {
606 successfullyParsed = scursor;
607 switch (found) {
608 case None:
609 found = EncodedWord;
610 break;
611 case Phrase:
612 case EncodedWord:
613 case Atom:
614 case QuotedString:
615 if (!lastWasEncodedWord) {
616 result += QLatin1Char(' '); // rfc822, 3.4.4
617 }
618 found = Phrase;
619 break;
620 default: assert(0);
621 }
622 lastWasEncodedWord = true;
623 result += tmp;
624 break;
625 } else {
626 // parse as atom:
627 scursor = oldscursor;
628 }
629 [[fallthrough]];
630 // fall though...
631
632 default: //atom
633 scursor--;
634 if (parseAtom(scursor, send, tmpAtom, true /* allow 8bit */)) {
635 successfullyParsed = scursor;
636 switch (found) {
637 case None:
638 found = Atom;
639 break;
640 case Phrase:
641 case Atom:
642 case EncodedWord:
643 case QuotedString:
644 found = Phrase;
645 result += QLatin1Char(' '); // rfc822, 3.4.4
646 break;
647 default:
648 assert(0);
649 }
650 lastWasEncodedWord = false;
651 result += QLatin1StringView(tmpAtom);
652 } else {
653 if (found == None) {
654 return false;
655 } else {
656 scursor = successfullyParsed;
657 return true;
658 }
659 }
660 }
661 eatWhiteSpace(scursor, send);
662 }
663
664 return found != None;
665}
666
667bool parseDotAtom(const char *&scursor, const char *const send,
668 QByteArrayView &result, bool isCRLF)
669{
670 eatCFWS(scursor, send, isCRLF);
671
672 // always points to just after the last atom parsed:
673 const char *successfullyParsed;
674
675 QByteArrayView maybeAtom;
676 if (!parseAtom(scursor, send, maybeAtom, false /* no 8bit */)) {
677 return false;
678 }
679 result = maybeAtom;
680 successfullyParsed = scursor;
681
682 while (scursor != send) {
683
684 // end of header or no '.' -> return
685 if (scursor == send || *scursor != '.') {
686 return true;
687 }
688 scursor++; // eat '.'
689
690 if (scursor == send || !isAText(*scursor)) {
691 // end of header or no AText, but this time following a '.'!:
692 // reset cursor to just after last successfully parsed char and
693 // return:
694 scursor = successfullyParsed;
695 return true;
696 }
697
698 // try to parse the next atom:
699 maybeAtom = {};
700 if (!parseAtom(scursor, send, maybeAtom, false /*no 8bit*/)) {
701 scursor = successfullyParsed;
702 return true;
703 }
704
705 result = QByteArrayView(result.constData(), result.size() + 1 + maybeAtom.size());
706 successfullyParsed = scursor;
707 }
708
709 scursor = successfullyParsed;
710 return true;
711}
712
713void eatCFWS(const char *&scursor, const char *const send, bool isCRLF)
714{
715 QString dummy;
716
717 while (scursor != send) {
718 const char *oldscursor = scursor;
719
720 char ch = *scursor++;
721
722 switch (ch) {
723 case ' ':
724 case '\t': // whitespace
725 case '\r':
726 case '\n': // folding
727 continue;
728
729 case '(': // comment
730 if (parseComment(scursor, send, dummy, isCRLF, false /*don't save*/)) {
731 continue;
732 }
733 scursor = oldscursor;
734 return;
735
736 default:
737 scursor = oldscursor;
738 return;
739 }
740 }
741}
742
743bool parseDomain(const char *&scursor, const char *const send,
744 QString &result, bool isCRLF)
745{
746 eatCFWS(scursor, send, isCRLF);
747 if (scursor == send) {
748 return false;
749 }
750
751 // domain := dot-atom / domain-literal / atom *("." atom)
752 //
753 // equivalent to:
754 // domain = dot-atom / domain-literal,
755 // since parseDotAtom does allow CFWS between atoms and dots
756
757 if (*scursor == '[') {
758 // domain-literal:
759 QString maybeDomainLiteral;
760 // eat '[':
761 scursor++;
762 while (parseGenericQuotedString(scursor, send, maybeDomainLiteral,
763 isCRLF, '[', ']')) {
764 if (scursor == send) {
765 // end of header: check for closing ']':
766 if (*(scursor - 1) == ']') {
767 // OK, last char was ']':
768 result = maybeDomainLiteral;
769 return true;
770 } else {
771 // not OK, domain-literal wasn't closed:
772 return false;
773 }
774 }
775 // we hit openChar in parseGenericQuotedString.
776 // include it in maybeDomainLiteral and keep on parsing:
777 if (*(scursor - 1) == '[') {
778 maybeDomainLiteral += QLatin1Char('[');
779 continue;
780 }
781 // OK, real end of domain-literal:
782 result = maybeDomainLiteral;
783 return true;
784 }
785 } else {
786 // dot-atom:
787 QByteArrayView maybeDotAtom;
788 if (parseDotAtom(scursor, send, maybeDotAtom, isCRLF)) {
789 // Domain may end with '.', if so preserve it'
790 if (scursor != send && *scursor == '.') {
791 maybeDotAtom = QByteArrayView(maybeDotAtom.constData(), maybeDotAtom.size() + 1);
792 scursor++;
793 }
794 result = QString::fromLatin1(maybeDotAtom);
795 return true;
796 }
797 }
798 return false;
799}
800
801bool parseObsRoute(const char *&scursor, const char *const send,
802 QStringList &result, bool isCRLF, bool save)
803{
804 while (scursor != send) {
805 eatCFWS(scursor, send, isCRLF);
806 if (scursor == send) {
807 return false;
808 }
809
810 // empty entry:
811 if (*scursor == ',') {
812 scursor++;
813 if (save) {
814 result.append(QString());
815 }
816 continue;
817 }
818
819 // empty entry ending the list:
820 if (*scursor == ':') {
821 scursor++;
822 if (save) {
823 result.append(QString());
824 }
825 return true;
826 }
827
828 // each non-empty entry must begin with '@':
829 if (*scursor != '@') {
830 return false;
831 } else {
832 scursor++;
833 }
834
835 QString maybeDomain;
836 if (!parseDomain(scursor, send, maybeDomain, isCRLF)) {
837 return false;
838 }
839 if (save) {
840 result.append(maybeDomain);
841 }
842
843 // eat the following (optional) comma:
844 eatCFWS(scursor, send, isCRLF);
845 if (scursor == send) {
846 return false;
847 }
848 if (*scursor == ':') {
849 scursor++;
850 return true;
851 }
852 if (*scursor == ',') {
853 scursor++;
854 }
855 }
856
857 return false;
858}
859
860bool parseAddrSpec(const char *&scursor, const char *const send,
861 AddrSpec &result, bool isCRLF)
862{
863 //
864 // STEP 1:
865 // local-part := dot-atom / quoted-string / word *("." word)
866 //
867 // this is equivalent to:
868 // local-part := word *("." word)
869
870 QString maybeLocalPart;
871 QString tmp;
872 QByteArrayView tmpAtom;
873
874 while (scursor != send) {
875 // first, eat any whitespace
876 eatCFWS(scursor, send, isCRLF);
877
878 char ch = *scursor++;
879 switch (ch) {
880 case '.': // dot
881 maybeLocalPart += QLatin1Char('.');
882 break;
883
884 case '@':
885 goto SAW_AT_SIGN;
886 break;
887
888 case '"': // quoted-string
889 tmp.clear();
890 if (parseGenericQuotedString(scursor, send, tmp, isCRLF, '"', '"')) {
891 maybeLocalPart += tmp;
892 } else {
893 return false;
894 }
895 break;
896
897 default: // atom
898 scursor--; // re-set scursor to point to ch again
899 if (parseAtom(scursor, send, tmpAtom, false /* no 8bit */)) {
900 maybeLocalPart += QLatin1StringView(tmpAtom);
901 } else {
902 return false; // parseAtom can only fail if the first char is non-atext.
903 }
904 break;
905 }
906 }
907
908 return false;
909
910 //
911 // STEP 2:
912 // domain
913 //
914
915SAW_AT_SIGN:
916
917 assert(*(scursor - 1) == '@');
918
919 QString maybeDomain;
920 if (!parseDomain(scursor, send, maybeDomain, isCRLF)) {
921 return false;
922 }
923
924 result.localPart = maybeLocalPart;
925 result.domain = maybeDomain;
926
927 return true;
928}
929
930bool parseAngleAddr(const char *&scursor, const char *const send,
931 AddrSpec &result, bool isCRLF)
932{
933 // first, we need an opening angle bracket:
934 eatCFWS(scursor, send, isCRLF);
935 if (scursor == send || *scursor != '<') {
936 return false;
937 }
938 scursor++; // eat '<'
939
940 eatCFWS(scursor, send, isCRLF);
941 if (scursor == send) {
942 return false;
943 }
944
945 if (*scursor == '@' || *scursor == ',') {
946 // obs-route: parse, but ignore:
947 KMIME_WARN << "obsolete source route found! ignoring.";
948 QStringList dummy;
949 if (!parseObsRoute(scursor, send, dummy,
950 isCRLF, false /* don't save */)) {
951 return false;
952 }
953 // angle-addr isn't complete until after the '>':
954 if (scursor == send) {
955 return false;
956 }
957 }
958
959 // parse addr-spec:
960 AddrSpec maybeAddrSpec;
961 if (!parseAddrSpec(scursor, send, maybeAddrSpec, isCRLF)) {
962 return false;
963 }
964
965 eatCFWS(scursor, send, isCRLF);
966 if (scursor == send || *scursor != '>') {
967 return false;
968 }
969 scursor++;
970
971 result = maybeAddrSpec;
972 return true;
973
974}
975
976static QString stripQuotes(const QString &input)
977{
978 const QLatin1Char quotes('"');
979 if (input.startsWith(quotes) && input.endsWith(quotes)) {
980 QString stripped(input.mid(1, input.size() - 2));
981 return stripped;
982 } else {
983 return input;
984 }
985}
986
987bool parseMailbox(const char *&scursor, const char *const send,
988 Mailbox &result, bool isCRLF)
989{
990 eatCFWS(scursor, send, isCRLF);
991 if (scursor == send) {
992 return false;
993 }
994
995 AddrSpec maybeAddrSpec;
996 QString maybeDisplayName;
997
998 // first, try if it's a vanilla addr-spec:
999 const char *oldscursor = scursor;
1000 if (parseAddrSpec(scursor, send, maybeAddrSpec, isCRLF)) {
1001 result.setAddress(maybeAddrSpec);
1002 // check for the obsolete form of display-name (as comment):
1003 eatWhiteSpace(scursor, send);
1004 if (scursor != send && *scursor == '(') {
1005 scursor++;
1006 if (!parseComment(scursor, send, maybeDisplayName, isCRLF, true /*keep*/)) {
1007 return false;
1008 }
1009 }
1010 result.setName(stripQuotes(maybeDisplayName));
1011 return true;
1012 }
1013 scursor = oldscursor;
1014
1015 // second, see if there's a display-name:
1016 if (!parsePhrase(scursor, send, maybeDisplayName, isCRLF)) {
1017 // failed: reset cursor, note absent display-name
1018 maybeDisplayName.clear();
1019 scursor = oldscursor;
1020 } else {
1021 // succeeded: eat CFWS
1022 eatCFWS(scursor, send, isCRLF);
1023 if (scursor == send) {
1024 return false;
1025 }
1026 }
1027
1028 // third, parse the angle-addr:
1029 if (!parseAngleAddr(scursor, send, maybeAddrSpec, isCRLF)) {
1030 return false;
1031 }
1032
1033 if (maybeDisplayName.isNull()) {
1034 // check for the obsolete form of display-name (as comment):
1035 eatWhiteSpace(scursor, send);
1036 if (scursor != send && *scursor == '(') {
1037 scursor++;
1038 if (!parseComment(scursor, send, maybeDisplayName, isCRLF, true /*keep*/)) {
1039 return false;
1040 }
1041 }
1042 }
1043
1044 result.setName(stripQuotes(maybeDisplayName));
1045 result.setAddress(maybeAddrSpec);
1046 return true;
1047}
1048
1049bool parseGroup(const char *&scursor, const char *const send,
1050 Address &result, bool isCRLF)
1051{
1052 // group := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS]
1053 //
1054 // equivalent to:
1055 // group := display-name ":" [ obs-mbox-list ] ";"
1056
1057 eatCFWS(scursor, send, isCRLF);
1058 if (scursor == send) {
1059 return false;
1060 }
1061
1062 // get display-name:
1063 QString maybeDisplayName;
1064 if (!parsePhrase(scursor, send, maybeDisplayName, isCRLF)) {
1065 return false;
1066 }
1067
1068 // get ":":
1069 eatCFWS(scursor, send, isCRLF);
1070 if (scursor == send || *scursor != ':') {
1071 return false;
1072 }
1073
1074 // KDE5 TODO: Don't expose displayName as public, but rather add setter for it that
1075 // automatically calls removeBidiControlChars
1076 result.displayName = removeBidiControlChars(maybeDisplayName);
1077
1078 // get obs-mbox-list (may contain empty entries):
1079 scursor++;
1080 while (scursor != send) {
1081 eatCFWS(scursor, send, isCRLF);
1082 if (scursor == send) {
1083 return false;
1084 }
1085
1086 // empty entry:
1087 if (*scursor == ',') {
1088 scursor++;
1089 continue;
1090 }
1091
1092 // empty entry ending the list:
1093 if (*scursor == ';') {
1094 scursor++;
1095 return true;
1096 }
1097
1098 Mailbox maybeMailbox;
1099 if (!parseMailbox(scursor, send, maybeMailbox, isCRLF)) {
1100 return false;
1101 }
1102 result.mailboxList.append(maybeMailbox);
1103
1104 eatCFWS(scursor, send, isCRLF);
1105 // premature end:
1106 if (scursor == send) {
1107 return false;
1108 }
1109 // regular end of the list:
1110 if (*scursor == ';') {
1111 scursor++;
1112 return true;
1113 }
1114 // eat regular list entry separator:
1115 if (*scursor == ',') {
1116 scursor++;
1117 }
1118 }
1119 return false;
1120}
1121
1122bool parseAddress(const char *&scursor, const char *const send,
1123 Address &result, bool isCRLF)
1124{
1125 // address := mailbox / group
1126
1127 eatCFWS(scursor, send, isCRLF);
1128 if (scursor == send) {
1129 return false;
1130 }
1131
1132 // first try if it's a single mailbox:
1133 Mailbox maybeMailbox;
1134 const char *oldscursor = scursor;
1135 if (parseMailbox(scursor, send, maybeMailbox, isCRLF)) {
1136 // yes, it is:
1137 result.displayName.clear();
1138 result.mailboxList.append(maybeMailbox);
1139 return true;
1140 }
1141 scursor = oldscursor;
1142
1143 Address maybeAddress;
1144
1145 // no, it's not a single mailbox. Try if it's a group:
1146 if (!parseGroup(scursor, send, maybeAddress, isCRLF)) {
1147 return false;
1148 }
1149
1150 result = maybeAddress;
1151 return true;
1152}
1153
1154bool parseAddressList(const char *&scursor, const char *const send,
1155 AddressList &result, bool isCRLF)
1156{
1157 while (scursor != send) {
1158 eatCFWS(scursor, send, isCRLF);
1159 // end of header: this is OK.
1160 if (scursor == send) {
1161 return true;
1162 }
1163 // empty entry: ignore:
1164 if (*scursor == ',') {
1165 scursor++;
1166 continue;
1167 }
1168 // broken clients might use ';' as list delimiter, accept that as well
1169 if (*scursor == ';') {
1170 scursor++;
1171 continue;
1172 }
1173
1174 // parse one entry
1175 Address maybeAddress;
1176 if (!parseAddress(scursor, send, maybeAddress, isCRLF)) {
1177 return false;
1178 }
1179 result.append(maybeAddress);
1180
1181 eatCFWS(scursor, send, isCRLF);
1182 // end of header: this is OK.
1183 if (scursor == send) {
1184 return true;
1185 }
1186 // comma separating entries: eat it.
1187 if (*scursor == ',') {
1188 scursor++;
1189 }
1190 }
1191 return true;
1192}
1193
1194static bool parseParameter(const char *&scursor, const char *const send,
1195 QPair<QByteArray, QStringOrQPair> &result, bool isCRLF)
1196{
1197 // parameter = regular-parameter / extended-parameter
1198 // regular-parameter = regular-parameter-name "=" value
1199 // extended-parameter =
1200 // value = token / quoted-string
1201 //
1202 // note that rfc2231 handling is out of the scope of this function.
1203 // Therefore we return the attribute as QByteArray and the value as
1204 // (start,length) tuple if we see that the value is encoded
1205 // (trailing asterisk), for parseParameterList to decode...
1206
1207 eatCFWS(scursor, send, isCRLF);
1208 if (scursor == send) {
1209 return false;
1210 }
1211
1212 //
1213 // parse the parameter name:
1214 //
1215 QByteArrayView maybeAttribute;
1216 if (!parseToken(scursor, send, maybeAttribute, ParseTokenNoFlag)) {
1217 return false;
1218 }
1219
1220 eatCFWS(scursor, send, isCRLF);
1221 // premature end: not OK (haven't seen '=' yet).
1222 if (scursor == send || *scursor != '=') {
1223 return false;
1224 }
1225 scursor++; // eat '='
1226
1227 eatCFWS(scursor, send, isCRLF);
1228 if (scursor == send) {
1229 // don't choke on attribute=, meaning the value was omitted:
1230 if (maybeAttribute.endsWith('*')) {
1231 KMIME_WARN << "attribute ends with \"*\", but value is empty!"
1232 "Chopping away \"*\".";
1233 maybeAttribute.chop(1);
1234 }
1235 result = qMakePair(maybeAttribute.toByteArray().toLower(), QStringOrQPair());
1236 return true;
1237 }
1238
1239 const char *oldscursor = scursor;
1240
1241 //
1242 // parse the parameter value:
1243 //
1244 QStringOrQPair maybeValue;
1245 if (*scursor == '"') {
1246 // value is a quoted-string:
1247 scursor++;
1248 if (maybeAttribute.endsWith('*')) {
1249 // attributes ending with "*" designate extended-parameters,
1250 // which cannot have quoted-strings as values. So we remove the
1251 // trailing "*" to not confuse upper layers.
1252 KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string!"
1253 "Chopping away \"*\".";
1254 maybeAttribute.chop(1);
1255 }
1256
1257 if (!parseGenericQuotedString(scursor, send, maybeValue.qstring, isCRLF)) {
1258 scursor = oldscursor;
1259 result = qMakePair(maybeAttribute.toByteArray().toLower(), QStringOrQPair());
1260 return false; // this case needs further processing by upper layers!!
1261 }
1262 } else {
1263 // value is a token:
1264 if (!parseToken(scursor, send, maybeValue.view, ParseTokenRelaxedTText)) {
1265 scursor = oldscursor;
1266 result = qMakePair(maybeAttribute.toByteArray().toLower(), QStringOrQPair());
1267 return false; // this case needs further processing by upper layers!!
1268 }
1269 }
1270
1271 result = qMakePair(maybeAttribute.toByteArray().toLower(), maybeValue);
1272 return true;
1273}
1274
1275static bool parseRawParameterList(const char *&scursor, const char *const send,
1276 std::map<QByteArray, QStringOrQPair> &result,
1277 bool isCRLF)
1278{
1279 // we use parseParameter() consecutively to obtain a map of raw
1280 // attributes to raw values. "Raw" here means that we don't do
1281 // rfc2231 decoding and concatenation. This is left to
1282 // parseParameterList(), which will call this function.
1283 //
1284 // The main reason for making this chunk of code a separate
1285 // (private) method is that we can deal with broken parameters
1286 // _here_ and leave the rfc2231 handling solely to
1287 // parseParameterList(), which will still be enough work.
1288 while (scursor != send) {
1289 eatCFWS(scursor, send, isCRLF);
1290 // empty entry ending the list: OK.
1291 if (scursor == send) {
1292 return true;
1293 }
1294 // empty list entry: ignore.
1295 if (*scursor == ';') {
1296 scursor++;
1297 continue;
1298 }
1299 QPair<QByteArray, QStringOrQPair> maybeParameter;
1300 if (!parseParameter(scursor, send, maybeParameter, isCRLF)) {
1301 // we need to do a bit of work if the attribute is not
1302 // NULL. These are the cases marked with "needs further
1303 // processing" in parseParameter(). Specifically, parsing of the
1304 // token or the quoted-string, which should represent the value,
1305 // failed. We take the easy way out and simply search for the
1306 // next ';' to start parsing again. (Another option would be to
1307 // take the text between '=' and ';' as value)
1308 if (maybeParameter.first.isNull()) {
1309 return false;
1310 }
1311 while (scursor != send) {
1312 if (*scursor++ == ';') {
1313 goto IS_SEMICOLON;
1314 }
1315 }
1316 // scursor == send case: end of list.
1317 return true;
1318 IS_SEMICOLON:
1319 // *scursor == ';' case: parse next entry.
1320 continue;
1321 }
1322 // successful parsing brings us here:
1323 result[maybeParameter.first] = maybeParameter.second;
1324
1325 eatCFWS(scursor, send, isCRLF);
1326 // end of header: ends list.
1327 if (scursor == send) {
1328 return true;
1329 }
1330 // regular separator: eat it.
1331 if (*scursor == ';') {
1332 scursor++;
1333 }
1334 }
1335 return true;
1336}
1337
1338static void decodeRFC2231Value(KCodecs::Codec *&rfc2231Codec,
1339 QStringDecoder &textcodec,
1340 bool isContinuation, QString &value,
1341 QByteArrayView &source, QByteArray &charset)
1342{
1343 //
1344 // parse the raw value into (charset,language,text):
1345 //
1346
1347 const char *decBegin = source.data();
1348 const char *decCursor = decBegin;
1349 const char *decEnd = decCursor + source.size();
1350
1351 if (!isContinuation) {
1352 // find the first single quote
1353 while (decCursor != decEnd) {
1354 if (*decCursor == '\'') {
1355 break;
1356 } else {
1357 decCursor++;
1358 }
1359 }
1360
1361 if (decCursor == decEnd) {
1362 // there wasn't a single single quote at all!
1363 // take the whole value to be in latin-1:
1364 KMIME_WARN << "No charset in extended-initial-value."
1365 "Assuming \"iso-8859-1\".";
1366 value += QLatin1StringView(decBegin, source.size());
1367 return;
1368 }
1369
1370 charset = QByteArray(decBegin, decCursor - decBegin);
1371
1372 const char *oldDecCursor = ++decCursor;
1373 // find the second single quote (we ignore the language tag):
1374 while (decCursor != decEnd) {
1375 if (*decCursor == '\'') {
1376 break;
1377 } else {
1378 decCursor++;
1379 }
1380 }
1381 if (decCursor == decEnd) {
1382 KMIME_WARN << "No language in extended-initial-value."
1383 "Trying to recover.";
1384 decCursor = oldDecCursor;
1385 } else {
1386 decCursor++;
1387 }
1388
1389 // decCursor now points to the start of the
1390 // "extended-other-values":
1391
1392 //
1393 // get the decoders:
1394 //
1395
1396 textcodec = QStringDecoder(charset.constData());
1397 if (!textcodec.isValid()) {
1398 KMIME_WARN_UNKNOWN(Charset, charset);
1399 }
1400 }
1401
1402 if (!rfc2231Codec) {
1403 rfc2231Codec = KCodecs::Codec::codecForName("x-kmime-rfc2231");
1404 assert(rfc2231Codec);
1405 }
1406
1407 if (!textcodec.isValid()) {
1408 value += QString::fromLatin1(decCursor, decEnd - decCursor);
1409 return;
1410 }
1411
1412 KCodecs::Decoder *dec = rfc2231Codec->makeDecoder();
1413 assert(dec);
1414
1415 //
1416 // do the decoding:
1417 //
1418
1419 QByteArray buffer;
1420 buffer.resize(rfc2231Codec->maxDecodedSizeFor(decEnd - decCursor));
1421 QByteArray::Iterator bit = buffer.begin();
1422 QByteArray::ConstIterator bend = buffer.end();
1423
1424 if (!dec->decode(decCursor, decEnd, bit, bend)) {
1425 KMIME_WARN << rfc2231Codec->name()
1426 << "codec lies about its maxDecodedSizeFor()"
1427 << Qt::endl
1428 << "result may be truncated";
1429 }
1430
1431 value += textcodec.decode(QByteArrayView(buffer.begin(), bit - buffer.begin()));
1432
1433 // qCDebug(KMIME_LOG) << "value now: \"" << value << "\"";
1434 // cleanup:
1435 delete dec;
1436}
1437
1438// known issues:
1439// - permutes rfc2231 continuations when the total number of parts
1440// exceeds 10 (other-sections then becomes *xy, i.e. two digits)
1441
1442bool parseParameterListWithCharset(const char *&scursor,
1443 const char *const send,
1444 Headers::ParameterMap &result,
1445 QByteArray &charset, bool isCRLF)
1446{
1447 // parse the list into raw attribute-value pairs:
1448 std::map<QByteArray, QStringOrQPair> rawParameterList;
1449 if (!parseRawParameterList(scursor, send, rawParameterList, isCRLF)) {
1450 return false;
1451 }
1452
1453 if (rawParameterList.empty()) {
1454 return true;
1455 }
1456
1457 // decode rfc 2231 continuations and alternate charset encoding:
1458
1459 // NOTE: this code assumes that what QMapIterator delivers is sorted
1460 // by the key!
1461
1462 KCodecs::Codec *rfc2231Codec = nullptr;
1463 QStringDecoder textcodec;
1464 QByteArray attribute;
1465 QString value;
1466 enum Mode {
1467 NoMode = 0x0, Continued = 0x1, Encoded = 0x2
1468 };
1469
1470 enum EncodingMode {
1471 NoEncoding,
1472 RFC2047,
1473 RFC2231
1474 };
1475
1476 for (auto &it : rawParameterList) {
1477 if (attribute.isNull() || !it.first.startsWith(attribute)) {
1478 //
1479 // new attribute:
1480 //
1481
1482 // store the last attribute/value pair in the result map now:
1483 if (!attribute.isNull()) {
1484 result[attribute] = value;
1485 }
1486 // and extract the information from the new raw attribute:
1487 value.clear();
1488 attribute = it.first;
1489 int mode = NoMode;
1490 EncodingMode encodingMode = NoEncoding;
1491
1492 // is the value rfc2331-encoded?
1493 if (attribute.endsWith('*')) {
1494 attribute.chop(1);
1495 mode |= Encoded;
1496 encodingMode = RFC2231;
1497 }
1498 // is the value rfc2047-encoded?
1499 if (!it.second.qstring.isNull() &&
1500 it.second.qstring.contains(QLatin1StringView("=?"))) {
1501 mode |= Encoded;
1502 encodingMode = RFC2047;
1503 }
1504 // is the value continued?
1505 if (attribute.endsWith(QLatin1StringView("*0"))) {
1506 attribute.chop(2);
1507 mode |= Continued;
1508 }
1509 //
1510 // decode if necessary:
1511 //
1512 if (mode & Encoded) {
1513 if (encodingMode == RFC2231) {
1514 decodeRFC2231Value(rfc2231Codec, textcodec,
1515 false, /* isn't continuation */
1516 value, it.second.view, charset);
1517 } else if (encodingMode == RFC2047) {
1518 value += KCodecs::decodeRFC2047String(it.second.qstring.toLatin1(), &charset);
1519 }
1520 } else {
1521 // not encoded.
1522 if (!it.second.view.isNull()) {
1523 value += QLatin1StringView(it.second.view);
1524 } else {
1525 value += it.second.qstring;
1526 }
1527 }
1528
1529 //
1530 // shortcut-processing when the value isn't encoded:
1531 //
1532
1533 if (!(mode & Continued)) {
1534 // save result already:
1535 result[attribute] = value;
1536 // force begin of a new attribute:
1537 attribute.clear();
1538 }
1539 } else { // it.key().startsWith( attribute )
1540 //
1541 // continuation
1542 //
1543
1544 // ignore the section and trust QMap to have sorted the keys:
1545 if (it.first.endsWith('*')) {
1546 // encoded
1547 decodeRFC2231Value(rfc2231Codec, textcodec,
1548 true, /* is continuation */
1549 value, it.second.view, charset);
1550 } else {
1551 // not encoded
1552 if (!it.second.view.isNull()) {
1553 value += QLatin1StringView(it.second.view);
1554 } else {
1555 value += it.second.qstring;
1556 }
1557 }
1558 }
1559 }
1560 // write last attr/value pair:
1561 if (!attribute.isNull()) {
1562 result[attribute] = value;
1563 }
1564
1565 return true;
1566}
1567
1568static const char stdDayNames[][4] = {
1569 "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
1570};
1571static const int stdDayNamesLen = sizeof stdDayNames / sizeof *stdDayNames;
1572
1573static bool parseDayName(const char *&scursor, const char *const send)
1574{
1575 // check bounds:
1576 if (send - scursor < 3) {
1577 return false;
1578 }
1579
1580 for (int i = 0 ; i < stdDayNamesLen ; ++i) {
1581 if (qstrnicmp(scursor, stdDayNames[i], 3) == 0) {
1582 scursor += 3;
1583 // qCDebug(KMIME_LOG) << "found" << stdDayNames[i];
1584 return true;
1585 }
1586 }
1587
1588 return false;
1589}
1590
1591static const char stdMonthNames[][4] = {
1592 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
1593 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
1594};
1595static const int stdMonthNamesLen =
1596 sizeof stdMonthNames / sizeof *stdMonthNames;
1597
1598static bool parseMonthName(const char *&scursor, const char *const send,
1599 int &result)
1600{
1601 // check bounds:
1602 if (send - scursor < 3) {
1603 return false;
1604 }
1605
1606 for (result = 0 ; result < stdMonthNamesLen ; ++result) {
1607 if (qstrnicmp(scursor, stdMonthNames[result], 3) == 0) {
1608 scursor += 3;
1609 return true;
1610 }
1611 }
1612
1613 // not found:
1614 return false;
1615}
1616
// Alphabetic time zone names and their offsets in seconds east of GMT.
// The table is searched front to back and the first matching name wins, so
// every name must appear only once.
static const struct {
    const char tzName[5];
    int secsEastOfGMT;
} timeZones[] = {
    // rfc 822 timezones:
    { "GMT", 0 },
    { "UT", 0 },
    { "EDT", -4 * 3600 },
    { "EST", -5 * 3600 },
    // Central Daylight Time. This entry used to be a second "MST", which
    // left "CDT" unknown and made "MST" resolve to -0500 instead of -0700.
    { "CDT", -5 * 3600 },
    { "CST", -6 * 3600 },
    { "MDT", -6 * 3600 },
    { "MST", -7 * 3600 },
    { "PDT", -7 * 3600 },
    { "PST", -8 * 3600 },
    // common, non-rfc-822 zones:
    { "CET", 1 * 3600 },
    { "MET", 1 * 3600 },
    { "UTC", 0 },
    { "CEST", 2 * 3600 },
    { "BST", 1 * 3600 },
    // rfc 822 military timezones:
    { "Z", 0 },
    { "A", -1 * 3600 },
    { "B", -2 * 3600 },
    { "C", -3 * 3600 },
    { "D", -4 * 3600 },
    { "E", -5 * 3600 },
    { "F", -6 * 3600 },
    { "G", -7 * 3600 },
    { "H", -8 * 3600 },
    { "I", -9 * 3600 },
    // J is not used!
    { "K", -10 * 3600 },
    { "L", -11 * 3600 },
    { "M", -12 * 3600 },
    { "N", 1 * 3600 },
    { "O", 2 * 3600 },
    { "P", 3 * 3600 },
    { "Q", 4 * 3600 },
    { "R", 5 * 3600 },
    { "S", 6 * 3600 },
    { "T", 7 * 3600 },
    { "U", 8 * 3600 },
    { "V", 9 * 3600 },
    { "W", 10 * 3600 },
    { "X", 11 * 3600 },
    { "Y", 12 * 3600 },
};
static const int timeZonesLen = sizeof timeZones / sizeof *timeZones;
1667
1668static bool parseAlphaNumericTimeZone(const char *&scursor,
1669 const char *const send,
1670 long int &secsEastOfGMT,
1671 bool &timeZoneKnown)
1672{
1673 // allow the timezone to be wrapped in quotes; bug 260761
1674 if (scursor < send && *scursor == '"') {
1675 scursor++;
1676
1677 if (scursor == send) {
1678 return false;
1679 }
1680 }
1681
1682 QByteArrayView maybeTimeZone;
1683 if (!parseToken(scursor, send, maybeTimeZone, ParseTokenNoFlag)) {
1684 return false;
1685 }
1686 for (int i = 0 ; i < timeZonesLen ; ++i) {
1687 if (maybeTimeZone.compare(timeZones[i].tzName, Qt::CaseInsensitive) == 0) {
1688 scursor += maybeTimeZone.size();
1689 secsEastOfGMT = timeZones[i].secsEastOfGMT;
1690 timeZoneKnown = true;
1691
1692 if (scursor < send && *scursor == '"') {
1693 scursor++;
1694 }
1695
1696 return true;
1697 }
1698 }
1699
1700 // don't choke just because we don't happen to know the time zone
1701 KMIME_WARN_UNKNOWN(time zone, maybeTimeZone);
1702 secsEastOfGMT = 0;
1703 timeZoneKnown = false;
1704 return true;
1705}
1706
// Parses a run of decimal digits starting at scursor.
//
// `result` receives the parsed number (0 if there are no digits) and
// scursor is left on the first character that is not a digit, or on send.
// Returns the number of digits consumed.
//
// If the digits do not fit into an int, `result` saturates at the largest
// int value instead of overflowing; the digits are still consumed and
// counted.
int parseDigits(const char *&scursor, const char *const send, int &result)
{
    // largest int value, spelled out so that no extra header is needed
    constexpr int maxValue = int(~0u >> 1);

    result = 0;
    int digits = 0;
    // isdigit() must not be handed a negative value, which a plain char
    // holding an 8-bit character may be; hence the cast.
    for (; scursor != send && isdigit(static_cast<unsigned char>(*scursor)) ; ++scursor, ++digits) {
        const int digit = *scursor - '0';
        if (result > (maxValue - digit) / 10) {
            // result * 10 + digit would overflow: clamp.
            result = maxValue;
        } else {
            result = result * 10 + digit;
        }
    }
    return digits;
}
1718
1719static bool parseTimeOfDay(const char *&scursor, const char *const send,
1720 int &hour, int &min, int &sec, bool isCRLF = false)
1721{
1722 // time-of-day := 2DIGIT [CFWS] ":" [CFWS] 2DIGIT [ [CFWS] ":" 2DIGIT ]
1723
1724 //
1725 // 2DIGIT representing "hour":
1726 //
1727 if (!parseDigits(scursor, send, hour)) {
1728 return false;
1729 }
1730
1731 eatCFWS(scursor, send, isCRLF);
1732 if (scursor == send || *scursor != ':') {
1733 return false;
1734 }
1735 scursor++; // eat ':'
1736
1737 eatCFWS(scursor, send, isCRLF);
1738 if (scursor == send) {
1739 return false;
1740 }
1741
1742 //
1743 // 2DIGIT representing "minute":
1744 //
1745 if (!parseDigits(scursor, send, min)) {
1746 return false;
1747 }
1748
1749 eatCFWS(scursor, send, isCRLF);
1750 if (scursor == send) {
1751 return true; // seconds are optional
1752 }
1753
1754 //
1755 // let's see if we have a 2DIGIT representing "second":
1756 //
1757 if (*scursor == ':') {
1758 // yepp, there are seconds:
1759 scursor++; // eat ':'
1760 eatCFWS(scursor, send, isCRLF);
1761 if (scursor == send) {
1762 return false;
1763 }
1764
1765 if (!parseDigits(scursor, send, sec)) {
1766 return false;
1767 }
1768 } else {
1769 sec = 0;
1770 }
1771
1772 return true;
1773}
1774
1775bool parseTime(const char *&scursor, const char *send,
1776 int &hour, int &min, int &sec, long int &secsEastOfGMT,
1777 bool &timeZoneKnown, bool isCRLF)
1778{
1779 // time := time-of-day CFWS ( zone / obs-zone )
1780 //
1781 // obs-zone := "UT" / "GMT" /
1782 // "EST" / "EDT" / ; -0500 / -0400
1783 // "CST" / "CDT" / ; -0600 / -0500
1784 // "MST" / "MDT" / ; -0700 / -0600
1785 // "PST" / "PDT" / ; -0800 / -0700
1786 // "A"-"I" / "a"-"i" /
1787 // "K"-"Z" / "k"-"z"
1788
1789 eatCFWS(scursor, send, isCRLF);
1790 if (scursor == send) {
1791 return false;
1792 }
1793
1794 if (!parseTimeOfDay(scursor, send, hour, min, sec, isCRLF)) {
1795 return false;
1796 }
1797
1798 eatCFWS(scursor, send, isCRLF);
1799 // there might be no timezone but a year following
1800 if ((scursor == send) || isdigit(*scursor)) {
1801 timeZoneKnown = false;
1802 secsEastOfGMT = 0;
1803 return true; // allow missing timezone
1804 }
1805
1806 timeZoneKnown = true;
1807 if (*scursor == '+' || *scursor == '-') {
1808 // remember and eat '-'/'+':
1809 const char sign = *scursor++;
1810 // numerical timezone:
1811 int maybeTimeZone;
1812 const int tzDigits = parseDigits(scursor, send, maybeTimeZone);
1813 if (tzDigits != 4) {
1814 // Allow timezones in 02:00 format
1815 if (tzDigits == 2 && scursor != send && *scursor == ':') {
1816 scursor++;
1817 int maybeTimeZone2;
1818 if (parseDigits(scursor, send, maybeTimeZone2) != 2) {
1819 return false;
1820 }
1821 maybeTimeZone = maybeTimeZone * 100 + maybeTimeZone2;
1822 } else {
1823 return false;
1824 }
1825 }
1826 secsEastOfGMT = 60 * (maybeTimeZone / 100 * 60 + maybeTimeZone % 100);
1827 if (sign == '-') {
1828 secsEastOfGMT *= -1;
1829 if (secsEastOfGMT == 0) {
1830 timeZoneKnown = false; // -0000 means indetermined tz
1831 }
1832 }
1833 } else {
1834 // maybe alphanumeric timezone:
1835 if (!parseAlphaNumericTimeZone(scursor, send, secsEastOfGMT, timeZoneKnown)) {
1836 return false;
1837 }
1838 }
1839 return true;
1840}
1841
1842bool parseQDateTime(const char *&scursor, const char *const send,
1843 QDateTime &result, bool isCRLF)
1844{
1845 eatCFWS(scursor, send, isCRLF);
1846 if (scursor == send || std::distance(scursor, send) < 17) {
1847 return false;
1848 }
1849 // In qt6 yy == 1900 ! => for sure we use 2000 here.
1850 result = QDateTime::fromString(QString::fromLatin1(scursor, 17), QStringLiteral("dd/MM/yy HH:mm:ss"));
1851 QDate resultDate = result.date();
1852 resultDate.setDate(resultDate.year() + 100, resultDate.month(), resultDate.day());
1853 result.setDate(resultDate);
1854 return result.isValid();
1855}
1856
1857bool parseDateTime(const char *&scursor, const char *const send,
1858 QDateTime &result, bool isCRLF)
1859{
1860 // Parsing date-time; strict mode:
1861 //
1862 // date-time := [ [CFWS] day-name [CFWS] "," ] ; wday
1863 // (expanded) [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date
1864 // time
1865 //
1866 // day-name := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
1867 // month-name := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" /
1868 // "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec"
1869
1870 result = QDateTime();
1871
1872 eatCFWS(scursor, send, isCRLF);
1873 if (scursor == send) {
1874 return false;
1875 }
1876
1877 //
1878 // let's see if there's a day-of-week:
1879 //
1880 if (parseDayName(scursor, send)) {
1881 eatCFWS(scursor, send, isCRLF);
1882 if (scursor == send) {
1883 return false;
1884 }
1885 // day-name should be followed by ',' but we treat it as optional:
1886 if (*scursor == ',') {
1887 scursor++; // eat ','
1888 eatCFWS(scursor, send, isCRLF);
1889 }
1890 }
1891
1892 int maybeMonth = -1;
1893 bool asctimeFormat = false;
1894
1895 // ANSI-C asctime() format is: Wed Jun 30 21:49:08 1993
1896 if (!isdigit(*scursor) && parseMonthName(scursor, send, maybeMonth)) {
1897 asctimeFormat = true;
1898 eatCFWS(scursor, send, isCRLF);
1899 }
1900
1901 //
1902 // 1*2DIGIT representing "day" (of month):
1903 //
1904 int maybeDay;
1905 if (!parseDigits(scursor, send, maybeDay)) {
1906 return false;
1907 }
1908
1909 eatCFWS(scursor, send, isCRLF);
1910 if (scursor == send) {
1911 return false;
1912 }
1913
1914 // ignore ","; bug 54098
1915 if (*scursor == ',') {
1916 scursor++;
1917 }
1918
1919 //
1920 // month-name:
1921 //
1922 if (!asctimeFormat && !parseMonthName(scursor, send, maybeMonth)) {
1923 return false;
1924 }
1925 if (scursor == send) {
1926 return false;
1927 }
1928 assert(maybeMonth >= 0); assert(maybeMonth <= 11);
1929 ++maybeMonth; // 0-11 -> 1-12
1930
1931 eatCFWS(scursor, send, isCRLF);
1932 if (scursor == send) {
1933 return false;
1934 }
1935
1936 // check for "year HH:MM:SS" or only "HH:MM:SS" (or "H:MM:SS")
1937 bool timeAfterYear = true;
1938 if ((send - scursor > 3) && ((scursor[1] == ':') || (scursor[2] == ':'))) {
1939 timeAfterYear = false; // first read time, then year
1940 }
1941
1942 //
1943 // 2*DIGIT representing "year":
1944 //
1945 int maybeYear = 0;
1946
1947 if (timeAfterYear && !parseDigits(scursor, send, maybeYear)) {
1948 return false;
1949 }
1950
1951 eatCFWS(scursor, send, isCRLF);
1952 int maybeHour;
1953 int maybeMinute;
1954 int maybeSecond;
1955 long int secsEastOfGMT = 0;
1956 QDate maybeDate;
1957 QTime maybeTime;
1958 if (scursor != send) {
1959 //
1960 // time
1961 //
1962 bool timeZoneKnown = true;
1963
1964 if (!parseTime(scursor, send,
1965 maybeHour, maybeMinute, maybeSecond,
1966 secsEastOfGMT, timeZoneKnown, isCRLF)) {
1967 return false;
1968 }
1969
1970 // in asctime() the year follows the time
1971 if (!timeAfterYear) {
1972 eatCFWS(scursor, send, isCRLF);
1973 if (scursor == send) {
1974 return false;
1975 }
1976
1977 if (!parseDigits(scursor, send, maybeYear)) {
1978 return false;
1979 }
1980 }
1981
1982 // RFC 2822 4.3 processing:
1983 if (maybeYear < 50) {
1984 maybeYear += 2000;
1985 } else if (maybeYear < 1000) {
1986 maybeYear += 1900;
1987 }
1988 // else keep as is
1989 if (maybeYear < 1900) {
1990 return false; // rfc2822, 3.3
1991 }
1992
1993 maybeDate = QDate(maybeYear, maybeMonth, maybeDay);
1994 maybeTime = QTime(maybeHour, maybeMinute, maybeSecond);
1995
1996 if (!maybeDate.isValid() || !maybeTime.isValid()) {
1997 return false;
1998 }
1999 } else {
2000 maybeDate = QDate(maybeYear, maybeMonth, maybeDay);
2001 maybeTime = QTime(0, 0, 0);
2002 }
2003
2004 result = QDateTime(maybeDate, maybeTime, QTimeZone::fromSecondsAheadOfUtc(secsEastOfGMT));
2005 if (!result.isValid()) {
2006 return false;
2007 }
2008 return true;
2009}
2010
2011namespace {
2012
2013Headers::Base *extractHeader(QByteArrayView head, const qsizetype headerStart, qsizetype &endOfFieldBody)
2014{
2015 Headers::Base *header = {};
2016
2017 auto startOfFieldBody = head.indexOf(':', headerStart);
2018 if (startOfFieldBody < 0) {
2019 return nullptr;
2020 }
2021
2022 const char *rawType = head.constData() + headerStart;
2023 const size_t rawTypeLen = startOfFieldBody - headerStart;
2024
2025 startOfFieldBody++; //skip the ':'
2026 if (startOfFieldBody < head.size() - 1 && head[startOfFieldBody] == ' ') { // skip the space after the ':', if there's any
2027 startOfFieldBody++;
2028 }
2029
2030 bool folded = false;
2031 endOfFieldBody = findHeaderLineEnd(head, startOfFieldBody, &folded);
2032
2033 // We might get an invalid mail without a field name, don't crash on that.
2034 if (rawTypeLen > 0) {
2035 header = HeaderFactory::createHeader(QByteArrayView(rawType, rawTypeLen));
2036 }
2037 if (!header) {
2038 //qCWarning(KMIME_LOG)() << "Returning Generic header of type" << rawType;
2039 header = new Headers::Generic(rawType, rawTypeLen);
2040 }
2041 if (folded) {
2042 const auto unfoldedBody = unfoldHeader(head.constData() + startOfFieldBody, endOfFieldBody - startOfFieldBody);
2043 header->from7BitString(unfoldedBody);
2044 } else {
2045 header->from7BitString(QByteArrayView(head.constData() + startOfFieldBody, endOfFieldBody - startOfFieldBody));
2046 }
2047
2048 return header;
2049}
2050
2051}
2052
2053std::unique_ptr<KMime::Headers::Base> parseNextHeader(QByteArrayView &head)
2054{
2055 qsizetype endOfFieldBody = 0;
2056 std::unique_ptr<KMime::Headers::Base> header(extractHeader(head, 0, endOfFieldBody));
2057 if (header) {
2058 head = head.mid(endOfFieldBody + 1);
2059 } else {
2060 head = {};
2061 }
2062
2063 return header;
2064}
2065
2066void extractHeaderAndBody(const QByteArray &content, QByteArray &header, QByteArray &body)
2067{
2068 header.clear();
2069 body.clear();
2070
2071 // empty header
2072 if (content.startsWith('\n')) {
2073 body = content.right(content.length() - 1);
2074 return;
2075 }
2076
2077 auto pos = content.indexOf("\n\n", 0);
2078 if (pos > -1) {
2079 header = content.left(++pos); //header *must* end with "\n" !!
2080 body = content.mid(pos + 1);
2081 if (body.startsWith("\n")) {
2082 body = "\n" + body;
2083 }
2084 } else {
2085 header = content;
2086 }
2087}
2088
2089QList<Headers::Base *> parseHeaders(const QByteArray &head) {
2091
2092 qsizetype cursor = 0;
2093 while (cursor < head.size()) {
2094 const auto headerStart = cursor;
2095 qsizetype endOfFieldBody;
2096 if (auto header = extractHeader(head, headerStart, endOfFieldBody)) {
2097 ret << header;
2098 cursor = endOfFieldBody + 1;
2099 } else {
2100 break;
2101 }
2102 }
2103
2104 return ret;
2105}
2106
2107} // namespace HeaderParsing
2108
2109} // namespace KMime
static Codec * codecForName(QByteArrayView name)
virtual Decoder * makeDecoder(NewlineType newline=NewlineLF) const=0
virtual const char * name() const=0
virtual qsizetype maxDecodedSizeFor(qsizetype insize, NewlineType newline=NewlineLF) const=0
Baseclass of all header-classes.
Definition headers.h:97
virtual void from7BitString(QByteArrayView s)=0
Parses the given string.
Represents an arbitrary header, that can contain any header-field.
Definition headers.h:1200
Represents an (email address, display name) pair according RFC 2822, section 3.4.
Definition types.h:38
void setName(const QString &name)
Sets the name.
Definition types.cpp:132
void setAddress(const AddrSpec &addr)
Sets the email address.
Definition types.cpp:114
Q_SCRIPTABLE Q_NOREPLY void start()
This file is part of the API for handling MIME data and defines the various header classes:
KCODECS_EXPORT QString decodeRFC2047String(QByteArrayView src, QByteArray *usedCS, const QByteArray &defaultCS=QByteArray(), CharsetOption option=NoOption)
iterator begin()
void chop(qsizetype n)
void clear()
const char * constData() const const
char * data()
iterator end()
bool endsWith(QByteArrayView bv) const const
QByteArray first(qsizetype n) const const
qsizetype indexOf(QByteArrayView bv, qsizetype from) const const
bool isNull() const const
QByteArray left(qsizetype len) const const
qsizetype length() const const
QByteArray mid(qsizetype pos, qsizetype len) const const
void resize(qsizetype newSize, char c)
QByteArray right(qsizetype len) const const
qsizetype size() const const
bool startsWith(QByteArrayView bv) const const
QByteArray toLower() const const
QByteArrayView mid(qsizetype start, qsizetype length) const const
void chop(qsizetype length)
int compare(QByteArrayView bv, Qt::CaseSensitivity cs) const const
const_pointer constData() const const
const_pointer data() const const
bool endsWith(QByteArrayView bv) const const
qsizetype indexOf(QByteArrayView bv, qsizetype from) const const
qsizetype size() const const
QByteArray toByteArray() const const
int day() const const
bool isValid(int year, int month, int day)
int month() const const
bool setDate(int year, int month, int day)
int year() const const
QDate date() const const
QDateTime fromString(QStringView string, QStringView format, QCalendar cal)
bool isValid() const const
void setDate(QDate date)
void append(QList< T > &&value)
void clear()
bool endsWith(QChar c, Qt::CaseSensitivity cs) const const
QString fromLatin1(QByteArrayView str)
bool isNull() const const
QString mid(qsizetype position, qsizetype n) const const
qsizetype size() const const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const const
bool isValid() const const
EncodedData< QByteArrayView > decode(QByteArrayView ba)
CaseInsensitive
QTextStream & dec(QTextStream &stream)
QTextStream & endl(QTextStream &stream)
bool isValid(int h, int m, int s, int ms)
QTimeZone fromSecondsAheadOfUtc(int offset)
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:48:31 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.