KIMAP2

rfccodecs.cpp
Go to the documentation of this file.
1/**********************************************************************
2 *
3 * rfccodecs.cpp - handler for various rfc/mime encodings
4 * Copyright (C) 2000 s.carstens@gmx.de
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 *********************************************************************/
22/**
23 * @file
24 * This file is part of the IMAP support library and defines the
25 * RfcCodecs class.
26 *
27 * @brief
28 * Defines the RfcCodecs class.
29 *
30 * @author Sven Carstens
31 */
32
33#include "rfccodecs.h"
34
35#include <ctype.h>
36#include <sys/types.h>
37
38#include <stdio.h>
39#include <stdlib.h>
40
41#include <QtCore/QTextCodec>
42#include <QtCore/QBuffer>
43#include <QtCore/QByteArray>
44#include <QtCore/QLatin1Char>
45#include <kcodecs.h>
46
47using namespace KIMAP2;
48
49// This part taken from rfc 2192 IMAP URL Scheme. C. Newman. September 1997.
50// adapted to QT-Toolkit by Sven Carstens <s.carstens@gmx.de> 2000
51
52//@cond PRIVATE
// Alphabet of the modified base64 used for IMAP mailbox names; the last two
// symbols are '+' and ',' (a comma takes the place of the usual '/').
53static const unsigned char base64chars[] =
54 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
// Marker stored in the reverse lookup table for bytes outside the alphabet.
55#define UNDEFINED 64
// NOTE(review): MAXLINE is not referenced by the code visible in this file.
56#define MAXLINE 76
// The 16 characters that get hex-escaped by the RFC 2047 / RFC 2231 encoders
// (the 17th array element is the string terminator).
57static const char especials[17] = "()<>@,;:\"/[]?.= ";
58
59/* UTF16 definitions */
// Surrogate-pair arithmetic: a code point >= UTF16BASE is split into a high
// and a low surrogate, each carrying UTF16SHIFT (10) payload bits.
60#define UTF16MASK 0x03FFUL
61#define UTF16SHIFT 10
62#define UTF16BASE 0x10000UL
63#define UTF16HIGHSTART 0xD800UL
64#define UTF16HIGHEND 0xDBFFUL
65#define UTF16LOSTART 0xDC00UL
66#define UTF16LOEND 0xDFFFUL
67//@endcond
68
69//-----------------------------------------------------------------------------
71{
72 unsigned char c, i, bitcount;
73 unsigned long ucs4, utf16, bitbuf;
74 unsigned char base64[256], utf8[6];
75 unsigned int srcPtr = 0;
76 QByteArray dst;
77 QByteArray src = inSrc;
78 uint srcLen = inSrc.length();
79
80 /* initialize modified base64 decoding table */
81 memset(base64, UNDEFINED, sizeof(base64));
82 for (i = 0; i < sizeof(base64chars); ++i) {
83 base64[(int)base64chars[i]] = i;
84 }
85
86 /* loop until end of string */
87 while (srcPtr < srcLen) {
88 c = src[srcPtr++];
89 /* deal with literal characters and &- */
90 if (c != '&' || src[srcPtr] == '-') {
91 /* encode literally */
92 dst += c;
93 /* skip over the '-' if this is an &- sequence */
94 if (c == '&') {
95 srcPtr++;
96 }
97 } else {
98 /* convert modified UTF-7 -> UTF-16 -> UCS-4 -> UTF-8 -> HEX */
99 bitbuf = 0;
100 bitcount = 0;
101 ucs4 = 0;
102 while ((c = base64[(unsigned char)src[srcPtr]]) != UNDEFINED) {
103 ++srcPtr;
104 bitbuf = (bitbuf << 6) | c;
105 bitcount += 6;
106 /* enough bits for a UTF-16 character? */
107 if (bitcount >= 16) {
108 bitcount -= 16;
109 utf16 = (bitcount ? bitbuf >> bitcount : bitbuf) & 0xffff;
110 /* convert UTF16 to UCS4 */
111 if (utf16 >= UTF16HIGHSTART && utf16 <= UTF16HIGHEND) {
112 ucs4 = (utf16 - UTF16HIGHSTART) << UTF16SHIFT;
113 continue;
114 } else if (utf16 >= UTF16LOSTART && utf16 <= UTF16LOEND) {
115 ucs4 += utf16 - UTF16LOSTART + UTF16BASE;
116 } else {
117 ucs4 = utf16;
118 }
119 /* convert UTF-16 range of UCS4 to UTF-8 */
120 if (ucs4 <= 0x7fUL) {
121 utf8[0] = ucs4;
122 i = 1;
123 } else if (ucs4 <= 0x7ffUL) {
124 utf8[0] = 0xc0 | (ucs4 >> 6);
125 utf8[1] = 0x80 | (ucs4 & 0x3f);
126 i = 2;
127 } else if (ucs4 <= 0xffffUL) {
128 utf8[0] = 0xe0 | (ucs4 >> 12);
129 utf8[1] = 0x80 | ((ucs4 >> 6) & 0x3f);
130 utf8[2] = 0x80 | (ucs4 & 0x3f);
131 i = 3;
132 } else {
133 utf8[0] = 0xf0 | (ucs4 >> 18);
134 utf8[1] = 0x80 | ((ucs4 >> 12) & 0x3f);
135 utf8[2] = 0x80 | ((ucs4 >> 6) & 0x3f);
136 utf8[3] = 0x80 | (ucs4 & 0x3f);
137 i = 4;
138 }
139 /* copy it */
140 for (c = 0; c < i; ++c) {
141 dst += utf8[c];
142 }
143 }
144 }
145 /* skip over trailing '-' in modified UTF-7 encoding */
146 if (src[srcPtr] == '-') {
147 ++srcPtr;
148 }
149 }
150 }
151 return dst;
152}
153
158
159//-----------------------------------------------------------------------------
160
162{
163 uint len = src.length();
164 QByteArray result;
165 result.reserve(2 * len);
166 for (unsigned int i = 0; i < len; i++) {
167 if (src[i] == '"' || src[i] == '\\') {
168 result += '\\';
169 }
170 result += src[i];
171 }
172 result.squeeze();
173 return result;
174}
175
177{
178 uint len = src.length();
179 QString result;
180 result.reserve(2 * len);
181 for (unsigned int i = 0; i < len; i++) {
182 if (src[i] == QLatin1Char('"') || src[i] == QLatin1Char('\\')) {
183 result += QLatin1Char('\\');
184 }
185 result += src[i];
186 }
187 //result.squeeze(); - unnecessary and slow
188 return result;
189}
190
191//-----------------------------------------------------------------------------
196
198{
199 unsigned int utf8pos, utf8total, c, utf7mode, bitstogo, utf16flag;
200 unsigned int ucs4, bitbuf;
201 QByteArray src = inSrc;
202 QByteArray dst;
203
204 int srcPtr = 0;
205 utf7mode = 0;
206 utf8total = 0;
207 bitstogo = 0;
208 utf8pos = 0;
209 bitbuf = 0;
210 ucs4 = 0;
211 while (srcPtr < src.length()) {
212 c = (unsigned char)src[srcPtr++];
213 /* normal character? */
214 if (c >= ' ' && c <= '~') {
215 /* switch out of UTF-7 mode */
216 if (utf7mode) {
217 if (bitstogo) {
218 dst += base64chars[(bitbuf << (6 - bitstogo)) & 0x3F];
219 bitstogo = 0;
220 }
221 dst += '-';
222 utf7mode = 0;
223 }
224 dst += c;
225 /* encode '&' as '&-' */
226 if (c == '&') {
227 dst += '-';
228 }
229 continue;
230 }
231 /* switch to UTF-7 mode */
232 if (!utf7mode) {
233 dst += '&';
234 utf7mode = 1;
235 }
236 /* Encode US-ASCII characters as themselves */
237 if (c < 0x80) {
238 ucs4 = c;
239 utf8total = 1;
240 } else if (utf8total) {
241 /* save UTF8 bits into UCS4 */
242 ucs4 = (ucs4 << 6) | (c & 0x3FUL);
243 if (++utf8pos < utf8total) {
244 continue;
245 }
246 } else {
247 utf8pos = 1;
248 if (c < 0xE0) {
249 utf8total = 2;
250 ucs4 = c & 0x1F;
251 } else if (c < 0xF0) {
252 utf8total = 3;
253 ucs4 = c & 0x0F;
254 } else {
255 /* NOTE: can't convert UTF8 sequences longer than 4 */
256 utf8total = 4;
257 ucs4 = c & 0x03;
258 }
259 continue;
260 }
261 /* loop to split ucs4 into two utf16 chars if necessary */
262 utf8total = 0;
263 do {
264 if (ucs4 >= UTF16BASE) {
265 ucs4 -= UTF16BASE;
266 bitbuf =
267 (bitbuf << 16) | ((ucs4 >> UTF16SHIFT) + UTF16HIGHSTART);
268 ucs4 = (ucs4 & UTF16MASK) + UTF16LOSTART;
269 utf16flag = 1;
270 } else {
271 bitbuf = (bitbuf << 16) | ucs4;
272 utf16flag = 0;
273 }
274 bitstogo += 16;
275 /* spew out base64 */
276 while (bitstogo >= 6) {
277 bitstogo -= 6;
278 dst +=
279 base64chars[(bitstogo ? (bitbuf >> bitstogo) : bitbuf) & 0x3F];
280 }
281 } while (utf16flag);
282 }
283 /* if in UTF-7 mode, finish in ASCII */
284 if (utf7mode) {
285 if (bitstogo) {
286 dst += base64chars[(bitbuf << (6 - bitstogo)) & 0x3F];
287 }
288 dst += '-';
289 }
290 return quoteIMAP(dst);
291}
292
293//-----------------------------------------------------------------------------
294QTextCodec *KIMAP2::codecForName(const QString &str)
295{
296 if (str.isEmpty()) {
297 return Q_NULLPTR;
298 }
299 return QTextCodec::codecForName(str.toLower().
300 replace(QStringLiteral("windows"), QStringLiteral("cp")).toLatin1());
301}
302
303//-----------------------------------------------------------------------------
305{
306 QString throw_away;
307
308 return decodeRFC2047String(str, throw_away);
309}
310
311//-----------------------------------------------------------------------------
313 QString &charset)
314{
315 QString throw_away;
316
317 return decodeRFC2047String(str, charset, throw_away);
318}
319
320//-----------------------------------------------------------------------------
322 QString &charset,
323 QString &language)
324{
325 //do we have a rfc string
326 if (!str.contains(QStringLiteral("=?"))) {
327 return str;
328 }
329
330 // FIXME get rid of the conversion?
331 QByteArray aStr = str.toLatin1(); // QString.length() means Unicode chars
332 QByteArray result;
333 char *pos, *beg, *end, *mid = Q_NULLPTR;
334 QByteArray cstr;
335 char encoding = 0, ch;
336 bool valid;
337 const int maxLen = 200;
338 int i;
339
340// result.truncate(aStr.length());
341 for (pos = aStr.data(); *pos; pos++) {
342 if (pos[0] != '=' || pos[1] != '?') {
343 result += *pos;
344 continue;
345 }
346 beg = pos + 2;
347 end = beg;
348 valid = true;
349 // parse charset name
350 for (i = 2, pos += 2;
351 i < maxLen &&
352 (*pos != '?' && (ispunct(*pos) || isalnum(*pos)));
353 i++) {
354 pos++;
355 }
356 if (*pos != '?' || i < 4 || i >= maxLen) {
357 valid = false;
358 } else {
359 charset = QLatin1String(QByteArray(beg, i - 1)); // -2 + 1 for the zero
360 int pt = charset.lastIndexOf(QLatin1Char('*'));
361 if (pt != -1) {
362 // save language for later usage
363 language = charset.right(charset.length() - pt - 1);
364
365 // tie off language as defined in rfc2047
366 charset.truncate(pt);
367 }
368 // get encoding and check delimiting question marks
369 encoding = toupper(pos[1]);
370 if (pos[2] != '?' ||
371 (encoding != 'Q' && encoding != 'B' &&
372 encoding != 'q' && encoding != 'b')) {
373 valid = false;
374 }
375 pos += 3;
376 i += 3;
377// qCDebug(KIMAP2_LOG) << "Charset:" << charset << "- Language:" << language << "-'" << pos << "'";
378 }
379 if (valid) {
380 mid = pos;
381 // search for end of encoded part
382 while (i < maxLen && *pos && !(*pos == '?' && *(pos + 1) == '=')) {
383 i++;
384 pos++;
385 }
386 end = pos + 2;//end now points to the first char after the encoded string
387 if (i >= maxLen || !*pos) {
388 valid = false;
389 }
390 }
391 if (valid) {
392 ch = *pos;
393 *pos = '\0';
394 cstr = QByteArray(mid).left((int)(mid - pos - 1));
395 if (encoding == 'Q') {
396 // decode quoted printable text
397 for (i = cstr.length() - 1; i >= 0; --i) {
398 if (cstr[i] == '_') {
399 cstr[i] = ' ';
400 }
401 }
402// qCDebug(KIMAP2_LOG) << "before QP '"
403// << cstr << "'";
405// qCDebug(KIMAP2_LOG) << "after QP '"
406// << cstr << "'";
407 } else {
408 // decode base64 text
409 cstr = QByteArray::fromBase64(cstr);
410 }
411 *pos = ch;
412 int len = cstr.length();
413 for (i = 0; i < len; ++i) {
414 result += cstr[i];
415 }
416
417 pos = end - 1;
418 } else {
419// qCDebug(KIMAP2_LOG) << "invalid";
420 //result += "=?";
421 //pos = beg -1; // because pos gets increased shortly afterwards
422 pos = beg - 2;
423 result += *pos++;
424 result += *pos;
425 }
426 }
427 if (!charset.isEmpty()) {
428 QTextCodec *aCodec = codecForName(QLatin1String(charset.toLatin1()));
429 if (aCodec) {
430// qCDebug(KIMAP2_LOG) << "Codec is" << aCodec->name();
431 return aCodec->toUnicode(result);
432 }
433 }
434 return QLatin1String(result);
435}
436
437//-----------------------------------------------------------------------------
439{
440 return QLatin1String(encodeRFC2047String(str.toLatin1()));
441}
442
443//-----------------------------------------------------------------------------
445{
446 if (str.isEmpty()) {
447 return str;
448 }
449
450 const signed char *latin =
451 reinterpret_cast<const signed char *>
452 (str.data()), *l, *start, *stop;
453 char hexcode;
454 int numQuotes, i;
455 int rptr = 0;
456 // My stats show this number results in 12 resize() out of 73,000
457 int resultLen = 3 * str.length() / 2;
458 QByteArray result(resultLen, '\0');
459
460 while (*latin) {
461 l = latin;
462 start = latin;
463 while (*l) {
464 if (*l == 32) {
465 start = l + 1;
466 }
467 if (*l < 0) {
468 break;
469 }
470 l++;
471 }
472 if (*l) {
473 numQuotes = 1;
474 while (*l) {
475 /* The encoded word must be limited to 75 character */
476 for (i = 0; i < 16; ++i) {
477 if (*l == especials[i]) {
478 numQuotes++;
479 }
480 }
481 if (*l < 0) {
482 numQuotes++;
483 }
484 /* Stop after 58 = 75 - 17 characters or at "<user@host..." */
485 if (l - start + 2 * numQuotes >= 58 || *l == 60) {
486 break;
487 }
488 l++;
489 }
490 if (*l) {
491 stop = l - 1;
492 while (stop >= start && *stop != 32) {
493 stop--;
494 }
495 if (stop <= start) {
496 stop = l;
497 }
498 } else {
499 stop = l;
500 }
501 if (resultLen - rptr - 1 <= start - latin + 1 + 16) {
502 // =?iso-88...
503 resultLen += (start - latin + 1) * 2 + 20; // more space
504 result.resize(resultLen);
505 }
506 while (latin < start) {
507 result[rptr++] = *latin;
508 latin++;
509 }
510 result.replace(rptr, 15, "=?iso-8859-1?q?");
511 rptr += 15;
512 if (resultLen - rptr - 1 <= 3 * (stop - latin + 1)) {
513 resultLen += (stop - latin + 1) * 4 + 20; // more space
514 result.resize(resultLen);
515 }
516 while (latin < stop) {
517 // can add up to 3 chars/iteration
518 numQuotes = 0;
519 for (i = 0; i < 16; ++i) {
520 if (*latin == especials[i]) {
521 numQuotes = 1;
522 }
523 }
524 if (*latin < 0) {
525 numQuotes = 1;
526 }
527 if (numQuotes) {
528 result[rptr++] = '=';
529 hexcode = ((*latin & 0xF0) >> 4) + 48;
530 if (hexcode >= 58) {
531 hexcode += 7;
532 }
533 result[rptr++] = hexcode;
534 hexcode = (*latin & 0x0F) + 48;
535 if (hexcode >= 58) {
536 hexcode += 7;
537 }
538 result[rptr++] = hexcode;
539 } else {
540 result[rptr++] = *latin;
541 }
542 latin++;
543 }
544 result[rptr++] = '?';
545 result[rptr++] = '=';
546 } else {
547 while (*latin) {
548 if (rptr == resultLen - 1) {
549 resultLen += 30;
550 result.resize(resultLen);
551 }
552 result[rptr++] = *latin;
553 latin++;
554 }
555 }
556 }
557 result[rptr] = 0;
558 return result;
559}
560
561//-----------------------------------------------------------------------------
563{
564 if (str.isEmpty()) {
565 return str;
566 }
567
568 signed char *latin = (signed char *)calloc(1, str.length() + 1);
569 char *latin_us = (char *)latin;
570 strcpy(latin_us, str.toLatin1());
571 signed char *l = latin;
572 char hexcode;
573 int i;
574 bool quote;
575 while (*l) {
576 if (*l < 0) {
577 break;
578 }
579 l++;
580 }
581 if (!*l) {
582 free(latin);
583 return str;
584 }
585 QByteArray result;
586 l = latin;
587 while (*l) {
588 quote = *l < 0;
589 for (i = 0; i < 16; ++i) {
590 if (*l == especials[i]) {
591 quote = true;
592 }
593 }
594 if (quote) {
595 result += '%';
596 hexcode = ((*l & 0xF0) >> 4) + 48;
597 if (hexcode >= 58) {
598 hexcode += 7;
599 }
600 result += hexcode;
601 hexcode = (*l & 0x0F) + 48;
602 if (hexcode >= 58) {
603 hexcode += 7;
604 }
605 result += hexcode;
606 } else {
607 result += *l;
608 }
609 l++;
610 }
611 free(latin);
612 return QLatin1String(result);
613}
614
615//-----------------------------------------------------------------------------
617{
618 int p = str.indexOf(QLatin1Char('\''));
619
620 //see if it is an rfc string
621 if (p < 0) {
622 return str;
623 }
624
625 int l = str.lastIndexOf(QLatin1Char('\''));
626
627 //second is language
628 if (p >= l) {
629 return str;
630 }
631
632 //first is charset or empty
633 //QString charset = str.left ( p );
634 QString st = str.mid(l + 1);
635 //QString language = str.mid ( p + 1, l - p - 1 );
636
637 //qCDebug(KIMAP2_LOG) << "Charset:" << charset << "Language:" << language;
638
639 char ch, ch2;
640 p = 0;
641 while (p < (int) st.length()) {
642 if (st.at(p) == 37) {
643 ch = st.at(p + 1).toLatin1() - 48;
644 if (ch > 16) {
645 ch -= 7;
646 }
647 ch2 = st.at(p + 2).toLatin1() - 48;
648 if (ch2 > 16) {
649 ch2 -= 7;
650 }
651 st.replace(p, 1, ch * 16 + ch2);
652 st.remove(p + 1, 2);
653 }
654 p++;
655 }
656 return st;
657}
void stop(Ekos::AlignState mode)
Q_SCRIPTABLE Q_NOREPLY void start()
KCODECS_EXPORT QByteArray quotedPrintableDecode(QByteArrayView in)
const char * constData() const const
char * data()
QByteArray fromBase64(const QByteArray &base64, Base64Options options)
bool isEmpty() const const
QByteArray left(qsizetype len) const const
qsizetype length() const const
QByteArray & replace(QByteArrayView before, QByteArrayView after)
void reserve(qsizetype size)
void resize(qsizetype newSize, char c)
void squeeze()
char toLatin1() const const
const QChar at(qsizetype position) const const
bool contains(QChar ch, Qt::CaseSensitivity cs) const const
QString fromUtf8(QByteArrayView str)
qsizetype indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const const
bool isEmpty() const const
qsizetype lastIndexOf(QChar ch, Qt::CaseSensitivity cs) const const
qsizetype length() const const
QString mid(qsizetype position, qsizetype n) const const
QString & remove(QChar ch, Qt::CaseSensitivity cs)
QString & replace(QChar before, QChar after, Qt::CaseSensitivity cs)
void reserve(qsizetype size)
QString right(qsizetype n) const const
QByteArray toLatin1() const const
QString toLower() const const
QByteArray toUtf8() const const
void truncate(qsizetype position)
This file is part of the IMAP support library and defines the RfcCodecs class.
KIMAP2_EXPORT const QString encodeRFC2231String(const QString &str)
Encodes a RFC2231 string str.
KIMAP2_EXPORT QByteArray encodeImapFolderName(const QByteArray &src)
Converts a Unicode IMAP mailbox to a QByteArray which can be used in IMAP communication.
KIMAP2_EXPORT QByteArray decodeImapFolderName(const QByteArray &inSrc)
Converts a UTF-7 encoded IMAP mailbox to a QByteArray.
Definition rfccodecs.cpp:70
KIMAP2_EXPORT const QString decodeRFC2231String(const QString &str)
Decodes a RFC2231 string str.
KIMAP2_EXPORT QString quoteIMAP(const QString &src)
Replaces each " character with \" and each \ character with \\.
KIMAP2_EXPORT const QString encodeRFC2047String(const QString &str)
Encodes a RFC2047 string str.
KIMAP2_EXPORT const QString decodeRFC2047String(const QString &str, QString &charset, QString &language)
Decodes a RFC2047 string str.
KIMAP2_EXPORT QTextCodec * codecForName(const QString &name)
Fetches a Codec by name.
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:59:41 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.