KMime

codecs.cpp
1/*
2 kmime_codecs.cpp
3
4 KMime, the KDE Internet mail/usenet news message library.
5 SPDX-FileCopyrightText: 2001 the KMime authors.
6 See file AUTHORS for details
7
8 SPDX-License-Identifier: LGPL-2.0-or-later
9*/
10
11#include "codecs_p.h"
12#include "kmime_debug.h"
13
14#include <QStringEncoder>
15
16namespace KMime {
17
// Characters with special meaning in structured headers: encodeRFC2047Sentence()
// splits its input at them, and encodeRFC2047String() treats them as needing
// encoding when called with addressHeader set.
18static const char reservedCharacters[] = "\"()<>@,.;:\\[]=";
19
21 bool addressHeader)
22{
23 QByteArray result;
24 int start = 0;
25 int end = 0;
26 bool nonAscii = false;
27 bool useQEncoding = false;
28
29 // fromLatin1() is safe here, codecForName() uses toLatin1() internally
30 QStringEncoder codec(charset.constData());
31
32 QByteArray usedCS;
33 if (!codec.isValid()) {
34 //no codec available => try local8Bit and hope the best ;-)
36 usedCS = codec.name();
37 } else {
38 if (charset.isEmpty()) {
39 usedCS = codec.name();
40 } else {
41 usedCS = charset;
42 }
43 }
44
45 QByteArray encoded8Bit = codec.encode(src);
46 if (codec.hasError()) {
47 usedCS = "utf-8";
48 codec = QStringEncoder(usedCS.constData());
49 encoded8Bit = codec.encode(src);
50 }
51
52 if (usedCS.contains("8859-")) { // use "B"-Encoding for non iso-8859-x charsets
53 useQEncoding = true;
54 }
55
56 const auto encoded8BitLength = encoded8Bit.size();
57 for (int i = 0; i < encoded8BitLength; i++) {
58 if (encoded8Bit[i] == ' ') { // encoding starts at word boundaries
59 start = i + 1;
60 }
61
62 // encode escape character, for japanese encodings...
63 if (((signed char)encoded8Bit[i] < 0) || (encoded8Bit[i] == '\033') ||
64 (addressHeader && (strchr("\"()<>@,.;:\\[]=", encoded8Bit[i]) != nullptr))) {
65 end = start; // non us-ascii char found. Now we determine where to stop encoding
66 nonAscii = true;
67 break;
68 }
69 }
70
71 if (nonAscii) {
72 while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) {
73 // we encode complete words
74 end++;
75 }
76
77 for (int x = end; x < encoded8Bit.length(); x++) {
78 if (((signed char)encoded8Bit[x] < 0) || (encoded8Bit[x] == '\033') ||
79 (addressHeader && (strchr(reservedCharacters, encoded8Bit[x]) != nullptr))) {
80 end = x; // we found another non-ascii word
81
82 while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) {
83 // we encode complete words
84 end++;
85 }
86 }
87 }
88
89 result = encoded8Bit.left(start) + "=?" + usedCS;
90
91 if (useQEncoding) {
92 result += "?Q?";
93
94 char hexcode; // "Q"-encoding implementation described in RFC 2047
95 for (int i = start; i < end; i++) {
96 char c = encoded8Bit[i];
97 if (c == ' ') { // make the result readable with not MIME-capable readers
98 result += '_';
99 } else {
100 if (((c >= 'a') && (c <= 'z')) || // paranoid mode, encode *all* special chars to avoid problems
101 ((c >= 'A') && (c <= 'Z')) || // with "From" & "To" headers
102 ((c >= '0') && (c <= '9'))) {
103 result += c;
104 } else {
105 result += '='; // "stolen" from KMail ;-)
106 hexcode = ((c & 0xF0) >> 4) + 48;
107 if (hexcode >= 58) {
108 hexcode += 7;
109 }
110 result += hexcode;
111 hexcode = (c & 0x0F) + 48;
112 if (hexcode >= 58) {
113 hexcode += 7;
114 }
115 result += hexcode;
116 }
117 }
118 }
119 } else {
120 result += "?B?" + encoded8Bit.mid(start, end - start).toBase64();
121 }
122
123 result += "?=";
124 result += encoded8Bit.right(encoded8Bit.length() - end);
125 } else {
126 result = encoded8Bit;
127 }
128
129 return result;
130}
131
132QByteArray encodeRFC2047Sentence(QStringView src, const QByteArray &charset)
133{
134 QByteArray result;
135 const QChar *ch = src.constData();
136 const auto length = src.size();
137 qsizetype pos = 0;
138 qsizetype wordStart = 0;
139
140 //qCDebug(KMIME_LOG) << "Input:" << src;
141 // Loop over all characters of the string.
142 // When encountering a split character, RFC-2047-encode the word before it, and add it to the result.
143 while (pos < length) {
144 //qCDebug(KMIME_LOG) << "Pos:" << pos << "Result:" << result << "Char:" << ch->toLatin1();
145 const bool isAscii = ch->unicode() < 127;
146 const bool isReserved = (strchr(reservedCharacters, ch->toLatin1()) != nullptr);
147 if (isAscii && isReserved) {
148 const auto wordSize = pos - wordStart;
149 if (wordSize > 0) {
150 const auto word = src.mid(wordStart, wordSize);
151 result += encodeRFC2047String(word, charset);
152 }
153
154 result += ch->toLatin1();
155 wordStart = pos + 1;
156 }
157 ch++;
158 pos++;
159 }
160
161 // Encode the last word
162 const auto wordSize = pos - wordStart;
163 if (wordSize > 0) {
164 const auto word = src.mid(wordStart, pos - wordStart);
165 result += encodeRFC2047String(word, charset);
166 }
167
168 return result;
169}
170
171//-----------------------------------------------------------------------------
172QByteArray encodeRFC2231String(QStringView str, const QByteArray &charset)
173{
174 if (str.isEmpty()) {
175 return {};
176 }
177
178 QStringEncoder codec(charset.constData());
179 QByteArray latin;
180 if (charset == "us-ascii") {
181 latin = str.toLatin1();
182 } else if (codec.isValid()) {
183 latin = codec.encode(str);
184 } else {
185 latin = str.toLocal8Bit();
186 }
187
188 char *l;
189 for (l = latin.data(); *l; ++l) {
190 if (((*l & 0xE0) == 0) || (*l & 0x80)) {
191 // *l is control character or 8-bit char
192 break;
193 }
194 }
195 if (!*l) {
196 return latin;
197 }
198
199 QByteArray result = charset + "''";
200 for (l = latin.data(); *l; ++l) {
201 bool needsQuoting = (*l & 0x80) || (*l == '%');
202 if (!needsQuoting) {
203 constexpr const char especials[] = "()<>@,;:\"/[]?.= \033";
204 for (const auto especial :especials) {
205 if (*l == especial) {
206 needsQuoting = true;
207 break;
208 }
209 }
210 }
211 if (needsQuoting) {
212 result += '%';
213 unsigned char hexcode;
214 hexcode = ((*l & 0xF0) >> 4) + 48;
215 if (hexcode >= 58) {
216 hexcode += 7;
217 }
218 result += hexcode;
219 hexcode = (*l & 0x0F) + 48;
220 if (hexcode >= 58) {
221 hexcode += 7;
222 }
223 result += hexcode;
224 } else {
225 result += *l;
226 }
227 }
228 return result;
229}
230
231}
Q_SCRIPTABLE Q_NOREPLY void start()
KCODECS_EXPORT QByteArray encodeRFC2047String(QStringView src, const QByteArray &charset)
const QList< QKeySequence > & end()
const char * constData() const const
bool contains(QByteArrayView bv) const const
char * data()
bool isEmpty() const const
QByteArray left(qsizetype len) const const
qsizetype length() const const
QByteArray mid(qsizetype pos, qsizetype len) const const
QByteArray right(qsizetype len) const const
qsizetype size() const const
QByteArray toBase64(Base64Options options) const const
char toLatin1() const const
char16_t & unicode()
QStringView mid(qsizetype start, qsizetype length) const const
const_pointer constData() const const
bool isEmpty() const const
qsizetype size() const const
QByteArray toLatin1() const const
QByteArray toLocal8Bit() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Mon Nov 18 2024 12:18:08 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.