KMime

charfreq.cpp
Go to the documentation of this file.
1/*
2 kmime_charfreq.cpp
3
4 KMime, the KDE Internet mail/usenet news message library.
5 SPDX-FileCopyrightText: 2001-2002 Marc Mutz <mutz@kde.org>
6
7 SPDX-License-Identifier: LGPL-2.0-or-later
8*/
9
10/**
11 @file
12 This file is part of the API for handling MIME data and
13 defines the CharFreq class.
14
15 @brief
16 Defines the CharFreq class.
17
18 @authors Marc Mutz <mutz@kde.org>
19*/
20
21#include "charfreq_p.h"
22
23using namespace KMime;
24
25CharFreq::CharFreq(QByteArrayView buf)
26 : mNUL(0),
27 mCTL(0),
28 mCR(0), mLF(0),
29 mCRLF(0),
30 mPrintable(0),
31 mEightBit(0),
32 mTotal(0),
33 mLineMin(0xffffffff),
34 mLineMax(0)
35{
36 if (!buf.isEmpty()) {
37 count(buf.data(), buf.size());
38 }
39}
40
/**
  Returns true if @p ch is a horizontal tab or a space, false for every
  other character (including CR and LF).
*/
static inline bool isWS(char ch)
{
    switch (ch) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
45
46void CharFreq::count(const char *it, size_t len)
47{
48 const char *end = it + len;
49 uint currentLineLength = 0;
50 // initialize the prevChar with LF so that From_ detection works w/o
51 // special-casing:
52 char prevChar = '\n';
53 char prevPrevChar = 0;
54
55 for (; it != end ; ++it) {
56 ++currentLineLength;
57 switch (*it) {
58 case '\0': ++mNUL; break;
59 case '\r': ++mCR; break;
60 case '\n': ++mLF;
61 if (prevChar == '\r') {
62 --currentLineLength; ++mCRLF;
63 }
64 if (currentLineLength >= mLineMax) {
65 mLineMax = currentLineLength - 1;
66 }
67 if (currentLineLength <= mLineMin) {
68 mLineMin = currentLineLength - 1;
69 }
70 if (!mTrailingWS) {
71 if (isWS(prevChar) ||
72 (prevChar == '\r' && isWS(prevPrevChar))) {
73 mTrailingWS = true;
74 }
75 }
76 currentLineLength = 0;
77 break;
78 case 'F': // check for lines starting with From_ if not found already:
79 if (!mLeadingFrom) {
80 if (prevChar == '\n' && end - it >= 5 &&
81 !qstrncmp("From ", it, 5)) {
82 mLeadingFrom = true;
83 }
84 }
85 ++mPrintable;
86 break;
87 default: {
88 uchar c = *it;
89 if (c == '\t' || (c >= ' ' && c <= '~')) {
90 ++mPrintable;
91 } else if (c == 127 || c < ' ') {
92 ++mCTL;
93 } else {
94 ++mEightBit;
95 }
96 }
97 }
98 prevPrevChar = prevChar;
99 prevChar = *it;
100 }
101
102 // consider the length of the last line
103 if (currentLineLength >= mLineMax) {
104 mLineMax = currentLineLength;
105 }
106 if (currentLineLength <= mLineMin) {
107 mLineMin = currentLineLength;
108 }
109
110 // check whether the last character is tab or space
111 if (isWS(prevChar)) {
112 mTrailingWS = true;
113 }
114
115 mTotal = len;
116}
117
118bool CharFreq::isEightBitData() const
119{
120 return type() == EightBitData;
121}
122
123bool CharFreq::isEightBitText() const
124{
125 return type() == EightBitText;
126}
127
128bool CharFreq::isSevenBitData() const
129{
130 return type() == SevenBitData;
131}
132
133bool CharFreq::isSevenBitText() const
134{
135 return type() == SevenBitText;
136}
137
138bool CharFreq::hasTrailingWhitespace() const
139{
140 return mTrailingWS;
141}
142
143bool CharFreq::hasLeadingFrom() const
144{
145 return mLeadingFrom;
146}
147
148CharFreq::Type CharFreq::type() const
149{
150#if 0
151 qCDebug(KMIME_LOG)("Total: %d; NUL: %d; CTL: %d;\n"
152 "CR: %d; LF: %d; CRLF: %d;\n"
153 "lineMin: %d; lineMax: %d;\n"
154 "printable: %d; eightBit: %d;\n"
155 "trailing whitespace: %s;\n"
156 "leading 'From ': %s;\n",
157 total, NUL, CTL, CR, LF, CRLF, lineMin, lineMax,
158 printable, eightBit,
159 mTrailingWS ? "yes" : "no" , mLeadingFrom ? "yes" : "no");
160#endif
161 if (mNUL) { // must be binary
162 return Binary;
163 }
164
165 // doesn't contain NUL's:
166 if (mEightBit) {
167 if (mLineMax > 988) {
168 return EightBitData; // not allowed in 8bit
169 }
170 if ((mLF != mCRLF && mCRLF > 0) || mCR != mCRLF || controlCodesRatio() > 0.2) {
171 return EightBitData;
172 }
173 return EightBitText;
174 }
175
176 // doesn't contain NUL's, nor 8bit chars:
177 if (mLineMax > 988) {
178 return SevenBitData;
179 }
180 if ((mLF != mCRLF && mCRLF > 0) || mCR != mCRLF || controlCodesRatio() > 0.2) {
181 return SevenBitData;
182 }
183
184 // no NUL, no 8bit chars, no excessive CTLs and no lines > 998 chars:
185 return SevenBitText;
186}
187
188float CharFreq::printableRatio() const
189{
190 if (mTotal) {
191 return float(mPrintable) / float(mTotal);
192 } else {
193 return 0;
194 }
195}
196
197float CharFreq::controlCodesRatio() const
198{
199 if (mTotal) {
200 return float(mCTL) / float(mTotal);
201 } else {
202 return 0;
203 }
204}
205
Type type(const QSqlDatabase &db)
const QList< QKeySequence > & end()
bool isEmpty() const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:48:31 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.