KTextEditor

kateindentdetecter.cpp
1/*
2 SPDX-FileCopyrightText: 2022 Waqar Ahmed <waqar.17a@gmail.com>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6#include "kateindentdetecter.h"
7
8#include "katedocument.h"
9
10KateIndentDetecter::KateIndentDetecter(KTextEditor::DocumentPrivate *doc)
11 : m_doc(doc)
12{
13}
14
/// Result of comparing the indentation of two lines.
struct SpacesDiffResult {
    /// Difference between the two indentations, expressed in spaces (0 by default).
    int spacesDiff{0};
    /// Set when the difference looks like code alignment rather than indentation (false by default).
    bool looksLikeAlignment{false};
};
19
20static SpacesDiffResult spacesDiff(const QString &a, int aLength, const QString &b, int bLength)
21{
22 SpacesDiffResult result;
23 result.spacesDiff = 0;
24 result.looksLikeAlignment = false;
25
26 // This can go both ways (e.g.):
27 // - a: "\t"
28 // - b: "\t "
29 // => This should count 1 tab and 4 spaces
30
31 int i = 0;
32
33 for (i = 0; i < aLength && i < bLength; i++) {
34 const auto aCharCode = a.at(i);
35 const auto bCharCode = b.at(i);
36
37 if (aCharCode != bCharCode) {
38 break;
39 }
40 }
41
42 int aSpacesCnt = 0;
43 int aTabsCount = 0;
44 for (int j = i; j < aLength; j++) {
45 const auto aCharCode = a.at(j);
46 if (aCharCode == QLatin1Char(' ')) {
47 aSpacesCnt++;
48 } else {
49 aTabsCount++;
50 }
51 }
52
53 int bSpacesCnt = 0;
54 int bTabsCount = 0;
55 for (int j = i; j < bLength; j++) {
56 const auto bCharCode = b.at(j);
57 if (bCharCode == QLatin1Char(' ')) {
58 bSpacesCnt++;
59 } else {
60 bTabsCount++;
61 }
62 }
63
64 if (aSpacesCnt > 0 && aTabsCount > 0) {
65 return result;
66 }
67 if (bSpacesCnt > 0 && bTabsCount > 0) {
68 return result;
69 }
70
71 const auto tabsDiff = std::abs(aTabsCount - bTabsCount);
72 const auto spacesDiff = std::abs(aSpacesCnt - bSpacesCnt);
73
74 if (tabsDiff == 0) {
75 // check if the indentation difference might be caused by alignment reasons
76 // sometime folks like to align their code, but this should not be used as a hint
77 result.spacesDiff = spacesDiff;
78
79 if (spacesDiff > 0 && 0 <= bSpacesCnt - 1 && bSpacesCnt - 1 < a.length() && bSpacesCnt < b.length()) {
80 if (b.at(bSpacesCnt) != QLatin1Char(' ') && a.at(bSpacesCnt - 1) == QLatin1Char(' ')) {
81 if (a.at(a.length() - 1) == QLatin1Char(',')) {
82 // This looks like an alignment desire: e.g.
83 // const a = b + c,
84 // d = b - c;
85 result.looksLikeAlignment = true;
86 }
87 }
88 }
89 return result;
90 }
91 if (spacesDiff % tabsDiff == 0) {
92 result.spacesDiff = spacesDiff / tabsDiff;
93 return result;
94 }
95 return result;
96}
97
98KateIndentDetecter::Result KateIndentDetecter::detect(int defaultTabSize, bool defaultInsertSpaces)
99{
100 // Look at most at the first 10k lines
101 const int linesCount = std::min(m_doc->lines(), 10000);
102
103 int linesIndentedWithTabsCount = 0; // number of lines that contain at least one tab in indentation
104 int linesIndentedWithSpacesCount = 0; // number of lines that contain only spaces in indentation
105
106 QString previousLineText; // content of latest line that contained non-whitespace chars
107 int previousLineIndentation = 0; // index at which latest line contained the first non-whitespace char
108
109 constexpr int ALLOWED_TAB_SIZE_GUESSES[] = {2, 4, 6, 8, 3, 5, 7}; // prefer even guesses for `tabSize`, limit to [2, 8].
110 constexpr int MAX_ALLOWED_TAB_SIZE_GUESS = 8; // max(ALLOWED_TAB_SIZE_GUESSES) = 8
111
112 int spacesDiffCount[MAX_ALLOWED_TAB_SIZE_GUESS + 1] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; // `tabSize` scores
113 SpacesDiffResult tmp;
114
115 for (int lineNumber = 0; lineNumber < linesCount; lineNumber++) {
116 const QString currentLineText = m_doc->line(lineNumber);
117 const int currentLineLength = currentLineText.length();
118
119 bool currentLineHasContent = false; // does `currentLineText` contain non-whitespace chars
120 int currentLineIndentation = 0; // index at which `currentLineText` contains the first non-whitespace char
121 int currentLineSpacesCount = 0; // count of spaces found in `currentLineText` indentation
122 int currentLineTabsCount = 0; // count of tabs found in `currentLineText` indentation
123 for (int j = 0, lenJ = currentLineLength; j < lenJ; j++) {
124 const auto charCode = currentLineText.at(j);
125
126 if (charCode == QLatin1Char('\t')) {
127 currentLineTabsCount++;
128 } else if (charCode == QLatin1Char(' ')) {
129 currentLineSpacesCount++;
130 } else {
131 // Hit non whitespace character on this line
132 currentLineHasContent = true;
133 currentLineIndentation = j;
134 break;
135 }
136 }
137
138 // Ignore empty or only whitespace lines
139 if (!currentLineHasContent) {
140 continue;
141 }
142
143 if (currentLineTabsCount > 0) {
144 linesIndentedWithTabsCount++;
145 } else if (currentLineSpacesCount > 1) {
146 linesIndentedWithSpacesCount++;
147 }
148
149 tmp = spacesDiff(previousLineText, previousLineIndentation, currentLineText, currentLineIndentation);
150
151 if (tmp.looksLikeAlignment) {
152 // if defaultInsertSpaces === true && the spaces count == tabSize, we may want to count it as valid indentation
153 //
154 // - item1
155 // - item2
156 //
157 // otherwise skip this line entirely
158 //
159 // const a = 1,
160 // b = 2;
161
162 if (!(defaultInsertSpaces && defaultTabSize == tmp.spacesDiff)) {
163 continue;
164 }
165 }
166
167 const int currentSpacesDiff = tmp.spacesDiff;
168 if (currentSpacesDiff <= MAX_ALLOWED_TAB_SIZE_GUESS) {
169 spacesDiffCount[currentSpacesDiff]++;
170 }
171
172 previousLineText = currentLineText;
173 previousLineIndentation = currentLineIndentation;
174 }
175
176 bool insertSpaces = defaultInsertSpaces;
177 if (linesIndentedWithTabsCount != linesIndentedWithSpacesCount) {
178 insertSpaces = (linesIndentedWithTabsCount < linesIndentedWithSpacesCount);
179 }
180
181 int tabSize = defaultTabSize;
182
183 // Guess tabSize only if inserting spaces...
184 if (insertSpaces) {
185 int tabSizeScore = 0;
186 for (const int possibleTabSize : ALLOWED_TAB_SIZE_GUESSES) {
187 // prefer multiples of two, if ever found one, see bug 474505 and autotests/input/indent_detect/bogus7spaces.md
188 const int possibleTabSizeScore = spacesDiffCount[possibleTabSize];
189 if (possibleTabSizeScore > tabSizeScore && (possibleTabSize % 2 == 0 || tabSizeScore == 0)) {
190 tabSizeScore = possibleTabSizeScore;
191 tabSize = possibleTabSize;
192 }
193 }
194
195 // Let a tabSize of 2 win even if it is not the maximum
196 // (only in case 4 was guessed)
197 if (tabSize == 4 && spacesDiffCount[4] > 0 && spacesDiffCount[2] > 0 && spacesDiffCount[2] >= spacesDiffCount[4] / 2) {
198 tabSize = 2;
199 }
200
201 // If no indent detected, check if the file is 1 space indented
202 if (tabSizeScore == 0) {
203 const auto it = std::max_element(spacesDiffCount, spacesDiffCount + 9);
204 const auto maxIdx = std::distance(spacesDiffCount, it);
205 if (maxIdx == 1) {
206 tabSize = 1;
207 }
208 }
209 }
210
211 return {.indentWidth = tabSize, .indentUsingSpaces = insertSpaces};
212}
Backend of KTextEditor::Document related public KTextEditor interfaces.
QString line(int line) const override
Get a single text line.
int lines() const override
Get the count of lines of the document.
const QChar at(qsizetype position) const
qsizetype length() const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 12:00:26 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.