MauiKit Image Tools

ocs.cpp
1#include "ocs.h"
2#include <QImage>
3#include <QDebug>
4#include <QtConcurrent>
5#include <QFutureWatcher>
6
7#include <tesseract/baseapi.h>
8#include <leptonica/allheaders.h>
9#include "OCRLanguageModel.h"
10#if TESSERACT_MAJOR_VERSION < 5
11#include <tesseract/strngs.h>
12#include <tesseract/genericvector.h>
13#endif
14
15// #include "preprocessimage.hpp"
16#include <preprocessimage.hpp>
17#include <convertimage.hpp>
18
19//static cv::Mat qimageToMatRef(QImage &img, int format)
20// {
21// return cv::Mat(img.height(),
22// img.width(),
23// format,
24// img.bits(),
25// static_cast<size_t>(img.bytesPerLine()));
26// }
27// static cv::Mat qimageToMat(QImage img, int format)
28// {
29// return cv::Mat(img.height(),
30// img.width(),
31// format,
32// img.bits(),
33// static_cast<size_t>(img.bytesPerLine()));
34// }
35// static QImage matToQimageRef(cv::Mat &mat, QImage::Format format)
36// {
37// return QImage(mat.data, mat.cols, mat.rows, static_cast<int>(mat.step), format);
38// }
39// static QImage matToQimage(cv::Mat mat, QImage::Format format)
40// {
41// return QImage(mat.data, mat.cols, mat.rows, static_cast<int>(mat.step), format);
42// }
43
44OCS::OCS(QObject *parent) : QObject(parent)
45 ,m_tesseract(new tesseract::TessBaseAPI())
46 ,m_languages(new OCRLanguageModel(this))
47 ,m_boxesTypes(BoxType::Word | BoxType::Line | BoxType::Paragraph)
48 ,m_confidenceThreshold(50)
49 // ,m_whiteList("ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz")
50{
51 std::vector<std::string> availableLanguages;
52#if TESSERACT_MAJOR_VERSION < 5
53 GenericVector<STRING> languageVector;
54 m_tesseract->GetAvailableLanguagesAsVector(&languageVector);
55 for (int i = 0; i < languageVector.size(); i++) {
56 availableLanguages.push_back(languageVector[i].c_str());
57 }
58#else
59 m_tesseract->GetAvailableLanguagesAsVector(&availableLanguages);
60#endif
61
62 m_languages->setLanguages(availableLanguages);
63}
64
65OCS::~OCS()
66{
67 m_tesseract->End();
68}
69
70QString OCS::filePath() const
71{
72 return m_filePath;
73}
74
75QRect OCS::area() const
76{
77 return m_area;
78}
79
80bool OCS::autoRead() const
81{
82 return m_autoRead;
83}
84
85void OCS::setAutoRead(bool value)
86{
87 if(m_autoRead == value)
88 return;
89
90 m_autoRead = value;
91 Q_EMIT autoReadChanged();
92}
93
94void OCS::setBoxesType(OCS::BoxesType types)
95{
96 if(m_boxesTypes == types)
97 return;
98
99
100 m_boxesTypes = types;
101 qDebug() << "Setting the boxes types" << m_boxesTypes << types;
102
103 Q_EMIT boxesTypeChanged();
104}
105
106void OCS::setConfidenceThreshold(float value)
107{
108 if(m_confidenceThreshold == value)
109 return;
110
111 m_confidenceThreshold = value;
112 Q_EMIT confidenceThresholdChanged();
113}
114
115int OCS::wordBoxAt(const QPoint point)
116{
117 int i = 0;
118 for(const auto &box : m_wordBoxes)
119 {
120 QRect rect = box["rect"].toRect();
121
122 qDebug() << "Rect: " << rect << "Point: " << point << rect.contains(point, true);
123
124 if(rect.contains(point))
125 return i;
126
127 i++;
128 }
129
130 return i;
131}
132
133QVector<int> OCS::wordBoxesAt(const QRect &rect)
134{
135 QVector<int> res;
136 int i = 0;
137 for(const auto &box : m_wordBoxes)
138 {
139 QRect rect_o = box["rect"].toRect();
140
141 if(rect.intersects(rect_o))
142 res << i;
143
144 i++;
145 }
146
147 return res;
148}
149
150void OCS::setWhiteList(const QString &value)
151{
152 if(value == m_whiteList)
153 return;
154
155 m_whiteList = value;
156 Q_EMIT whiteListChanged();
157}
158
159void OCS::setBlackList(const QString &value)
160{
161 if(value == m_blackList)
162 return;
163
164 m_blackList = value;
165 Q_EMIT blackListChanged();
166}
167
168void OCS::setPreprocessImage(bool value)
169{
170 if(m_preprocessImage == value)
171 return;
172
173 m_preprocessImage = value;
174
175 Q_EMIT preprocessImageChanged();
176}
177
178void OCS::setPageSegMode(PageSegMode value)
179{
180 if(m_segMode == value)
181 return;
182
183 m_segMode = value;
184 Q_EMIT pageSegModeChanged();
185}
186
187QString OCS::versionString()
188{
189 return QString::fromStdString(tesseract::TessBaseAPI::Version());
190}
191
192void OCS::do_preprocessImage(const QImage &image)
193{
194
195
196}
197
198static tesseract::PageSegMode mapPageSegValue(OCS::PageSegMode value)
199{
200 switch(value)
201 {
202 default:
203 case OCS::PageSegMode::Auto: return tesseract::PageSegMode::PSM_AUTO;
204 case OCS::PageSegMode::Auto_OSD: return tesseract::PageSegMode::PSM_AUTO_OSD;
205 case OCS::PageSegMode::SingleColumn: return tesseract::PageSegMode::PSM_SINGLE_COLUMN;
206 case OCS::PageSegMode::SingleLine: return tesseract::PageSegMode::PSM_SINGLE_LINE;
207 case OCS::PageSegMode::SingleBlock: return tesseract::PageSegMode::PSM_SINGLE_BLOCK;
208 case OCS::PageSegMode::SingleWord: return tesseract::PageSegMode::PSM_SINGLE_WORD;
209 }
210}
211
212void OCS::getTextAsync()
213{
214 if(!QUrl::fromUserInput(m_filePath).isLocalFile())
215 {
216 qDebug() << "URL is not local :: OCR";
217 return;
218 }
219 typedef QMap<BoxType, TextBoxes> Res;
220 auto func = [ocs = this](QUrl url, BoxesType levels) -> Res
221 {
222 tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
223 api->Init(NULL, "eng");
224
225 api->SetVariable("tessedit_char_whitelist",
226 ocs->m_whiteList.toStdString().c_str());
227 api->SetVariable("tessedit_char_blacklist",
228 ocs->m_blackList.toStdString().c_str());
229
230 api->SetPageSegMode(mapPageSegValue(ocs->m_segMode));
231
232 if(ocs->m_preprocessImage)
233 {
234 auto var = new QImage(url.toLocalFile());
235 auto m_imgMat = ConvertImage::qimageToMatRef(*var, CV_8UC4);
236
237 // PreprocessImage::toGray(m_imgMat,1);
238 PreprocessImage::adaptThreshold(m_imgMat, false, 3, 1);
239
240 auto m_ocrImg = ConvertImage::matToQimageRef(m_imgMat, QImage::Format_RGBA8888); //remember to delete
241
242 m_ocrImg.save("/home/camilo/"+QFileInfo(url.toLocalFile()).fileName());
243
244 api->SetImage(m_ocrImg.bits(), m_ocrImg.width(), m_ocrImg.height(), 4, m_ocrImg.bytesPerLine());
245 }else
246 {
247 // Pix *image = pixRead(url.toLocalFile().toStdString().c_str());
248 // api->SetImage(image);
249
250 ocs->m_ocrImg = new QImage(url.toLocalFile());
251 api->SetImage(ocs->m_ocrImg->bits(), ocs->m_ocrImg->width(), ocs->m_ocrImg->height(), 4,
252 ocs->m_ocrImg->bytesPerLine());
253 }
254
255
256 api->SetSourceResolution(200);
257
258 api->Recognize(0);
259
260 TextBoxes wordBoxes, lineBoxes, paragraphBoxes;
261
262 auto levelFunc = [ocs](tesseract::TessBaseAPI *api, tesseract::PageIteratorLevel level) -> TextBoxes
263 {
264 TextBoxes res;
265 tesseract::ResultIterator* ri = api->GetIterator();
266 if (ri != 0)
267 {
268 qDebug() << "Getting text for level" << level;
269 do
270 {
271 const char* word = ri->GetUTF8Text(level);
272 float conf = ri->Confidence(level);
273 int x1, y1, x2, y2;
274 ri->BoundingBox(level, &x1, &y1, &x2, &y2);
275
276 printf("word: '%s'; \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n",
277 word, conf, x1, y1, x2, y2);
278
279 if(conf > ocs->m_confidenceThreshold && !isspace(*word))
280 res << QVariantMap{{"text", QString::fromStdString(word)}, {"rect", QRect{x1, y1, x2-x1, y2-y1}}};
281 delete[] word;
282 } while (ri->Next(level));
283 }
284
285 return res;
286 };
287
288 if(levels.testFlag(Word))
289 wordBoxes = levelFunc(api, tesseract::RIL_WORD);
290
291 if(levels.testFlag(Line))
292 lineBoxes = levelFunc(api, tesseract::RIL_TEXTLINE);
293
294 if(levels.testFlag(Paragraph))
295 paragraphBoxes = levelFunc(api, tesseract::RIL_PARA);
296
297 api->End();
298
299 delete api;
300 return Res{{Word, wordBoxes}, {Line, lineBoxes}, {Paragraph, paragraphBoxes}};
301 };
302
303 auto watcher = new QFutureWatcher<Res>;
304 connect(watcher, &QFutureWatcher<Res>::finished, [this, watcher]()
305 {
306 // Q_EMIT textReady(watcher.future().result());
307 m_wordBoxes = watcher->result()[Word];
308 m_lineBoxes = watcher->result()[Line];
309 m_paragraphBoxes = watcher->result()[Paragraph];
310 Q_EMIT wordBoxesChanged();
311 Q_EMIT lineBoxesChanged();
312 Q_EMIT paragraphBoxesChanged();
313 watcher->deleteLater();
314 });
315
316 qDebug() << "GEtting text for boxes " << m_boxesTypes << m_boxesTypes.testFlag(Word);
317 QFuture<Res> future = QtConcurrent::run(func, QUrl::fromUserInput(m_filePath), m_boxesTypes);
318 watcher->setFuture(future);
319}
320
321QString OCS::getText()
322{
323 QUrl url(QUrl::fromUserInput(m_filePath));
324 if(!url.isLocalFile())
325 {
326 qDebug() << "URL is not local :: OCR";
327 return "Error!";
328 }
329
330 if (m_tesseract->Init(nullptr, m_languages->getLanguagesString().c_str()))
331 {
332 qDebug() << "Failed tesseract OCR init";
333 return "Error!";
334 }
335
336 m_tesseract->SetPageSegMode(tesseract::PSM_AUTO);
337
338 QString outText;
339
340 if(!m_area.isEmpty())
341 {
342 QImage img(url.toLocalFile());
343 img = img.copy(m_area);
344 // img = img.convertToFormat(QImage::Format_Grayscale8);
345
346 m_tesseract->SetImage(img.bits(), img.width(), img.height(), 4, img.bytesPerLine());
347
348 }else
349 {
350 Pix* im = pixRead(url.toLocalFile().toStdString().c_str());
351 m_tesseract->SetImage(im);
352 }
353
354 outText = QString::fromStdString(m_tesseract->GetUTF8Text());
355
356 return outText;
357}
358
359void OCS::setFilePath(QString filePath)
360{
361 if (m_filePath == filePath)
362 return;
363
364 m_filePath = filePath;
365 Q_EMIT filePathChanged(m_filePath);
366}
367
368void OCS::setArea(QRect area)
369{
370 if (m_area == area)
371 return;
372
373 m_area = area;
374 Q_EMIT areaChanged(m_area);
375}
376
377
378TextBoxes OCS::wordBoxes() const
379{
380 return m_wordBoxes;
381}
382
383TextBoxes OCS::paragraphBoxes() const
384{
385 return m_paragraphBoxes;
386}
387
388TextBoxes OCS::lineBoxes() const
389{
390 return m_lineBoxes;
391}
392
393OCS::BoxesType OCS::boxesType()
394{
395 return m_boxesTypes;
396}
397
398float OCS::confidenceThreshold()
399{
400 return m_confidenceThreshold;
401}
402
403QString OCS::whiteList() const
404{
405 return m_whiteList;
406}
407
408QString OCS::blackList() const
409{
410 return m_blackList;
411}
412
413OCS::PageSegMode OCS::pageSegMode() const
414{
415 return m_segMode;
416}
417
418bool OCS::preprocessImage() const
419{
420 return m_preprocessImage;
421}
422
423void OCS::classBegin()
424{
425}
426
427void OCS::componentComplete()
428{
429 qDebug() << "OCS CALSS COMPLETED IN QML";
430 connect(this, &OCS::filePathChanged, [this](QString)
431 {
432 if(m_autoRead)
433 {
434 getTextAsync();
435 }
436 });
437 getTextAsync();
438}
QStringView level(QStringView ifopt)
Q_EMIT
QMetaObject::Connection connect(const QObject *sender, PointerToMemberFunction signal, Functor functor)
bool contains(const QPoint &point, bool proper) const
bool intersects(const QRect &rectangle) const
QString fromStdString(const std::string &str)
QFuture< T > run(Function function,...)
QUrl fromUserInput(const QString &userInput, const QString &workingDirectory, UserInputResolutionOptions options)
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Apr 11 2025 11:57:09 by doxygen 1.13.2 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.