KTextAddons

voskspeechtotextdevice.cpp
1/*
2 SPDX-FileCopyrightText: 2023-2025 Laurent Montel <montel@kde.org>
3
4 SPDX-License-Identifier: GPL-2.0-or-later
5 based on VoiceAssistant plugin code
6*/
7
8#include "voskspeechtotextdevice.h"
9#include "libvoskspeechtotext_debug.h"
10#if HAVE_VOSK_API_SUPPORT
11#include "vosk_api.h"
12#endif
13#include <QJsonDocument>
14
15VoskSpeechToTextDevice::VoskSpeechToTextDevice(QObject *parent)
16 : QIODevice{parent}
17{
19 qCWarning(LIBVOSKSPEECHTOTEXT_LOG) << "Impossible to open VoskSpeechToTextDevice";
20#if HAVE_VOSK_API_SUPPORT
21 vosk_set_log_level(-1);
22#endif
23 }
24}
25
26VoskSpeechToTextDevice::~VoskSpeechToTextDevice()
27{
28#if HAVE_VOSK_API_SUPPORT
29 vosk_recognizer_free(mRecognizer);
30 vosk_model_free(mModel);
31#endif
32}
33
34bool VoskSpeechToTextDevice::available() const
35{
36#if HAVE_VOSK_API_SUPPORT
37 return true;
38#else
39 return false;
40#endif
41}
42
43bool VoskSpeechToTextDevice::isAsking() const
44{
45 return mIsAsking;
46}
47
48void VoskSpeechToTextDevice::setAsking(bool asking)
49{
50 if (mIsAsking != asking) {
51 mIsAsking = asking;
52 Q_EMIT askingChanged();
53 }
54}
55
56bool VoskSpeechToTextDevice::initialize(VoskSpeechToTextDeviceInfo &&info)
57{
58#if HAVE_VOSK_API_SUPPORT
59 mModel = vosk_model_new(QString(info.modelDir + info.formattedLang).toUtf8().constData());
60 if (mModel) {
61 mRecognizer = vosk_recognizer_new(mModel, info.sampleRate);
62 }
63
64 if (!mModel || !mRecognizer) {
65 return false;
66 }
67#endif
68 return true;
69}
70
71void VoskSpeechToTextDevice::clear()
72{
73#if HAVE_VOSK_API_SUPPORT
74 if (mRecognizer) {
75 vosk_recognizer_reset(mRecognizer);
76 }
77#endif
78}
79
80qint64 VoskSpeechToTextDevice::readData(char *data, qint64 maxlen)
81{
82 Q_UNUSED(data);
83 return maxlen;
84}
85
86qint64 VoskSpeechToTextDevice::writeData(const char *data, qint64 len)
87{
88#if HAVE_VOSK_API_SUPPORT
89 if (vosk_recognizer_accept_waveform(mRecognizer, data, (int)len)) {
90 parseText(vosk_recognizer_result(mRecognizer));
91 } else {
92 parsePartial(vosk_recognizer_partial_result(mRecognizer));
93 }
94#else
95 Q_UNUSED(data);
96#endif
97 return len;
98}
99
100void VoskSpeechToTextDevice::parseText(const char *json)
101{
102 const QJsonDocument obj = QJsonDocument::fromJson(json);
103 QString text = obj[QStringLiteral("text")].toString();
104
105 if (text.isEmpty())
106 return;
107 else if (mIsAsking) {
108 Q_EMIT result(text);
109 return;
110 }
111
112 text.append(u' ');
113
114 if (!text.contains(mWakeWord)) {
115 if (!mIsListiningBecauseOfWakeWord)
116 return;
117
118 Q_EMIT falsePositiveWakeWord();
119 mIsListiningBecauseOfWakeWord = false;
120 return;
121 }
122
123 text = text.mid(text.indexOf(mWakeWord) + mWakeWord.size());
124 text = text.trimmed();
125
126 Q_EMIT result(text);
127 qDebug() << "[debug] Text:" << text;
128 Q_EMIT doneListening();
129}
130
131void VoskSpeechToTextDevice::parsePartial(const char *json)
132{
133 const QJsonDocument obj = QJsonDocument::fromJson(json);
134 QString text = obj[QStringLiteral("partial")].toString();
135 if (text.isEmpty())
136 return;
137 text.append(u' ');
138
139 if (text.contains(mWakeWord)) {
140 Q_EMIT wakeWordDetected();
141 text = text.mid(text.indexOf(mWakeWord) + mWakeWord.size());
142 mIsListiningBecauseOfWakeWord = true;
143 } else if (mIsListiningBecauseOfWakeWord) {
144 Q_EMIT falsePositiveWakeWord();
145 mIsListiningBecauseOfWakeWord = false;
146 return;
147 } else if (!mIsAsking)
148 return;
149
150 Q_EMIT result(text);
151}
152
153QDebug operator<<(QDebug d, const VoskSpeechToTextDevice::VoskSpeechToTextDeviceInfo &t)
154{
155 d.space() << "sampleRate" << t.sampleRate;
156 d.space() << "modelDir" << t.modelDir;
157 d.space() << "formattedLang" << t.formattedLang;
158 return d;
159}
160
161#include "moc_voskspeechtotextdevice.cpp"
KGuiItem open()
QDebug & space()
QJsonDocument fromJson(const QByteArray &json, QJsonParseError *error)
Q_EMIT
QString & append(QChar ch)
bool contains(QChar ch, Qt::CaseSensitivity cs) const
qsizetype indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
bool isEmpty() const
QString mid(qsizetype position, qsizetype n) const
QString trimmed() const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 24 2025 11:49:24 by doxygen 1.13.2 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.