8#include "transaction.h"
10#include "documenturldb.h"
11#include "documentiddb.h"
12#include "positiondb.h"
13#include "documentdatadb.h"
16#include "enginequery.h"
18#include "andpostingiterator.h"
19#include "orpostingiterator.h"
20#include "phraseanditerator.h"
24#include "databasesize.h"
26#include "enginedebug.h"
// Constructor taking the Database by const reference plus a transaction type.
// NOTE(review): only the signature appears in this listing (the embedded
// numbering jumps from 35 to 42); initialisers and body are not visible here.
35Transaction::Transaction(
const Database& db, Transaction::TransactionType type)
// reset(type): the only statement visible here logs a warning that the
// Transaction is being reset without abort/commit having been called.
// NOTE(review): the condition guarding the warning and any follow-up steps are
// not visible in this listing.
// NOTE(review): this site uses qWarning(ENGINE) while the m_writeTrans checks in
// this file use qCWarning(ENGINE) - confirm which form is intended.
42void Transaction::reset(TransactionType type)
45 qWarning(ENGINE) <<
"Resetting a Transaction without calling abort/commit";
// init(type): begins the LMDB transaction on m_env and, for ReadWrite
// transactions, creates the WriteTransaction helper stored in m_writeTrans.
51void Transaction::init(TransactionType type)
// ReadOnly maps to MDB_RDONLY; every other type passes no flags.
53 uint flags =
type == ReadOnly ? MDB_RDONLY : 0;
// nullptr is passed as the second argument; the new handle is written to m_txn.
54 int rc = mdb_txn_begin(m_env,
nullptr, flags, &m_txn);
// NOTE(review): the check on rc that presumably guards this message is not
// visible in this listing - confirm that failures stop further use of m_txn.
56 qCDebug(ENGINE) <<
"Transaction" << mdb_strerror(rc);
// Only read-write transactions get a WriteTransaction, built from m_dbis and m_txn.
60 if (type == ReadWrite) {
61 m_writeTrans = std::make_unique<WriteTransaction>(m_dbis, m_txn);
// Pointer overload: delegates to the reference-taking constructor with *db.
// NOTE(review): db is dereferenced with no null check in the visible code -
// callers presumably guarantee a non-null pointer; confirm.
65Transaction::Transaction(Database* db, Transaction::TransactionType type)
66 : Transaction(*db,
type)
// Destructor: the only statement visible here logs a warning that an active
// WriteTransaction is being closed without abort/commit.
// NOTE(review): the guarding condition and any cleanup are not visible in this
// listing.
70Transaction::~Transaction()
73 qWarning(ENGINE) <<
"Closing an active WriteTransaction without calling abort/commit";
// Returns whether the DocumentUrlDB contains the given id.
81bool Transaction::hasDocument(quint64
id)
const
// The URL database is constructed from the idTree and idFilename handles and m_txn.
85 DocumentUrlDB docUrlDb(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn);
86 return docUrlDb.contains(
id);
// Returns whether the given id is present in the DocumentIdDB built on
// m_dbis.contentIndexingDbi.
89bool Transaction::inPhaseOne(quint64
id)
const
92 DocumentIdDB contentIndexingDb(m_dbis.contentIndexingDbi, m_txn);
93 return contentIndexingDb.contains(
id);
// Returns whether the given id is present in the DocumentIdDB built on
// m_dbis.failedIdDbi.
96bool Transaction::hasFailed(quint64
id)
const
99 DocumentIdDB failedIdDb(m_dbis.failedIdDbi, m_txn);
100 return failedIdDb.contains(
id);
// NOTE(review): the signature of the enclosing function is not visible in this
// listing. The visible statements build a DocumentIdDB on m_dbis.failedIdDbi
// and return fetchItems(limit) from it.
105 DocumentIdDB failedIdDb(m_dbis.failedIdDbi, m_txn);
106 return failedIdDb.fetchItems(limit);
// Returns what DocumentUrlDB::get yields for the given id, as a QByteArray.
109QByteArray Transaction::documentUrl(quint64
id)
const
114 DocumentUrlDB docUrlDb(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn);
115 return docUrlDb.get(
id);
// Looks up the id for a path using DocumentUrlDB::getId.
// NOTE(review): the code that derives fileName from path, the loop around the
// lookup and the return statement are not visible in this listing.
118quint64 Transaction::documentId(
const QByteArray& path)
const
123 DocumentUrlDB docUrlDb(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn);
// The lookup starts with parentId 0.
126 quint64 parentId = 0;
128 if (fileName.isEmpty()) {
// The id found for fileName becomes the parentId passed to the next lookup.
132 parentId = docUrlDb.getId(parentId, fileName);
// Returns the DocumentTimeDB::TimeInfo that DocumentTimeDB::get yields for id.
141DocumentTimeDB::TimeInfo Transaction::documentTimeInfo(quint64
id)
const
145 DocumentTimeDB docTimeDb(m_dbis.docTimeDbi, m_txn);
146 return docTimeDb.get(
id);
// Returns what DocumentDataDB::get yields for the given id, as a QByteArray.
149QByteArray Transaction::documentData(quint64
id)
const
154 DocumentDataDB docDataDb(m_dbis.docDataDbi, m_txn);
155 return docDataDb.get(
id);
// NOTE(review): the signature of the enclosing function is not visible in this
// listing. The visible statements build a DocumentIdDB on
// m_dbis.contentIndexingDbi and return fetchItems(size) from it.
163 DocumentIdDB contentIndexingDb(m_dbis.contentIndexingDbi, m_txn);
164 return contentIndexingDb.fetchItems(size);
// NOTE(review): the signature of the enclosing function is not visible in this
// listing. The visible statements assert that term is non-empty and return
// PostingDB::fetchTermsStartingWith(term).
// The emptiness check is a Q_ASSERT only, so it is not enforced when asserts
// are compiled out.
169 Q_ASSERT(term.
size() > 0);
171 PostingDB postingDb(m_dbis.postingDbi, m_txn);
172 return postingDb.fetchTermsStartingWith(term);
// Returns size() of the DocumentIdDB built on m_dbis.contentIndexingDbi.
175uint Transaction::phaseOneSize()
const
179 DocumentIdDB contentIndexingDb(m_dbis.contentIndexingDbi, m_txn);
180 return contentIndexingDb.size();
// Returns size() of the DocumentDB built on m_dbis.docTermsDbi.
183uint Transaction::size()
const
187 DocumentDB docTermsDb(m_dbis.docTermsDbi, m_txn);
188 return docTermsDb.size();
// Stores id in the DocumentIdDB built on m_dbis.contentIndexingDbi.
194void Transaction::setPhaseOne(quint64
id)
// A write transaction is required; this is checked by assertion only.
198 Q_ASSERT(m_writeTrans);
200 DocumentIdDB contentIndexingDb(m_dbis.contentIndexingDbi, m_txn);
201 contentIndexingDb.put(
id);
// Deletes id from the DocumentIdDB built on m_dbis.contentIndexingDbi.
204void Transaction::removePhaseOne(quint64
id)
// A write transaction is required; this is checked by assertion only.
208 Q_ASSERT(m_writeTrans);
210 DocumentIdDB contentIndexingDb(m_dbis.contentIndexingDbi, m_txn);
211 contentIndexingDb.del(
id);
// addFailed(id): asserts a write transaction exists and builds a DocumentIdDB
// on m_dbis.failedIdDbi.
// NOTE(review): the statement that stores id is not visible in this listing -
// presumably a put(id) on failedIdDb; confirm.
214void Transaction::addFailed(quint64
id)
218 Q_ASSERT(m_writeTrans);
220 DocumentIdDB failedIdDb(m_dbis.failedIdDbi, m_txn);
// Forwards the document to m_writeTrans->addDocument.
224void Transaction::addDocument(
const Document& doc)
// The document id must be greater than zero; this is checked by assertion only.
227 Q_ASSERT(doc.id() > 0);
// NOTE(review): the condition guarding this warning (and whether it returns
// early) is not visible in this listing.
229 qCWarning(ENGINE) <<
"m_writeTrans is null";
233 m_writeTrans->addDocument(doc);
// Forwards the id to m_writeTrans->removeDocument.
236void Transaction::removeDocument(quint64
id)
// NOTE(review): the condition guarding this warning (and whether it returns
// early) is not visible in this listing.
241 qCWarning(ENGINE) <<
"m_writeTrans is null";
245 m_writeTrans->removeDocument(
id);
// Forwards the id to m_writeTrans->removeRecursively.
248void Transaction::removeRecursively(quint64
id)
// NOTE(review): the condition guarding this warning (and whether it returns
// early) is not visible in this listing.
253 qCWarning(ENGINE) <<
"m_writeTrans is null";
257 m_writeTrans->removeRecursively(
id);
// NOTE(review): the signature of the enclosing function is not visible in this
// listing. The visible statements validate the document, log when it is not
// known, and forward doc and operations to m_writeTrans->replaceDocument.
263 Q_ASSERT(doc.id() > 0);
// NOTE(review): m_writeTrans is asserted here and also has a null warning
// further down; the assert vanishes when asserts are compiled out, so the later
// check is the only one in such builds - confirm both are wanted.
264 Q_ASSERT(m_writeTrans);
// Replacing a document that hasDocument does not report is logged at debug level.
265 if (!hasDocument(doc.id())) {
266 qCDebug(ENGINE) <<
"Transaction::replaceDocument" <<
"Document does not exist";
270 qCWarning(ENGINE) <<
"m_writeTrans is null";
274 m_writeTrans->replaceDocument(doc, operations);
// commit(): commits the WriteTransaction helper, releases it, then commits the
// LMDB transaction m_txn. A failure message built from mdb_strerror(rc) is
// logged under the ENGINE category.
// NOTE(review): the guards around both warnings and the returned values are not
// visible in this listing.
277bool Transaction::commit()
281 qCWarning(ENGINE) <<
"m_writeTrans is null";
// The helper is committed first and destroyed before mdb_txn_commit runs.
285 m_writeTrans->commit();
286 m_writeTrans.reset();
288 int rc = mdb_txn_commit(m_txn);
292 qCWarning(ENGINE) <<
"Transaction::commit" << mdb_strerror(rc);
// abort(): aborts the LMDB transaction m_txn and releases the WriteTransaction
// helper.
// NOTE(review): any guard on m_txn and any reset of m_txn after the abort are
// not visible in this listing.
299void Transaction::abort()
303 mdb_txn_abort(m_txn);
306 m_writeTrans.reset();
// Builds a PostingIterator for the given EngineQuery.
// Visible branches:
//  - Equal      -> postingDb.iter(term)
//  - StartsWith -> postingDb.prefixIter(term)
//  - Phrase     -> a single sub-query is delegated recursively; otherwise one
//                  positionDb iterator is obtained per sub-query term and a
//                  PhraseAndIterator is created over the collected vector.
// NOTE(review): other branches, the handling of empty subQueries and the
// declaration of vec are not visible in this listing.
// NOTE(review): the result is a raw pointer and the Phrase branch allocates
// with new - presumably the caller takes ownership; confirm.
313PostingIterator* Transaction::postingIterator(
const EngineQuery& query)
const
315 PostingDB postingDb(m_dbis.postingDbi, m_txn);
316 PositionDB positionDb(m_dbis.positionDBi, m_txn);
319 if (
query.op() == EngineQuery::Equal) {
320 return postingDb.iter(
query.term());
321 }
else if (
query.op() == EngineQuery::StartsWith) {
322 return postingDb.prefixIter(
query.term());
328 const auto subQueries =
query.subQueries();
329 if (subQueries.isEmpty()) {
// NOTE(review): the assertion and the following if test the same condition, so
// the if only matters when asserts are compiled out.
333 Q_ASSERT(
query.op() == EngineQuery::Phrase);
334 if (
query.op() == EngineQuery::Phrase) {
// A phrase with a single sub-query is handled as that sub-query alone.
335 if (subQueries.size() == 1) {
336 qCDebug(ENGINE) <<
"Degenerated Phrase with 1 Term:" <<
query;
337 return postingIterator(subQueries[0]);
// Capacity is reserved for one entry per sub-query.
340 vec.
reserve(subQueries.size());
341 for (
const EngineQuery& q : subQueries) {
343 qCDebug(ENGINE) <<
"Transaction::toPostingIterator" <<
"Phrase subqueries must be leafs";
346 auto termMatch = positionDb.iter(q.term());
353 return new PhraseAndIterator(vec);
// qlonglong overload: returns PostingDB::compIter(prefix, value, com) on the
// posting database of this transaction.
359PostingIterator* Transaction::postingCompIterator(
const QByteArray& prefix, qlonglong value, PostingDB::Comparator com)
const
361 PostingDB postingDb(m_dbis.postingDbi, m_txn);
362 return postingDb.compIter(prefix, value, com);
// double overload: returns PostingDB::compIter(prefix, value, com) on the
// posting database of this transaction.
365PostingIterator* Transaction::postingCompIterator(
const QByteArray& prefix,
double value, PostingDB::Comparator com)
const
367 PostingDB postingDb(m_dbis.postingDbi, m_txn);
368 return postingDb.compIter(prefix, value, com);
// NOTE(review): the signature of the enclosing function is not visible in this
// listing. The visible statements return PostingDB::compIter(prefix, value, com),
// matching the body of the two postingCompIterator overloads above.
373 PostingDB postingDb(m_dbis.postingDbi, m_txn);
374 return postingDb.compIter(prefix, value, com);
// Returns MTimeDB::iterRange(beginTime, endTime) on the MTimeDB built from
// m_dbis.mtimeDbi.
// NOTE(review): whether the range bounds are inclusive is decided by
// MTimeDB::iterRange and cannot be told from this listing.
377PostingIterator* Transaction::mTimeRangeIter(quint32 beginTime, quint32 endTime)
const
379 MTimeDB mTimeDb(m_dbis.mtimeDbi, m_txn);
380 return mTimeDb.iterRange(beginTime, endTime);
// NOTE(review): the signature of the enclosing function is not visible in this
// listing. The visible statements return DocumentUrlDB::iter(id).
385 DocumentUrlDB docUrlDb(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn);
386 return docUrlDb.iter(
id);
// NOTE(review): the signature of the enclosing function is not visible in this
// listing. The visible statements return DocumentDB::get(docId) from the
// database built on m_dbis.docTermsDbi.
397 DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn);
398 return documentTermsDB.get(docId);
// NOTE(review): the signature of the enclosing function is not visible in this
// listing. The visible statements return DocumentDB::get(docId) from the
// database built on m_dbis.docFilenameTermsDbi.
405 DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn);
406 return documentFileNameTermsDB.get(docId);
// NOTE(review): the signature of the enclosing function is not visible in this
// listing. The visible statements return DocumentDB::get(docId) from the
// database built on m_dbis.docXattrTermsDbi.
413 DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn);
414 return documentXattrTermsDB.get(docId);
// File-local helper: queries mdb_stat for one database handle and returns
// (branch pages + leaf pages + overflow pages) multiplied by ms_psize.
// NOTE(review): the declaration of stat is not visible in this listing.
420static size_t dbiSize(MDB_txn* txn, MDB_dbi dbi)
// NOTE(review): the return value of mdb_stat is ignored in the visible code, so
// stat is read unchecked if the call fails - confirm this is acceptable.
423 mdb_stat(txn, dbi, &stat);
425 return (
stat.ms_branch_pages +
stat.ms_leaf_pages +
stat.ms_overflow_pages) *
stat.ms_psize;
// dbSize(): fills a DatabaseSize with one dbiSize result per database handle,
// sums those into expectedSize, and computes actualSize from mdb_env_info.
// NOTE(review): the declarations of dbSize and info and the return statement
// are not visible in this listing.
428DatabaseSize Transaction::dbSize()
431 dbSize.postingDb = dbiSize(m_txn, m_dbis.postingDbi);
432 dbSize.positionDb = dbiSize(m_txn, m_dbis.positionDBi);
433 dbSize.docTerms = dbiSize(m_txn, m_dbis.docTermsDbi);
434 dbSize.docFilenameTerms = dbiSize(m_txn, m_dbis.docFilenameTermsDbi);
435 dbSize.docXattrTerms = dbiSize(m_txn, m_dbis.docXattrTermsDbi);
437 dbSize.idTree = dbiSize(m_txn, m_dbis.idTreeDbi);
438 dbSize.idFilename = dbiSize(m_txn, m_dbis.idFilenameDbi);
440 dbSize.docTime = dbiSize(m_txn, m_dbis.docTimeDbi);
441 dbSize.docData = dbiSize(m_txn, m_dbis.docDataDbi);
443 dbSize.contentIndexingIds = dbiSize(m_txn, m_dbis.contentIndexingDbi);
444 dbSize.failedIds = dbiSize(m_txn, m_dbis.failedIdDbi);
446 dbSize.mtimeDb = dbiSize(m_txn, m_dbis.mtimeDbi);
// expectedSize is the sum of the twelve per-database sizes assigned above.
448 dbSize.expectedSize = dbSize.postingDb + dbSize.positionDb + dbSize.docTerms + dbSize.docFilenameTerms
449 + dbSize.docXattrTerms + dbSize.idTree + dbSize.idFilename + dbSize.docTime
450 + dbSize.docData + dbSize.contentIndexingIds + dbSize.failedIds + dbSize.mtimeDb;
// NOTE(review): the return value of mdb_env_info is ignored in the visible code.
453 mdb_env_info(m_env, &info);
// NOTE(review): the page size is a literal 4096 here, while dbiSize multiplies
// by stat.ms_psize - confirm the two agree on every supported platform.
454 dbSize.actualSize = info.me_last_pgno * 4096;
// checkFsTree(): diagnostic routine that writes a report to std::cout.
// Visible steps: take the posting database as a map via toTestMap, walk every
// id in every list, print the total id count, then for each id fetch its terms
// from the three DocumentDB instances and print either a block for a missing
// file path or a line for a file path that does not exist, and finally print
// the number and percentage of invalid entries.
// NOTE(review): the declarations of allIds, count, it, newTerms and url, and the
// conditions selecting each report, are not visible in this listing.
462void Transaction::checkFsTree()
464 DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn);
465 DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn);
466 DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn);
467 DocumentUrlDB docUrlDb(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn);
468 PostingDB postingDb(m_dbis.postingDbi, m_txn);
// NOTE(review): toTestMap presumably loads the whole posting database into
// memory; confirm this is only used for debugging.
470 const auto map = postingDb.toTestMap();
473 for (
const auto& list : map) {
474 for (quint64
id : list) {
479 std::cout <<
"Total Document IDs: " << allIds.
size() << std::endl;
482 for (quint64
id: std::as_const(allIds)) {
485 auto terms = documentTermsDB.get(
id);
486 auto fileNameTerms = documentFileNameTermsDB.get(
id);
487 auto xAttrTerms = documentXattrTermsDB.get(
id);
// Collect the keys whose value contains this id.
492 while (it.hasNext()) {
494 if (it.value().contains(
id)) {
495 newTerms << it.key();
499 std::cout <<
"Missing filePath for " <<
id << std::endl;
500 std::cout <<
"\tPostingDB Terms: ";
501 for (
const QByteArray& term : std::as_const(newTerms)) {
504 std::cout << std::endl;
506 std::cout <<
"\tDocumentTermsDB: ";
510 std::cout << std::endl;
512 std::cout <<
"\tFileNameTermsDB: ";
513 for (
const QByteArray& term : fileNameTerms) {
516 std::cout << std::endl;
518 std::cout <<
"\tXAttrTermsDB: ";
522 std::cout << std::endl;
526 std::cout <<
"FilePath " << qPrintable(
QString::fromUtf8(url)) <<
" for " <<
id <<
" does not exist"<< std::endl;
// NOTE(review): the percentage divides by allIds.size(); with no ids this is a
// floating-point division by zero - confirm the printed result is acceptable.
531 std::cout <<
"Invalid Entries: " << count <<
" (" << count * 100.0 / allIds.
size() <<
"%)" << std::endl;
// checkTermsDbinPostingDb(): diagnostic routine that writes to std::cout.
// Visible steps: take the posting database as a map via toTestMap, walk every
// id in every list, then for each id combine its xattr and file-name terms into
// terms and print a line for each term reported as missing.
// NOTE(review): the declarations of allIds and terms (including its initial
// contents) and the condition that decides a term is missing are not visible in
// this listing.
534void Transaction::checkTermsDbinPostingDb()
536 DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn);
537 DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn);
538 DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn);
539 PostingDB postingDb(m_dbis.postingDbi, m_txn);
544 const auto map = postingDb.toTestMap();
547 for (
const auto& list : map) {
548 for (quint64
id : list) {
553 std::cout <<
"PostingDB check .." << std::endl;
554 for (quint64
id : std::as_const(allIds)) {
// Xattr and file-name terms are appended to terms for this id.
556 terms += documentXattrTermsDB.get(
id);
557 terms += documentFileNameTermsDB.get(
id);
559 for (
const QByteArray& term : std::as_const(terms)) {
562 std::cout <<
id <<
" is missing term " << qPrintable(
QString::fromUtf8(term)) << std::endl;
// checkPostingDbinTermsDb(): diagnostic routine that writes to std::cout.
// Visible steps: iterate with it, and for every id in list test whether term is
// contained in the document, file-name or xattr terms of that id; a line is
// printed for an id reported as missing the term.
// NOTE(review): the declarations of it, term and list and the bodies of the
// three if statements are not visible in this listing - presumably each match
// skips the report; confirm.
568void Transaction::checkPostingDbinTermsDb()
570 DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn);
571 DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn);
572 DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn);
573 PostingDB postingDb(m_dbis.postingDbi, m_txn);
578 std::cout <<
"DocumentTermsDB check .." << std::endl;
579 while (it.hasNext()) {
584 for (quint64
id : list) {
// Each lookup below fetches the full term set of the id from one database.
585 if (documentTermsDB.get(
id).contains(term)) {
588 if (documentFileNameTermsDB.get(
id).contains(term)) {
591 if (documentXattrTermsDB.get(
id).contains(term)) {
594 std::cout <<
id <<
" is missing " << qPrintable(
QString::fromUtf8(term)) <<
" from document terms db" << std::endl;
Implements storage for a set of s for the given docId Instantiated for:
The MTime DB maps the file mtime to its id.
The PostingDB is the main database that maps -> <id1> <id2> <id2> ... This is used to lookup ids whe...
A PostingIterator is an abstract base class which can be used to iterate over all the "postings" or "...
Type type(const QSqlDatabase &db)
Implements storage for docIds without any associated data Instantiated for:
KSERVICE_EXPORT KService::List query(FilterFunc filterFunc)
KIOCORE_EXPORT StatJob * stat(const QUrl &url, JobFlags flags=DefaultFlags)
QString path(const QString &relativePath)
KIOCORE_EXPORT QStringList list(const QString &fileClass)
bool isEmpty() const const
qsizetype size() const const
bool exists() const const
bool contains(const AT &value) const const
void reserve(qsizetype size)
T value(qsizetype i) const const
qsizetype size() const const
QString fromUtf8(QByteArrayView str)
bool isEmpty() const const
QStringList split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const const
QFuture< void > map(Iterator begin, Iterator end, MapFunctor &&function)