Okular

textpage.h
1/*
2 SPDX-FileCopyrightText: 2005 Piotr Szymanski <niedakh@gmail.com>
3
4 SPDX-License-Identifier: GPL-2.0-or-later
5*/
6
7#ifndef _OKULAR_TEXTPAGE_H_
8#define _OKULAR_TEXTPAGE_H_
9
10#include <QList>
11#include <QString>
12
13#include "area.h"
14#include "global.h"
15#include "okularcore_export.h"
16
17class QTransform;
18
19namespace Okular
20{
21class NormalizedPoint;
22class NormalizedRect;
23class Page;
24class PagePrivate;
25class TextPagePrivate;
26class TextSelection;
27class RegularAreaRect;
28
29/*! @class TextEntity
30 * @short Represents a piece of text on a TextPage, containing its textual representation and its bounding box.
31 *
32 * To enable searching and text selection, a generator can give information about the textual
33 * content of a Page using a TextPage.
34 * A TextPage is created using TextEntity objects.
35 * A TextEntity can represent a single character/glyph, a word, a line, or even the whole page.
36 *
37 * Ideally, every single glyph is represented by its own TextEntity.
38 * If the textual representation of a graphical glyph contains more than one character,
39 * the TextEntity must contain the whole string which represents the glyph.
40 *
41 * When the Generator has created the TextPage, and it is added to a Page,
42 * the text entities are reordered to words, lines, and paragraphs, to optimize search and text selection.
43 * This way, the Generator does not need to care about the logical order of lines or paragraphs.
44 *
45 * @par Text Selection/Highlighting
46 * A TextEntity is the smallest piece of text, which the user can select, or which can be highlighted.
47 * That is, if the TextEntity represents a word, only the whole word can be selected.
48 * It would not be possible to select a single glyph of the word, because its bounding box is not known.
49 *
50 * @see TextPage, Generator
51 */
52class OKULARCORE_EXPORT TextEntity
53{
54public:
55 typedef QList<TextEntity> List;
56
57 /**
58 * Creates a new text entity with the given @p text and the
59 * given @p area.
60 */
61 TextEntity(const QString &text, const NormalizedRect &area);
62
63 /**
64 * Destroys the text entity.
65 */
67
68 /**
69 * Returns the text of the text entity.
70 */
71 QString text() const;
72
73 /**
74 * Returns the bounding area of the text entity.
75 */
76 NormalizedRect area() const;
77
78 /**
79 * Returns the transformed area of the text entity.
80 */
81 NormalizedRect transformedArea(const QTransform &matrix) const;
82
83private:
84 QString m_text;
85 NormalizedRect m_area;
86};
87
88/**
89 * @short Represents the textual information of a Page. Makes search and text selection possible.
90 *
91 * A Generator with text support should add a TextPage to every Page.
92 * For every piece of text, a TextEntity is added, holding the string representation and the bounding box.
93 *
94 * Ideally, every TextEntity describes only one glyph.
95 * A "glyph" is one character in the graphical representation, but the textual representation may consist of multiple characters (like diacritic modifiers).
96 *
97 * When the TextPage is added to the Page, the TextEntitys are restructured to optimize text selection.
98 *
99 * @see TextEntity
100 */
101class OKULARCORE_EXPORT TextPage
102{
103 /// @cond PRIVATE
104 friend class Page;
105 friend class PagePrivate;
106 /// @endcond
107
108public:
109 /**
110 * Defines the behaviour of adding characters to text() result
111 * @since 0.10 (KDE 4.4)
112 */
114 AnyPixelTextAreaInclusionBehaviour, ///< A character is included into text() result if any pixel of his bounding box is in the given area
115 CentralPixelTextAreaInclusionBehaviour ///< A character is included into text() result if the central pixel of his bounding box is in the given area
116 };
117
118 /**
119 * Creates a new text page.
120 */
121 TextPage();
122
123 /**
124 * Creates a new text page with the given @p words.
125 */
126 explicit TextPage(const TextEntity::List &words);
127
128 /**
129 * Destroys the text page.
130 */
131 ~TextPage();
132
133 /**
134 * Appends the given @p text with the given @p area as new
135 * @ref TextEntity to the page.
136 */
137 void append(const QString &text, const NormalizedRect &area);
138
139 /**
140 * Returns the bounding rect of the text which matches the following criteria
141 * or 0 if the search is not successful.
142 *
143 * @param searchID An unique id for this search.
144 * @param query The search text.
145 * @param direction The direction of the search (@ref SearchDirection)
146 * @param caseSensitivity If Qt::CaseSensitive, the search is case sensitive; otherwise
147 * the search is case insensitive.
148 * @param area If null the search starts at the beginning of the page, otherwise
149 * right/below the coordinates of the given rect.
150 */
151 RegularAreaRect *findText(int searchID, const QString &query, SearchDirection direction, Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *area);
152
153 /**
154 * Text extraction function. Looks for text in the given @p area.
155 *
156 * @return
157 * - If @p area points to a valid null area, a null string.
158 * - If @p area is nullptr, the whole page text as a single string.
159 * - Otherwise, the text which is included by @p area, as a single string.
160 * Uses AnyPixelTextAreaInclusionBehaviour
161 */
162 QString text(const RegularAreaRect *area = nullptr) const;
163
164 /**
165 * Text extraction function. Looks for text in the given @p area.
166 *
167 * @return
168 * - If @p area points to a valid null area, a null string.
169 * - If @p area is nullptr, the whole page text as a single string.
170 * - Otherwise, the text which is included by @p area, as a single string.
171 * @since 0.10 (KDE 4.4)
172 */
173 QString text(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const;
174
175 /**
176 * Text entity extraction function. Similar to text() but returns
177 * the words including their bounding rectangles. Note that
178 * ownership of the contents of the returned list belongs to the
179 * caller.
180 * @since 0.14 (KDE 4.8)
181 */
182 TextEntity::List words(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const;
183
184 /**
185 * Returns the area and text of the word at the given point
186 * Note that ownership of the returned area belongs to the caller.
187 * @since 0.15 (KDE 4.9)
188 */
189 std::unique_ptr<RegularAreaRect> wordAt(const NormalizedPoint &p) const;
190
191 /**
192 * Returns the rectangular area of the given @p selection.
193 */
194 std::unique_ptr<RegularAreaRect> textArea(const TextSelection &selection) const;
195
196private:
197 TextPagePrivate *const d;
198
199 Q_DISABLE_COPY(TextPage)
200};
201
202}
203
204#endif
NormalizedPoint is a helper class which stores the coordinates of a normalized point.
Definition area.h:117
A NormalizedRect is a rectangle which can be defined by two NormalizedPoints.
Definition area.h:189
Collector for all the data belonging to a page.
Definition page.h:48
This is a list of NormalizedRect, to describe an area consisting of multiple rectangles using normalized coordinates.
Definition area.h:927
Represents a piece of text on a TextPage, containing its textual representation and its bounding box.
Definition textpage.h:53
~TextEntity()
Destroys the text entity.
Represents the textual information of a Page.
Definition textpage.h:102
TextAreaInclusionBehaviour
Defines the behaviour of adding characters to text() result.
Definition textpage.h:113
@ AnyPixelTextAreaInclusionBehaviour
A character is included into text() result if any pixel of its bounding box is in the given area.
Definition textpage.h:114
Wrapper around the information needed to generate the selection area.
Definition misc.h:19
global.h
Definition action.h:17
SearchDirection
Describes the direction of searching.
Definition global.h:36
CaseSensitivity
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:58:07 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.