Md4qt

parser.h
Go to the documentation of this file.
1/*
2 SPDX-FileCopyrightText: 2022-2025 Igor Mironchik <igor.mironchik@gmail.com>
3 SPDX-License-Identifier: MIT
4*/
5
6#ifndef MD4QT_MD_PARSER_HPP_INCLUDED
7#define MD4QT_MD_PARSER_HPP_INCLUDED
8
9// md4qt include.
10#include "doc.h"
11#include "entities_map.h"
12#include "traits.h"
13#include "utils.h"
14
15#ifdef MD4QT_QT_SUPPORT
16
17// Qt include.
18#include <QDir>
19#include <QFile>
20#include <QTextStream>
21
22#endif // MD4QT_QT_SUPPORT
23
24#ifdef MD4QT_ICU_STL_SUPPORT
25
26// C++ include.
27#include <exception>
28
29#endif // MD4QT_ICU_STL_SUPPORT
30
31// C++ include.
32#include <algorithm>
33#include <cassert>
34#include <cmath>
35#include <fstream>
36#include <functional>
37#include <memory>
38#include <set>
39#include <tuple>
40#include <unordered_map>
41#include <vector>
42
43namespace MD
44{
45
//! Starting HTML comment string.
//! Both the characters and the pointer itself are constant, so code including
//! this header can't accidentally re-seat the pointer.
static const char *const s_startComment = "<!--";
48
//! \return Whether indent \p indent belongs to a list with previous indents \p indents.
//!
//! The indent matches when at least one previous indent \c v satisfies
//! <tt>indent >= v</tt> and, unless \p codeIndentedBySpaces is set,
//! <tt>indent <= v + 3</tt>. Null or empty \p indents never match.
inline bool
indentInList(const std::vector<long long int> *indents,
             long long int indent,
             bool codeIndentedBySpaces)
{
    if (!indents || indents->empty()) {
        return false;
    }

    const auto fits = [indent, codeIndentedBySpaces](long long int base) {
        if (indent < base) {
            return false;
        }

        // Upper bound is checked only when code is not indented by spaces.
        return codeIndentedBySpaces || indent <= base + 3;
    };

    return std::any_of(indents->cbegin(), indents->cend(), fits);
}
67
//! Skip spaces in \p line starting from position \p i.
//! \return Position of the first non-space character at or after \p i.
//! If there is no such character, the position where scanning stopped is returned
//! (the length of \p line, or \p i itself when \p i is already beyond the end).
template<class Trait>
inline long long int
skipSpaces(long long int i, const typename Trait::String &line)
{
    for (const auto end = line.length(); i < end; ++i) {
        if (!line[i].isSpace()) {
            break;
        }
    }

    return i;
}
81
//! \return Position of the last non-space character in \p line,
//! or -1 if \p line is empty or contains only spaces.
template<class String>
inline long long int
lastNonSpacePos(const String &line)
{
    long long int pos = line.length();

    // Step back at least once, then continue while spaces are met.
    do {
        --pos;
    } while (pos >= 0 && line[pos].isSpace());

    return pos;
}
95
96//! Remove spaces at the end of string \p s.
97template<class String>
98inline void
100{
101 const auto i = lastNonSpacePos(s);
102
103 if (i != s.length() - 1) {
104 s.remove(i + 1, s.length() - i - 1);
105 }
106}
107
108//! \return Starting sequence of the same characters.
109template<class Trait>
110inline typename Trait::String
111startSequence(const typename Trait::String &line)
112{
113 auto pos = skipSpaces<Trait>(0, line);
114
115 if (pos >= line.length()) {
116 return {};
117 }
118
119 const auto sch = line[pos];
120 const auto start = pos;
121
122 ++pos;
123
124 while (pos < line.length() && line[pos] == sch) {
125 ++pos;
126 }
127
128 return line.sliced(start, pos - start);
129}
130
131//! \return Is string an ordered list.
132template<class Trait>
133inline bool
134isOrderedList(const typename Trait::String &s,
135 int *num = nullptr,
136 int *len = nullptr,
137 typename Trait::Char *delim = nullptr,
138 bool *isFirstLineEmpty = nullptr)
139{
140 long long int p = skipSpaces<Trait>(0, s);
141
142 long long int dp = p;
143
144 for (; p < s.size(); ++p) {
145 if (!s[p].isDigit()) {
146 break;
147 }
148 }
149
150 if (dp != p && p < s.size()) {
151 const auto digits = s.sliced(dp, p - dp);
152
153 if (digits.size() > 9) {
154 return false;
155 }
156
157 const auto i = digits.toInt();
158
159 if (num) {
160 *num = i;
161 }
162
163 if (len) {
164 *len = p - dp;
165 }
166
167 if (s[p] == Trait::latin1ToChar('.') || s[p] == Trait::latin1ToChar(')')) {
168 if (delim) {
169 *delim = s[p];
170 }
171
172 ++p;
173
174 long long int tmp = skipSpaces<Trait>(p, s);
175
176 if (isFirstLineEmpty) {
177 *isFirstLineEmpty = (tmp == s.size());
178 }
179
180 if ((p < s.size() && s[p] == Trait::latin1ToChar(' ')) || p == s.size()) {
181 return true;
182 }
183 }
184 }
185
186 return false;
187}
188
189//
190// RawHtmlBlock
191//
192
193//! Internal structure for pre-storing HTML.
194template<class Trait>
196 std::shared_ptr<RawHtml<Trait>> m_html = {};
197 std::shared_ptr<Block<Trait>> m_parent = {};
198 std::shared_ptr<Block<Trait>> m_topParent = {};
199 using SequenceOfBlock = std::vector<std::pair<std::shared_ptr<Block<Trait>>, long long int>>;
201 std::unordered_map<std::shared_ptr<Block<Trait>>, SequenceOfBlock> m_toAdjustLastPos = {};
203 bool m_continueHtml = false;
204 bool m_onLine = false;
205
206 std::shared_ptr<Block<Trait>>
207 findParent(long long int indent) const
208 {
209 for (auto it = m_blocks.crbegin(), last = m_blocks.crend(); it != last; ++it) {
210 if (indent >= it->second) {
211 return it->first;
212 }
213 }
214
215 return nullptr;
216 }
217}; // struct RawHtmlBlock
218
219//! Reset pre-stored HTML.
220template<class Trait>
222{
223 html.m_html.reset();
224 html.m_parent.reset();
225 html.m_htmlBlockType = -1;
226 html.m_continueHtml = false;
227 html.m_onLine = false;
228}
229
230//
231// MdLineData
232//
233
234//! Internal structure for auxiliary information about a line in Markdown.
236 long long int m_lineNumber = -1;
237 using CommentData = std::pair<char, bool>;
238 using CommentDataMap = std::map<long long int, CommentData>;
239 // std::pair< closed, valid >
241 // May this line break a list?
242 bool m_mayBreakList = false;
243}; // struct MdLineData
244
245//
246// MdBlock
247//
248
249//! Internal structure for block of text in Markdown.
250template<class Trait>
251struct MdBlock {
252 using Line = std::pair<typename Trait::InternalString, MdLineData>;
253 using Data = typename Trait::template Vector<Line>;
254
256 long long int m_emptyLinesBefore = 0;
257 bool m_emptyLineAfter = true;
258}; // struct MdBlock
259
260//
261// StringListStream
262//
263
264//! Wrapper for typename Trait::StringList to be behaved like a stream.
265template<class Trait>
267{
268public:
270 : m_stream(stream)
271 , m_pos(0)
272 {
273 }
274
275 bool atEnd() const
276 {
277 return (m_pos >= (long long int)m_stream.size());
278 }
279
280 std::pair<typename Trait::InternalString, bool> readLine()
281 {
282 const std::pair<typename Trait::InternalString, bool> ret =
283 {m_stream.at(m_pos).first, m_stream.at(m_pos).second.m_mayBreakList};
284
285 ++m_pos;
286
287 return ret;
288 }
289
290 long long int currentLineNumber() const
291 {
292 return (m_pos < size() ? m_stream.at(m_pos).second.m_lineNumber : size());
293 }
294
295 typename Trait::InternalString lineAt(long long int pos)
296 {
297 return m_stream.at(pos).first;
298 }
299
300 long long int size() const
301 {
302 return m_stream.size();
303 }
304
305 void setLineNumber(long long int lineNumber)
306 {
307 m_pos = 0;
308
309 m_pos += lineNumber - currentLineNumber();
310 }
311
312private:
313 typename MdBlock<Trait>::Data &m_stream;
314 long long int m_pos;
315}; // class StringListStream
316
317//! \return Is string a footnote?
318template<class Trait>
319inline bool
320isFootnote(const typename Trait::String &s)
321{
322 long long int p = skipSpaces<Trait>(0, s);
323
324 if (s.size() - p < 5) {
325 return false;
326 }
327
328 if (s[p++] != Trait::latin1ToChar('[')) {
329 return false;
330 }
331
332 if (s[p++] != Trait::latin1ToChar('^')) {
333 return false;
334 }
335
336 if (s[p] == Trait::latin1ToChar(']') || s[p].isSpace()) {
337 return false;
338 }
339
340 for (; p < s.size(); ++p) {
341 if (s[p] == Trait::latin1ToChar(']')) {
342 break;
343 } else if (s[p].isSpace()) {
344 return false;
345 }
346 }
347
348 ++p;
349
350 if (p < s.size() && s[p] == Trait::latin1ToChar(':')) {
351 return true;
352 } else {
353 return false;
354 }
355}
356
357//! \return Is string a code fences?
358template<class Trait>
359inline bool
360isCodeFences(const typename Trait::String &s, bool closing = false)
361{
362 auto p = skipSpaces<Trait>(0, s);
363
364 if (p > 3 || p == s.length()) {
365 return false;
366 }
367
368 const auto ch = s[p];
369
370 if (ch != Trait::latin1ToChar('~') && ch != Trait::latin1ToChar('`')) {
371 return false;
372 }
373
374 bool space = false;
375
376 long long int c = 1;
377 ++p;
378
379 for (; p < s.length(); ++p) {
380 if (s[p].isSpace()) {
381 space = true;
382 } else if (s[p] == ch) {
383 if (space && (closing ? true : ch == Trait::latin1ToChar('`'))) {
384 return false;
385 }
386
387 if (!space) {
388 ++c;
389 }
390 } else if (closing) {
391 return false;
392 } else {
393 break;
394 }
395 }
396
397 if (c < 3) {
398 return false;
399 }
400
401 if (ch == Trait::latin1ToChar('`')) {
402 for (; p < s.length(); ++p) {
403 if (s[p] == Trait::latin1ToChar('`')) {
404 return false;
405 }
406 }
407 }
408
409 return true;
410}
411
//! Read sequence of characters from position \p i till the first space that is not
//! escaped with a backslash (or till the end of \p str).
//! \return Read sequence with backslashes kept as is. Empty string is returned if
//! \p i is not inside of \p str, in this case \p endPos is left untouched.
//! \p endPos, if given, receives the position of the last character of the sequence.
template<class Trait>
inline typename Trait::String
readEscapedSequence(long long int i,
                    const typename Trait::String &str,
                    long long int *endPos = nullptr)
{
    const auto start = i;

    if (start >= str.length()) {
        return {};
    }

    // True if the previous character was a backslash that escapes the current one.
    bool escaped = false;

    for (; i < str.length(); ++i) {
        if (escaped) {
            escaped = false;
            continue;
        }

        if (str[i] == Trait::latin1ToChar('\\')) {
            escaped = true;
        } else if (str[i].isSpace()) {
            break;
        }
    }

    if (endPos) {
        *endPos = i - 1;
    }

    return str.sliced(start, i - start);
}
449
//! Characters that can be escaped.
template<class Trait>
static const typename Trait::String s_canBeEscaped =
    Trait::latin1ToString("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~");

//! Remove backslashes from the string.
//! \return Copy of \p s where every backslash that escapes one of the characters
//! from s_canBeEscaped is removed. Backslashes before other characters and a
//! backslash at the very end of the string are kept.
template<class String, class Trait>
inline String
removeBackslashes(const String &s)
{
    String result = s;
    // True if the previous character was a backslash that may escape the current one.
    bool pendingEscape = false;
    // Count of already removed characters, shift between positions in s and in result.
    long long int removed = 0;

    for (long long int i = 0; i < s.length(); ++i) {
        if (pendingEscape) {
            if (s_canBeEscaped<Trait>.contains(s[i])) {
                result.remove(i - removed - 1, 1);
                ++removed;
            }

            pendingEscape = false;
        } else if (s[i] == Trait::latin1ToChar('\\') && i != s.length() - 1) {
            pendingEscape = true;
        }
    }

    return result;
}
482
483//! \return Is string a start of code?
484template<class Trait>
485inline bool
486isStartOfCode(const typename Trait::String &str,
487 typename Trait::String *syntax = nullptr,
488 WithPosition *delim = nullptr,
489 WithPosition *syntaxPos = nullptr)
490{
491 long long int p = skipSpaces<Trait>(0, str);
492
493 if (delim) {
494 delim->setStartColumn(p);
495 }
496
497 if (p > 3) {
498 return false;
499 }
500
501 if (str.size() - p < 3) {
502 return false;
503 }
504
505 const bool c96 = str[p] == Trait::latin1ToChar('`');
506 const bool c126 = str[p] == Trait::latin1ToChar('~');
507
508 if (c96 || c126) {
509 ++p;
510 long long int c = 1;
511
512 while (p < str.length()) {
513 if (str[p] != (c96 ? Trait::latin1ToChar('`') : Trait::latin1ToChar('~'))) {
514 break;
515 }
516
517 ++c;
518 ++p;
519 }
520
521 if (delim) {
522 delim->setEndColumn(p - 1);
523 }
524
525 if (c < 3) {
526 return false;
527 }
528
529 if (syntax) {
530 p = skipSpaces<Trait>(p, str);
531 long long int endSyntaxPos = p;
532
533 if (p < str.size()) {
535 readEscapedSequence<Trait>(p, str, &endSyntaxPos));
536
537 if (syntaxPos) {
538 syntaxPos->setStartColumn(p);
539 syntaxPos->setEndColumn(endSyntaxPos);
540 }
541 }
542 }
543
544 return true;
545 }
546
547 return false;
548}
549
//! \return Is string \p s a horizontal line, i.e. does it start with '*', '-' or '_'
//! and consist only of at least 3 such characters and any amount of spaces?
//! Leading spaces are not skipped here, the very first character should be a marker.
template<class Trait>
inline bool
isHorizontalLine(const typename Trait::String &s)
{
    if (s.size() < 3) {
        return false;
    }

    const typename Trait::Char marker = s[0];
    const bool knownMarker = (marker == Trait::latin1ToChar('*') ||
        marker == Trait::latin1ToChar('-') ||
        marker == Trait::latin1ToChar('_'));

    if (!knownMarker) {
        return false;
    }

    long long int markers = 1;

    for (long long int i = 1; i < s.size(); ++i) {
        if (s[i] == marker) {
            ++markers;
        } else if (!s[i].isSpace()) {
            // Anything except of the marker and spaces breaks the line.
            return false;
        }
    }

    return markers >= 3;
}
592
593//! \return Is string a column alignment?
594template<class Trait>
595inline bool
596isColumnAlignment(const typename Trait::String &s)
597{
598 long long int p = skipSpaces<Trait>(0, s);
599
600 static const typename Trait::String s_legitime = Trait::latin1ToString(":-");
601
602 if (p >= s.length()) {
603 return false;
604 }
605
606 if (!s_legitime.contains(s[p])) {
607 return false;
608 }
609
610 if (s[p] == Trait::latin1ToChar(':')) {
611 ++p;
612 }
613
614 for (; p < s.size(); ++p) {
615 if (s[p] != Trait::latin1ToChar('-')) {
616 break;
617 }
618 }
619
620 if (p == s.size()) {
621 return true;
622 }
623
624 if (s[p] != Trait::latin1ToChar(':') && !s[p].isSpace()) {
625 return false;
626 }
627
628 ++p;
629
630 for (; p < s.size(); ++p) {
631 if (!s[p].isSpace()) {
632 return false;
633 }
634 }
635
636 return true;
637}
638
639//! Split string.
640template<class Trait>
641typename Trait::StringList
642splitString(const typename Trait::String &str, const typename Trait::Char &ch);
643
644#ifdef MD4QT_ICU_STL_SUPPORT
645
646template<>
649{
650 return str.split(ch);
651}
652
653#endif
654
655#ifdef MD4QT_QT_SUPPORT
656
657template<>
659splitString<QStringTrait>(const QString &str, const QChar &ch)
660{
661 return str.split(ch, Qt::SkipEmptyParts);
662}
663
664#endif
665
666//! \return Number of columns?
667template<class Trait>
668inline int
669isTableAlignment(const typename Trait::String &s)
670{
671 const auto columns = splitString<Trait>(s.simplified(), Trait::latin1ToChar('|'));
672
673 for (const auto &c : columns) {
674 if (!isColumnAlignment<Trait>(c)) {
675 return 0;
676 }
677 }
678
679 return columns.size();
680}
681
682//! \return Is given string a HTML comment.
683template<class Trait>
684inline bool
685isHtmlComment(const typename Trait::String &s)
686{
687 auto c = s;
688
689 if (s.startsWith(Trait::latin1ToString(s_startComment))) {
690 c.remove(0, 4);
691 } else {
692 return false;
693 }
694
695 long long int p = -1;
696 bool endFound = false;
697
698 while ((p = c.indexOf(Trait::latin1ToString("--"), p + 1)) > -1) {
699 if (c.size() > p + 2 && c[p + 2] == Trait::latin1ToChar('>')) {
700 if (!endFound) {
701 endFound = true;
702 } else {
703 return false;
704 }
705 } else if (p - 2 >= 0 && c.sliced(p - 2, 4) == Trait::latin1ToString("<!--")) {
706 return false;
707 } else if (c.size() > p + 3 && c.sliced(p, 4) == Trait::latin1ToString("--!>")) {
708 return false;
709 }
710 }
711
712 return endFound;
713}
714
715//! Replace entities in the string with corresponding character.
716template<class Trait>
717inline typename Trait::String
718replaceEntity(const typename Trait::String &s)
719{
720 long long int p1 = 0;
721
722 typename Trait::String res;
723 long long int i = 0;
724
725 while ((p1 = s.indexOf(Trait::latin1ToChar('&'), p1)) != -1) {
726 if (p1 > 0 && s[p1 - 1] == Trait::latin1ToChar('\\')) {
727 ++p1;
728
729 continue;
730 }
731
732 const auto p2 = s.indexOf(Trait::latin1ToChar(';'), p1);
733
734 if (p2 != -1) {
735 const auto en = s.sliced(p1, p2 - p1 + 1);
736
737 if (en.size() > 2 && en[1] == Trait::latin1ToChar('#')) {
738 if (en.size() > 3 && en[2].toLower() == Trait::latin1ToChar('x')) {
739 const auto hex = en.sliced(3, en.size() - 4);
740
741 if (hex.size() <= 6 && hex.size() > 0) {
742 bool ok = false;
743
744 const char32_t c = hex.toInt(&ok, 16);
745
746 if (ok) {
747 res.push_back(s.sliced(i, p1 - i));
748 i = p2 + 1;
749
750 if (c) {
751 Trait::appendUcs4(res, c);
752 } else {
753 res.push_back(typename Trait::Char(0xFFFD));
754 }
755 }
756 }
757 } else {
758 const auto dec = en.sliced(2, en.size() - 3);
759
760 if (dec.size() <= 7 && dec.size() > 0) {
761 bool ok = false;
762
763 const char32_t c = dec.toInt(&ok, 10);
764
765 if (ok) {
766 res.push_back(s.sliced(i, p1 - i));
767 i = p2 + 1;
768
769 if (c) {
770 Trait::appendUcs4(res, c);
771 } else {
772 res.push_back(typename Trait::Char(0xFFFD));
773 }
774 }
775 }
776 }
777 } else {
778 const auto it = s_entityMap<Trait>.find(en);
779
780 if (it != s_entityMap<Trait>.cend()) {
781 res.push_back(s.sliced(i, p1 - i));
782 i = p2 + 1;
783 res.push_back(Trait::utf16ToString(it->second));
784 }
785 }
786 } else {
787 break;
788 }
789
790 p1 = p2 + 1;
791 }
792
793 res.push_back(s.sliced(i, s.size() - i));
794
795 return res;
796}
797
798//! Remove backslashes in block.
799template<class Trait>
800inline typename MdBlock<Trait>::Data
802{
803 auto tmp = d;
804
805 for (auto &line : tmp) {
807 }
808
809 return tmp;
810}
811
812//! Type of the paragraph's optimization.
814 //! Full optimization.
816 //! Semi optimization, optimization won't concatenate text
817 //! items if style delimiters will be in the middle.
819 //! Full optimization, but raw text data won't be concatenated (will be untouched).
821 //! Semi optimization, but raw text data won't be concatenated (will be untouched).
823};
824
825//
826// TextPlugin
827//
828
829//! ID of text plugin.
830enum TextPlugin : int {
831 //! Unknown plugin.
833 //! GitHub's autolinks plugin.
835 //! First user defined plugin ID.
837}; // enum TextPlugin
838
839//
840// Style
841//
842
843//! Emphasis type.
844enum class Style {
845 //! "*"
847 //! "_"
849 //! "**"
851 //! "__"
853 //! "~"
855 //! Unknown.
857};
858
859//! \return Text option from style.
860inline TextOption
862{
863 switch (s) {
864 case Style::Italic1:
865 case Style::Italic2:
866 return ItalicText;
867
868 case Style::Bold1:
869 case Style::Bold2:
870 return BoldText;
871
873 return StrikethroughText;
874
875 default:
876 return TextWithoutFormat;
877 }
878}
879
880//
881// TextPluginFunc
882//
883
884template<class Trait>
885struct TextParsingOpts;
886
887//! Functor type for text plugin.
888template<class Trait>
889using TextPluginFunc = std::function<void(std::shared_ptr<Paragraph<Trait>>,
891 const typename Trait::StringList &)>;
892
893//
894// TextPluginsMap
895//
896
897//! Type of the map of text plugins.
898template<class Trait>
899using TextPluginsMap = std::map<int, std::tuple<TextPluginFunc<Trait>,
900 bool,
901 typename Trait::StringList>>;
902
903//
904// TextParsingOpts
905//
906
907//! Internal structure for auxiliary options for parser.
908template<class Trait>
911 std::shared_ptr<Block<Trait>> m_parent;
912 std::shared_ptr<RawHtml<Trait>> m_tmpHtml;
913 std::shared_ptr<Document<Trait>> m_doc;
914 typename Trait::StringList &m_linksToParse;
915 typename Trait::String m_workingPath;
916 typename Trait::String m_fileName;
921 std::shared_ptr<Text<Trait>> m_lastText = {};
922 bool m_wasRefLink = false;
925
926 struct TextData {
927 typename Trait::String m_str;
928 long long int m_pos = -1;
929 long long int m_line = -1;
930 };
931
932 std::vector<TextData> m_rawTextData = {};
933
934 inline void
935 concatenateAuxText(long long int start, long long int end)
936 {
937 if (start < end && (end - start > 1)) {
938 for (auto i = start + 1; i < end; ++i) {
939 m_rawTextData[start].m_str += m_rawTextData[i].m_str;
940 }
941
942 m_rawTextData.erase(m_rawTextData.cbegin() + start + 1, m_rawTextData.cbegin() + end);
943 }
944 }
945
946 enum class Detected { Nothing = 0, Table = 1, HTML = 2, List = 3, Code = 4 }; // enum class Detected
947
949
950 inline bool
952 {
953 switch (m_detected) {
954 case Detected::Table:
955 case Detected::List:
956 case Detected::Code:
957 return true;
958
959 default:
960 return false;
961 }
962 }
963
964 long long int m_line = 0;
965 long long int m_pos = 0;
966 long long int m_startTableLine = -1;
967 long long int m_lastTextLine = -1;
968 long long int m_lastTextPos = -1;
971
972 struct StyleInfo {
974 long long int m_length;
976 };
977
978 std::vector<StyleInfo> m_styles = {};
980 std::shared_ptr<ItemWithOpts<Trait>> m_lastItemWithStyle = nullptr;
981}; // struct TextParsingOpts
982
983//
984// virginSubstr
985//
986
987//! \return Substring from fragment with given virgin positions.
988template<class Trait>
989inline typename Trait::String
990virginSubstr(const MdBlock<Trait> &fr, const WithPosition &virginPos)
991{
992 if (fr.m_data.empty()) {
993 return {};
994 }
995
996 long long int startLine = virginPos.startLine() < fr.m_data.at(0).second.m_lineNumber ?
997 (virginPos.endLine() < fr.m_data.at(0).second.m_lineNumber ? -1 : 0) :
998 virginPos.startLine() - fr.m_data.at(0).second.m_lineNumber;
999
1000 if (startLine >= static_cast<long long int>(fr.m_data.size()) || startLine < 0) {
1001 return {};
1002 }
1003
1004 auto spos = virginPos.startColumn() - fr.m_data.at(startLine).first.virginPos(0);
1005
1006 if (spos < 0) {
1007 spos = 0;
1008 }
1009
1010 long long int epos = 0;
1011 long long int linesCount = virginPos.endLine() - virginPos.startLine() -
1012 (virginPos.startLine() < fr.m_data.at(0).second.m_lineNumber ?
1013 fr.m_data.at(0).second.m_lineNumber - virginPos.startLine() : 0);
1014
1015 if (startLine + linesCount > static_cast<long long int>(fr.m_data.size())) {
1016 linesCount = fr.m_data.size() - startLine - 1;
1017 epos = fr.m_data.back().first.length();
1018 } else {
1019 epos = virginPos.endColumn() - fr.m_data.at(linesCount + startLine).first.virginPos(0) + 1;
1020 }
1021
1022 if (epos < 0) {
1023 epos = 0;
1024 }
1025
1026 if (epos > fr.m_data.at(linesCount + startLine).first.length()) {
1027 epos = fr.m_data.at(linesCount + startLine).first.length();
1028 }
1029
1030 typename Trait::String str =
1031 (linesCount ? fr.m_data.at(startLine).first.sliced(spos).asString() :
1032 fr.m_data.at(startLine).first.sliced(spos, epos - spos).asString());
1033
1034 long long int i = startLine + 1;
1035
1036 for (; i < startLine + linesCount; ++i) {
1037 str.push_back(Trait::latin1ToString("\n"));
1038 str.push_back(fr.m_data.at(i).first.asString());
1039 }
1040
1041 if (linesCount) {
1042 str.push_back(Trait::latin1ToString("\n"));
1043 str.push_back(fr.m_data.at(i).first.sliced(0, epos).asString());
1044 }
1045
1046 return str;
1047}
1048
1049//
1050// localPosFromVirgin
1051//
1052
1053//! \return Local position ( { column, line } ) in fragment for given virgin position if exists.
1054//! \return { -1, -1 } if there is no given position.
1055template<class Trait>
1056inline std::pair<long long int, long long int>
1057localPosFromVirgin(const MdBlock<Trait> &fr, long long int virginColumn, long long int virginLine)
1058{
1059 if (fr.m_data.empty()) {
1060 return {-1, -1};
1061 }
1062
1063 if (fr.m_data.front().second.m_lineNumber > virginLine ||
1064 fr.m_data.back().second.m_lineNumber < virginLine) {
1065 return {-1, -1};
1066 }
1067
1068 auto line = virginLine - fr.m_data.front().second.m_lineNumber;
1069
1070 if (fr.m_data.at(line).first.isEmpty()) {
1071 return {-1, -1};
1072 }
1073
1074 const auto vzpos = fr.m_data.at(line).first.virginPos(0);
1075
1076 if (vzpos > virginColumn || virginColumn > vzpos + fr.m_data.at(line).first.length() - 1) {
1077 return {-1, -1};
1078 }
1079
1080 return {virginColumn - vzpos, line};
1081}
1082
1083//
1084// GitHubAutolinkPlugin
1085//
1086
/*
    "^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?"
    "(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
*/
//! \return Is the given string a valid email?
//!
//! Optional "mailto:" prefix is skipped. The part before '@' should be non-empty and
//! consist of ASCII letters, digits and the punctuation listed in the expression above.
//! The part after '@' should contain at least one '.', and each dot-separated label
//! should be 1-63 characters long, consist of ASCII letters, digits and '-', and
//! neither start nor end with '-'.
template<class Trait>
inline bool
isEmail(const typename Trait::String &url)
{
    // ASCII digit or letter.
    const auto isAlnum = [](const typename Trait::Char &ch) -> bool {
        const auto u = ch.unicode();
        const bool digit = (u >= 48 && u <= 57);
        const bool lower = (u >= 97 && u <= 122);
        const bool upper = (u >= 65 && u <= 90);

        return digit || lower || upper;
    };

    // Punctuation allowed before '@': ! # $ % & ' * + - . / = ? ^ _ ` { | } ~
    const auto isLocalPunct = [](const typename Trait::Char &ch) -> bool {
        const auto u = ch.unicode();

        if (u == 33 || u == 42 || u == 43 || u == 61 || u == 63) {
            return true;
        }

        return (u >= 35 && u <= 39) || (u >= 45 && u <= 47) || (u >= 94 && u <= 96) || (u >= 123 && u <= 126);
    };

    const auto hyphen = Trait::latin1ToChar('-');
    const auto at = Trait::latin1ToChar('@');
    const auto dot = Trait::latin1ToChar('.');

    long long int pos = (url.startsWith(Trait::latin1ToString("mailto:")) ? 7 : 0);
    const auto atPos = url.indexOf(at, pos);

    // There should be '@' and something before it.
    if (atPos == -1 || atPos == pos) {
        return false;
    }

    for (; pos < atPos; ++pos) {
        if (!isAlnum(url[pos]) && !isLocalPunct(url[pos])) {
            return false;
        }
    }

    // Check label of domain in [start, end).
    const auto isValidLabel = [&](long long int start, long long int end) -> bool {
        const long long int maxLabelLength = 63;

        if (end - start > maxLabelLength || start + 1 > end || start >= url.length() || end > url.length()) {
            return false;
        }

        if (url[start] == hyphen || url[end - 1] == hyphen) {
            return false;
        }

        for (; start < end; ++start) {
            if (!isAlnum(url[start]) && url[start] != hyphen) {
                return false;
            }
        }

        return true;
    };

    long long int dotPos = url.indexOf(dot, atPos + 1);

    // Domain without dots is not accepted.
    if (dotPos == -1) {
        return false;
    }

    long long int labelStart = atPos + 1;

    for (; dotPos != -1; dotPos = url.indexOf(dot, labelStart)) {
        if (!isValidLabel(labelStart, dotPos)) {
            return false;
        }

        labelStart = dotPos + 1;
    }

    // The last label, after the last dot.
    return isValidLabel(labelStart, url.length());
}
1179
1180//! \return Is the given string a valid URL?
1181template<class Trait>
1182inline bool
1183isValidUrl(const typename Trait::String &url);
1184
1185//! \return Is the given string a GitHub autolink?
1186template<class Trait>
1187inline bool
1188isGitHubAutolink(const typename Trait::String &url);
1189
1190#ifdef MD4QT_QT_SUPPORT
1191
1192template<>
1193inline bool
1195{
1196 const QUrl u(url, QUrl::StrictMode);
1197
1198 return (u.isValid() && !u.isRelative());
1199}
1200
1201template<>
1202inline bool
1204{
1205 const QUrl u(url, QUrl::StrictMode);
1206
1207 return (u.isValid()
1208 && ((!u.scheme().isEmpty() && !u.host().isEmpty())
1209 || (url.startsWith(QStringLiteral("www.")) && url.length() >= 7 &&
1210 url.indexOf(QLatin1Char('.'), 4) != -1)));
1211}
1212
1213#endif
1214
1215#ifdef MD4QT_ICU_STL_SUPPORT
1216
1217template<>
1218inline bool
1220{
1221 const UrlUri u(url);
1222
1223 return (u.isValid() && !u.isRelative());
1224}
1225
1226template<>
1227inline bool
1229{
1230 const UrlUri u(url);
1231
1232 return (u.isValid()
1233 && ((!u.scheme().isEmpty() && !u.host().isEmpty())
1234 || (url.startsWith(UnicodeString("www.")) && url.length() >= 7 &&
1235 url.indexOf(UnicodeChar('.'), 4) != -1)));
1236}
1237
1238#endif
1239
1240//! Process GitHub autolinks for the text with index \p idx.
1241template<class Trait>
1242inline long long int
1245 long long int idx)
1246{
1247 if (idx < 0 || idx >= (long long int)po.m_rawTextData.size()) {
1248 return idx;
1249 }
1250
1251 static const auto s_delims = Trait::latin1ToString("*_~()<>");
1252 auto s = po.m_rawTextData[idx];
1253 bool first = true;
1254 long long int j = 0;
1255 auto end = typename Trait::Char(0x00);
1256 bool skipSpace = true;
1257 long long int ret = idx;
1258
1259 while (s.m_str.length()) {
1260 long long int i = 0;
1261 end = typename Trait::Char(0x00);
1262
1263 for (; i < s.m_str.length(); ++i) {
1264 if (first) {
1265 if (s.m_str[i] == Trait::latin1ToChar('(')) {
1266 end = Trait::latin1ToChar(')');
1267 }
1268
1269 if (s_delims.indexOf(s.m_str[i]) == -1 && !s.m_str[i].isSpace()) {
1270 first = false;
1271 j = i;
1272 }
1273 } else {
1274 if (s.m_str[i].isSpace() || i == s.m_str.length() - 1 || s.m_str[i] == end) {
1275 auto tmp = s.m_str.sliced(j, i - j +
1276 (i == s.m_str.length() - 1 && s.m_str[i] != end && !s.m_str[i].isSpace() ?
1277 1 : 0));
1278 skipSpace = s.m_str[i].isSpace();
1279
1280 const auto email = isEmail<Trait>(tmp);
1281
1282 if (isGitHubAutolink<Trait>(tmp) || email) {
1283 auto ti = textAtIdx(p, idx);
1284
1285 if (ti >= 0 && ti < static_cast<long long int>(p->items().size())) {
1286 typename ItemWithOpts<Trait>::Styles openStyles, closeStyles;
1287 const auto opts = std::static_pointer_cast<Text<Trait>>(p->items().at(ti))->opts();
1288
1289 if (j == 0 || s.m_str.sliced(0, j).isEmpty()) {
1290 openStyles = std::static_pointer_cast<ItemWithOpts<Trait>>(p->items().at(ti))->openStyles();
1291 closeStyles = std::static_pointer_cast<ItemWithOpts<Trait>>(p->items().at(ti))->closeStyles();
1292 p->removeItemAt(ti);
1293 po.m_rawTextData.erase(po.m_rawTextData.cbegin() + idx);
1294 --ret;
1295 } else {
1296 const auto tmp = s.m_str.sliced(0, j);
1297
1298 auto t = std::static_pointer_cast<Text<Trait>>(p->items().at(ti));
1299 t->setEndColumn(po.m_fr.m_data.at(s.m_line).first.virginPos(s.m_pos + j - 1));
1300 closeStyles = t->closeStyles();
1301 t->closeStyles() = {};
1302 po.m_rawTextData[idx].m_str = tmp;
1303 ++idx;
1305 ++ti;
1306 }
1307
1308 std::shared_ptr<Link<Trait>> lnk(new Link<Trait>);
1309 lnk->setStartColumn(po.m_fr.m_data.at(s.m_line).first.virginPos(s.m_pos + j));
1310 lnk->setStartLine(po.m_fr.m_data.at(s.m_line).second.m_lineNumber);
1311 lnk->setEndColumn(
1312 po.m_fr.m_data.at(s.m_line).first.virginPos(s.m_pos + i -
1313 (i == s.m_str.length() - 1 && s.m_str[i] != end && !s.m_str[i].isSpace() ?
1314 0 : 1)));
1315 lnk->setEndLine(po.m_fr.m_data.at(s.m_line).second.m_lineNumber);
1316 lnk->openStyles() = openStyles;
1317 lnk->setTextPos({lnk->startColumn(), lnk->startLine(), lnk->endColumn(), lnk->endLine()});
1318 lnk->setUrlPos(lnk->textPos());
1319
1320 if (email && !tmp.toLower().startsWith(Trait::latin1ToString("mailto:"))) {
1321 tmp = Trait::latin1ToString("mailto:") + tmp;
1322 }
1323
1324 if (!email && tmp.toLower().startsWith(Trait::latin1ToString("www."))) {
1325 tmp = Trait::latin1ToString("http://") + tmp;
1326 }
1327
1328 lnk->setUrl(tmp);
1329 lnk->setOpts(opts);
1330 p->insertItem(ti, lnk);
1331
1332 s.m_pos += i + (s.m_str[i] == end || s.m_str[i].isSpace() ? 0 : 1);
1333 s.m_str.remove(0, i + (s.m_str[i] == end || s.m_str[i].isSpace() ? 0 : 1));
1334 j = 0;
1335 i = 0;
1336
1337 if (!s.m_str.isEmpty()) {
1338 po.m_rawTextData.insert(po.m_rawTextData.cbegin() + idx, s);
1339 ++ret;
1340
1341 auto t = std::make_shared<Text<Trait>>();
1342 t->setStartColumn(po.m_fr.m_data[s.m_line].first.virginPos(s.m_pos));
1343 t->setStartLine(po.m_fr.m_data.at(s.m_line).second.m_lineNumber);
1344 t->setEndLine(po.m_fr.m_data.at(s.m_line).second.m_lineNumber);
1345 t->setEndColumn(po.m_fr.m_data.at(s.m_line).first.virginPos(s.m_pos + s.m_str.length() - 1));
1347 t->closeStyles() = closeStyles;
1348 p->insertItem(ti + 1, t);
1349 } else {
1350 lnk->closeStyles() = closeStyles;
1351 }
1352
1353 break;
1354 }
1355 }
1356
1357 j = i + (skipSpace ? 1 : 0);
1358 }
1359 }
1360 }
1361
1362 first = true;
1363
1364 if (i == s.m_str.length()) {
1365 break;
1366 }
1367 }
1368
1369 return ret;
1370}
1371
1372//! GitHub autolinks plugin.
1373template<class Trait>
1374inline void
1377 const typename Trait::StringList &)
1378{
1379 if (!po.m_collectRefLinks) {
1380 long long int i = 0;
1381
1382 while (i >= 0 && i < (long long int)po.m_rawTextData.size()) {
1383 i = processGitHubAutolinkExtension(p, po, i);
1384
1385 ++i;
1386 }
1387 }
1388}
1389
1390//
1391// Parser
1392//
1393
1394//! Markdown parser.
1395template<class Trait>
1396class Parser final
1397{
1398public:
1403
1404 ~Parser() = default;
1405
1406 //! \return Parsed Markdown document.
1407 std::shared_ptr<Document<Trait>>
1408 parse(
1409 //! File name of the Markdown document.
1410 const typename Trait::String &fileName,
1411 //! Should parsing be recursive? If recursive all links to existing Markdown
1412 //! files will be parsed and presented in the returned document.
1413 bool recursive = true,
1414 //! Allowed extensions for Markdown document files. If Markdown file doesn't
1415 //! have given extension it will be ignored.
1416 const typename Trait::StringList &ext = {Trait::latin1ToString("md"), Trait::latin1ToString("markdown")},
1417 //! Make full optimization, or just semi one. In full optimization
1418 //! text items with one style but with some closing delimiters
1419 //! in the middle will be concatenated in one, like in **text* text*,
1420 //! here in full optimization will be "text text" with 2 open/close
1421 //! style delimiters, but one closing delimiter is in the middle.
1422 bool fullyOptimizeParagraphs = true);
1423
1424 //! \return Parsed Markdown document.
1425 std::shared_ptr<Document<Trait>>
1426 parse(
1427 //! Stream to parse.
1428 typename Trait::TextStream &stream,
1429 //! Absolute path to the root folder for the document.
1430 //! This path will be used to resolve local links.
1431 const typename Trait::String &path,
1432 //! This argument needed only for anchor.
1433 const typename Trait::String &fileName,
1434 //! Make full optimization, or just semi one. In full optimization
1435 //! text items with one style but with some closing delimiters
1436 //! in the middle will be concatenated in one, like in **text* text*,
1437 //! here in full optimization will be "text text" with 2 open/close
1438 //! style delimiters, but one closing delimiter is in the middle.
1439 bool fullyOptimizeParagraphs = true);
1440
1441 //! Add text plugin.
1442 void
1444 //! ID of a plugin. Use TextPlugin::UserDefinedPluginID value for start ID.
1445 int id,
1446 //! Function of a plugin, that will be invoked to process raw text.
1447 TextPluginFunc<Trait> plugin,
1448 //! Should this plugin be used in parsing of internals of links?
1449 bool processInLinks,
1450 //! User data that will be passed to plugin function.
1451 const typename Trait::StringList &userData)
1452 {
// The plugin is stored in m_textPlugins under its ID.
// NOTE(review): if TextPluginsMap behaves like std::map, insert() keeps an
// already registered plugin with the same ID - confirm.
1453 m_textPlugins.insert({id, {plugin, processInLinks, userData}});
1454 }
1455
1456 //! Remove text plugin.
1457 void
1459 //! ID of plugin that should be removed.
1460 int id)
1461 {
// Erases the entry stored under this ID from m_textPlugins.
1462 m_textPlugins.erase(id);
1463 }
1464
1465private:
1466 void
1467 parseFile(const typename Trait::String &fileName,
1468 bool recursive,
1469 std::shared_ptr<Document<Trait>> doc,
1470 const typename Trait::StringList &ext,
1471 typename Trait::StringList *parentLinks = nullptr);
1472
1473 void
1474 parseStream(typename Trait::TextStream &stream,
1475 const typename Trait::String &workingPath,
1476 const typename Trait::String &fileName,
1477 bool recursive,
1478 std::shared_ptr<Document<Trait>> doc,
1479 const typename Trait::StringList &ext,
1480 typename Trait::StringList *parentLinks = nullptr);
1481
1482 void
1483 clearCache();
1484
//! Classification of a line / block.
//! whatIsTheLine() returns one of these values, and ParserContext keeps them
//! in m_type, m_lineType and m_prevLineType.
1485 enum class BlockType {
1486 Unknown,
1487 EmptyLine,
1488 Text,
1489 List,
1490 ListWithFirstEmptyLine,
1491 CodeIndentedBySpaces,
1492 Code,
1493 Blockquote,
1494 Heading,
1495 SomethingInList,
1496 FensedCodeInList,
1497 Footnote
1498 }; // enum BlockType
1499
//! Level / indent pair, both members default to -1.
//! whatIsTheLine() accepts a pointer to such an object (parameter "indent"),
//! and ParserContext stores one in m_indent.
1500 struct ListIndent {
1501 long long int m_level = -1;
1502 long long int m_indent = -1;
1503 }; // struct ListIndent
1504
1505 BlockType
1506 whatIsTheLine(typename Trait::InternalString &str,
1507 bool inList = false,
1508 bool inListWithFirstEmptyLine = false,
1509 bool fensedCodeInList = false,
1510 typename Trait::String *startOfCode = nullptr,
1511 ListIndent *indent = nullptr,
1512 bool emptyLinePreceded = false,
1513 bool calcIndent = false,
1514 const std::vector<long long int> *indents = nullptr);
1515
1516 long long int
1517 parseFragment(MdBlock<Trait> &fr,
1518 std::shared_ptr<Block<Trait>> parent,
1519 std::shared_ptr<Document<Trait>> doc,
1520 typename Trait::StringList &linksToParse,
1521 const typename Trait::String &workingPath,
1522 const typename Trait::String &fileName,
1523 bool collectRefLinks,
1524 RawHtmlBlock<Trait> &html);
1525
1526 void
1527 parseText(MdBlock<Trait> &fr,
1528 std::shared_ptr<Block<Trait>> parent,
1529 std::shared_ptr<Document<Trait>> doc,
1530 typename Trait::StringList &linksToParse,
1531 const typename Trait::String &workingPath,
1532 const typename Trait::String &fileName,
1533 bool collectRefLinks,
1534 RawHtmlBlock<Trait> &html);
1535
1536 long long int
1537 parseBlockquote(MdBlock<Trait> &fr,
1538 std::shared_ptr<Block<Trait>> parent,
1539 std::shared_ptr<Document<Trait>> doc,
1540 typename Trait::StringList &linksToParse,
1541 const typename Trait::String &workingPath,
1542 const typename Trait::String &fileName,
1543 bool collectRefLinks,
1544 RawHtmlBlock<Trait> &html);
1545
1546 long long int
1547 parseList(MdBlock<Trait> &fr,
1548 std::shared_ptr<Block<Trait>> parent,
1549 std::shared_ptr<Document<Trait>> doc,
1550 typename Trait::StringList &linksToParse,
1551 const typename Trait::String &workingPath,
1552 const typename Trait::String &fileName,
1553 bool collectRefLinks,
1554 RawHtmlBlock<Trait> &html);
1555
1556 long long int
1557 parseCode(MdBlock<Trait> &fr,
1558 std::shared_ptr<Block<Trait>> parent,
1559 bool collectRefLinks);
1560
1561 long long int
1562 parseCodeIndentedBySpaces(MdBlock<Trait> &fr,
1563 std::shared_ptr<Block<Trait>> parent,
1564 bool collectRefLinks,
1565 int indent,
1566 const typename Trait::String &syntax,
1567 long long int emptyColumn,
1568 long long int startLine,
1569 bool fensedCode,
1570 const WithPosition &startDelim = {},
1571 const WithPosition &endDelim = {},
1572 const WithPosition &syntaxPos = {});
1573
1574 long long int
1575 parseListItem(MdBlock<Trait> &fr,
1576 std::shared_ptr<Block<Trait>> parent,
1577 std::shared_ptr<Document<Trait>> doc,
1578 typename Trait::StringList &linksToParse,
1579 const typename Trait::String &workingPath,
1580 const typename Trait::String &fileName,
1581 bool collectRefLinks,
1582 RawHtmlBlock<Trait> &html,
1583 std::shared_ptr<ListItem<Trait>> *resItem = nullptr);
1584
1585 void
1586 parseHeading(MdBlock<Trait> &fr,
1587 std::shared_ptr<Block<Trait>> parent,
1588 std::shared_ptr<Document<Trait>> doc,
1589 typename Trait::StringList &linksToParse,
1590 const typename Trait::String &workingPath,
1591 const typename Trait::String &fileName,
1592 bool collectRefLinks);
1593
1594 void
1595 parseFootnote(MdBlock<Trait> &fr,
1596 std::shared_ptr<Block<Trait>> parent,
1597 std::shared_ptr<Document<Trait>> doc,
1598 typename Trait::StringList &linksToParse,
1599 const typename Trait::String &workingPath,
1600 const typename Trait::String &fileName,
1601 bool collectRefLinks);
1602
1603 void
1604 parseTable(MdBlock<Trait> &fr,
1605 std::shared_ptr<Block<Trait>> parent,
1606 std::shared_ptr<Document<Trait>> doc,
1607 typename Trait::StringList &linksToParse,
1608 const typename Trait::String &workingPath,
1609 const typename Trait::String &fileName,
1610 bool collectRefLinks,
1611 int columnsCount);
1612
1613 void
1614 parseParagraph(MdBlock<Trait> &fr,
1615 std::shared_ptr<Block<Trait>> parent,
1616 std::shared_ptr<Document<Trait>> doc,
1617 typename Trait::StringList &linksToParse,
1618 const typename Trait::String &workingPath,
1619 const typename Trait::String &fileName,
1620 bool collectRefLinks,
1621 RawHtmlBlock<Trait> &html);
1622
1623 void
1624 parseFormattedTextLinksImages(MdBlock<Trait> &fr,
1625 std::shared_ptr<Block<Trait>> parent,
1626 std::shared_ptr<Document<Trait>> doc,
1627 typename Trait::StringList &linksToParse,
1628 const typename Trait::String &workingPath,
1629 const typename Trait::String &fileName,
1630 bool collectRefLinks,
1631 bool ignoreLineBreak,
1632 RawHtmlBlock<Trait> &html,
1633 bool inLink);
1634
//! Mutable state shared by the parsing steps of one parse run
//! (passed by reference to parseFirstStep(), parseSecondStep(), eatFootnote(), ...).
1635 struct ParserContext {
//! Blocks produced so far; parseFragment(ParserContext &, ...) appends to it.
1636 typename Trait::template Vector<MdBlock<Trait>> m_splitted;
//! Lines of the block that is being collected right now.
1637 typename MdBlock<Trait>::Data m_fragment;
1638 bool m_emptyLineInList = false;
1639 bool m_fensedCodeInList = false;
1640 long long int m_emptyLinesCount = 0;
1641 long long int m_lineCounter = 0;
//! Known indents; handed to whatIsTheLine() as "indents".
1642 std::vector<long long int> m_indents;
//! Current indent; handed to whatIsTheLine() as "indent".
1643 ListIndent m_indent;
//! HTML block state that is passed on to parseFragment().
1644 RawHtmlBlock<Trait> m_html;
//! Stored as the second field of every MdBlock built from m_fragment.
1645 long long int m_emptyLinesBefore = 0;
//! Attached to every line that is pushed into m_fragment.
1646 MdLineData::CommentDataMap m_htmlCommentData;
1647 typename Trait::String m_startOfCode;
1648 typename Trait::String m_startOfCodeInList;
//! Type of the block being collected; makeLineMain() copies m_lineType here.
1649 BlockType m_type = BlockType::EmptyLine;
//! Type of the line processed last (result of whatIsTheLine()).
1650 BlockType m_lineType = BlockType::Unknown;
1651 BlockType m_prevLineType = BlockType::Unknown;
1652 }; // struct ParserContext
1653
1654 std::pair<long long int, bool>
1655 parseFirstStep(ParserContext &ctx,
1656 StringListStream<Trait> &stream,
1657 std::shared_ptr<Block<Trait>> parent,
1658 std::shared_ptr<Document<Trait>> doc,
1659 typename Trait::StringList &linksToParse,
1660 const typename Trait::String &workingPath,
1661 const typename Trait::String &fileName,
1662 bool collectRefLinks);
1663
1664 void
1665 parseSecondStep(ParserContext &ctx,
1666 std::shared_ptr<Block<Trait>> parent,
1667 std::shared_ptr<Document<Trait>> doc,
1668 typename Trait::StringList &linksToParse,
1669 const typename Trait::String &workingPath,
1670 const typename Trait::String &fileName,
1671 bool collectRefLinks,
1672 bool top,
1673 bool dontProcessLastFreeHtml);
1674
1675 std::pair<RawHtmlBlock<Trait>, long long int>
1676 parse(StringListStream<Trait> &stream,
1677 std::shared_ptr<Block<Trait>> parent,
1678 std::shared_ptr<Document<Trait>> doc,
1679 typename Trait::StringList &linksToParse,
1680 const typename Trait::String &workingPath,
1681 const typename Trait::String &fileName,
1682 bool collectRefLinks,
1683 bool top = false,
1684 bool dontProcessLastFreeHtml = false,
1685 bool stopOnMayBreakList = false);
1686
1687 std::pair<long long int, bool>
1688 parseFragment(ParserContext &ctx,
1689 std::shared_ptr<Block<Trait>> parent,
1690 std::shared_ptr<Document<Trait>> doc,
1691 typename Trait::StringList &linksToParse,
1692 const typename Trait::String &workingPath,
1693 const typename Trait::String &fileName,
1694 bool collectRefLinks);
1695
1696 void
1697 eatFootnote(ParserContext &ctx,
1698 StringListStream<Trait> &stream,
1699 std::shared_ptr<Block<Trait>> parent,
1700 std::shared_ptr<Document<Trait>> doc,
1701 typename Trait::StringList &linksToParse,
1702 const typename Trait::String &workingPath,
1703 const typename Trait::String &fileName,
1704 bool collectRefLinks);
1705
1706 void
1707 finishHtml(ParserContext &ctx,
1708 std::shared_ptr<Block<Trait>> parent,
1709 std::shared_ptr<Document<Trait>> doc,
1710 bool collectRefLinks,
1711 bool top,
1712 bool dontProcessLastFreeHtml);
1713
1714 void
1715 makeLineMain(ParserContext &ctx,
1716 const typename Trait::InternalString &line,
1717 long long int emptyLinesCount,
1718 const ListIndent &currentIndent,
1719 long long int ns,
1720 long long int currentLineNumber);
1721
1722 std::pair<long long int, bool>
1723 parseFragmentAndMakeNextLineMain(ParserContext &ctx,
1724 std::shared_ptr<Block<Trait>> parent,
1725 std::shared_ptr<Document<Trait>> doc,
1726 typename Trait::StringList &linksToParse,
1727 const typename Trait::String &workingPath,
1728 const typename Trait::String &fileName,
1729 bool collectRefLinks,
1730 const typename Trait::InternalString &line,
1731 const ListIndent &currentIndent,
1732 long long int ns,
1733 long long int currentLineNumber);
1734
1735 bool
1736 isListType(BlockType t);
1737
1738 std::pair<typename Trait::InternalString, bool>
1739 readLine(ParserContext &ctx, StringListStream<Trait> &stream);
1740
1741 std::shared_ptr<Image<Trait>>
1742 makeImage(const typename Trait::String &url,
1743 const typename MdBlock<Trait>::Data &text,
1744 TextParsingOpts<Trait> &po,
1745 bool doNotCreateTextOnFail,
1746 long long int startLine,
1747 long long int startPos,
1748 long long int lastLine,
1749 long long int lastPos,
1750 const WithPosition &textPos,
1751 const WithPosition &urlPos);
1752
1753 std::shared_ptr<Link<Trait>>
1754 makeLink(const typename Trait::String &url,
1755 const typename MdBlock<Trait>::Data &text,
1756 TextParsingOpts<Trait> &po,
1757 bool doNotCreateTextOnFail,
1758 long long int startLine,
1759 long long int startPos,
1760 long long int lastLine,
1761 long long int lastPos,
1762 const WithPosition &textPos,
1763 const WithPosition &urlPos);
1764
//! One delimiter found in the text. collectDelimiters() returns a vector of them
//! (see the Delims alias), and the checkFor*() methods walk that vector by iterator.
1765 struct Delimiter {
//! Kind of the delimiter; the comment above an enumerator shows its characters.
1766 enum DelimiterType {
1767 // (
1768 ParenthesesOpen,
1769 // )
1770 ParenthesesClose,
1771 // [
1772 SquareBracketsOpen,
1773 // ]
1774 SquareBracketsClose,
1775 // ![
1776 ImageOpen,
1777 // ~~
1778 Strikethrough,
1779 // *
1780 Emphasis1,
1781 // _
1782 Emphasis2,
1783 // `
1784 InlineCode,
1785 // <
1786 Less,
1787 // >
1788 Greater,
1789 // $
1790 Math,
1791 HorizontalLine,
1792 H1,
1793 H2,
1794 Unknown
1795 }; // enum DelimiterType
1796
1797 DelimiterType m_type = Unknown;
//! Line, position and length of the delimiter; -1 / -1 / 0 until set.
1798 long long int m_line = -1;
1799 long long int m_pos = -1;
1800 long long int m_len = 0;
1801 bool m_isWordBefore = false;
1802 bool m_backslashed = false;
1803 bool m_leftFlanking = false;
1804 bool m_rightFlanking = false;
1805 bool m_skip = false;
1806 }; // struct Delimiter
1807
1808 using Delims = typename Trait::template Vector<Delimiter>;
1809
1810 bool
1811 createShortcutImage(const typename MdBlock<Trait>::Data &text,
1812 TextParsingOpts<Trait> &po,
1813 long long int startLine,
1814 long long int startPos,
1815 long long int lastLineForText,
1816 long long int lastPosForText,
1817 typename Delims::iterator lastIt,
1818 const typename MdBlock<Trait>::Data &linkText,
1819 bool doNotCreateTextOnFail,
1820 const WithPosition &textPos,
1821 const WithPosition &linkTextPos);
1822
1823 typename Delims::iterator
1824 checkForImage(typename Delims::iterator it,
1825 typename Delims::iterator last,
1826 TextParsingOpts<Trait> &po);
1827
1828 bool
1829 createShortcutLink(const typename MdBlock<Trait>::Data &text,
1830 TextParsingOpts<Trait> &po,
1831 long long int startLine,
1832 long long int startPos,
1833 long long int lastLineForText,
1834 long long int lastPosForText,
1835 typename Delims::iterator lastIt,
1836 const typename MdBlock<Trait>::Data &linkText,
1837 bool doNotCreateTextOnFail,
1838 const WithPosition &textPos,
1839 const WithPosition &linkTextPos);
1840
1841 typename Delims::iterator
1842 checkForLink(typename Delims::iterator it,
1843 typename Delims::iterator last,
1844 TextParsingOpts<Trait> &po);
1845
1846 Delims
1847 collectDelimiters(const typename MdBlock<Trait>::Data &fr);
1848
1849 std::pair<typename Trait::String, bool>
1850 readHtmlTag(typename Delims::iterator it, TextParsingOpts<Trait> &po);
1851
1852 typename Delims::iterator
1853 findIt(typename Delims::iterator it,
1854 typename Delims::iterator last,
1855 TextParsingOpts<Trait> &po);
1856
1857 void
1858 finishRule1HtmlTag(typename Delims::iterator it,
1859 typename Delims::iterator last,
1860 TextParsingOpts<Trait> &po,
1861 bool skipFirst);
1862
1863 void
1864 finishRule2HtmlTag(typename Delims::iterator it,
1865 typename Delims::iterator last,
1866 TextParsingOpts<Trait> &po);
1867
1868 void
1869 finishRule3HtmlTag(typename Delims::iterator it,
1870 typename Delims::iterator last,
1871 TextParsingOpts<Trait> &po);
1872
1873 void
1874 finishRule4HtmlTag(typename Delims::iterator it,
1875 typename Delims::iterator last,
1876 TextParsingOpts<Trait> &po);
1877
1878 void
1879 finishRule5HtmlTag(typename Delims::iterator it,
1880 typename Delims::iterator last,
1881 TextParsingOpts<Trait> &po);
1882
1883 void
1884 finishRule6HtmlTag(typename Delims::iterator it,
1885 typename Delims::iterator last,
1886 TextParsingOpts<Trait> &po);
1887
1889 finishRule7HtmlTag(typename Delims::iterator it,
1890 typename Delims::iterator last,
1891 TextParsingOpts<Trait> &po);
1892
1893 typename Delims::iterator
1894 finishRawHtmlTag(typename Delims::iterator it,
1895 typename Delims::iterator last,
1896 TextParsingOpts<Trait> &po,
1897 bool skipFirst);
1898
1899 int
1900 htmlTagRule(typename Delims::iterator it,
1901 typename Delims::iterator last,
1902 TextParsingOpts<Trait> &po);
1903
1904 typename Delims::iterator
1905 checkForRawHtml(typename Delims::iterator it,
1906 typename Delims::iterator last,
1907 TextParsingOpts<Trait> &po);
1908
1909 typename Delims::iterator
1910 checkForMath(typename Delims::iterator it,
1911 typename Delims::iterator last,
1912 TextParsingOpts<Trait> &po);
1913
1914 typename Delims::iterator
1915 checkForAutolinkHtml(typename Delims::iterator it,
1916 typename Delims::iterator last,
1917 TextParsingOpts<Trait> &po,
1918 bool updatePos);
1919
1920 typename Delims::iterator
1921 checkForInlineCode(typename Delims::iterator it,
1922 typename Delims::iterator last,
1923 TextParsingOpts<Trait> &po);
1924
1925 std::pair<typename MdBlock<Trait>::Data, typename Delims::iterator>
1926 readTextBetweenSquareBrackets(typename Delims::iterator start,
1927 typename Delims::iterator it,
1928 typename Delims::iterator last,
1929 TextParsingOpts<Trait> &po,
1930 bool doNotCreateTextOnFail,
1931 WithPosition *pos = nullptr);
1932
1933 std::pair<typename MdBlock<Trait>::Data, typename Delims::iterator>
1934 checkForLinkText(typename Delims::iterator it,
1935 typename Delims::iterator last,
1936 TextParsingOpts<Trait> &po,
1937 WithPosition *pos = nullptr);
1938
1939 std::pair<typename MdBlock<Trait>::Data, typename Delims::iterator>
1940 checkForLinkLabel(typename Delims::iterator it,
1941 typename Delims::iterator last,
1942 TextParsingOpts<Trait> &po,
1943 WithPosition *pos = nullptr);
1944
1945 std::tuple<typename Trait::String, typename Trait::String, typename Delims::iterator, bool>
1946 checkForInlineLink(typename Delims::iterator it,
1947 typename Delims::iterator last,
1948 TextParsingOpts<Trait> &po,
1949 WithPosition *urlPos = nullptr);
1950
1951 inline std::tuple<typename Trait::String, typename Trait::String, typename Delims::iterator, bool>
1952 checkForRefLink(typename Delims::iterator it,
1953 typename Delims::iterator last,
1954 TextParsingOpts<Trait> &po,
1955 WithPosition *urlPos = nullptr);
1956
1957 typename Trait::String
1958 toSingleLine(const typename MdBlock<Trait>::Data &d);
1959
1960 template<class Func>
1961 typename Delims::iterator
1962 checkShortcut(typename Delims::iterator it,
1963 typename Delims::iterator last,
1964 TextParsingOpts<Trait> &po,
1965 Func functor)
1966 {
1967 const auto start = it;
1968
1969 typename MdBlock<Trait>::Data text;
1970
1971 WithPosition labelPos;
1972 std::tie(text, it) = checkForLinkLabel(start, last, po, &labelPos);
1973
1974 if (it != start && !toSingleLine(text).simplified().isEmpty()) {
1975 if ((this->*functor)(text, po, start->m_line, start->m_pos, start->m_line,
1976 start->m_pos + start->m_len, it, {}, false, labelPos, {})) {
1977 return it;
1978 }
1979 }
1980
1981 return start;
1982 }
1983
1984 bool
1985 isSequence(typename Delims::iterator it,
1986 long long int itLine,
1987 long long int itPos,
1988 typename Delimiter::DelimiterType t);
1989
1990 std::pair<typename Delims::iterator, typename Delims::iterator>
1991 readSequence(typename Delims::iterator first,
1992 typename Delims::iterator it,
1993 typename Delims::iterator last,
1994 long long int &pos,
1995 long long int &length,
1996 long long int &itCount,
1997 long long int &lengthFromIt,
1998 long long int &itCountFromIt);
1999
2000 typename Delims::iterator
2001 readSequence(typename Delims::iterator it,
2002 typename Delims::iterator last,
2003 long long int &line,
2004 long long int &pos,
2005 long long int &len,
2006 long long int &itCount);
2007
2008 int
2009 emphasisToInt(typename Delimiter::DelimiterType t);
2010
2011 void
2012 createStyles(std::vector<std::pair<Style, long long int>> & styles,
2013 typename Delimiter::DelimiterType t,
2014 long long int style);
2015
2016 std::vector<std::pair<Style, long long int>>
2017 createStyles(typename Delimiter::DelimiterType t,
2018 const std::vector<long long int> &styles,
2019 long long int lastStyle);
2020
2021 std::tuple<bool, std::vector<std::pair<Style, long long int>>, long long int, long long int>
2022 isStyleClosed(typename Delims::iterator first,
2023 typename Delims::iterator it,
2024 typename Delims::iterator last,
2025 typename Delims::iterator &stackBottom,
2026 TextParsingOpts<Trait> &po);
2027
2028 typename Delims::iterator
2029 incrementIterator(typename Delims::iterator it,
2030 typename Delims::iterator last,
2031 long long int count);
2032
2033 typename Delims::iterator
2034 checkForStyle(typename Delims::iterator first,
2035 typename Delims::iterator it,
2036 typename Delims::iterator last,
2037 typename Delims::iterator &stackBottom,
2038 TextParsingOpts<Trait> &po);
2039
2040 bool
2041 isListOrQuoteAfterHtml(TextParsingOpts<Trait> &po);
2042
2043 void
2044 parseTableInParagraph(TextParsingOpts<Trait> &po,
2045 std::shared_ptr<Paragraph<Trait>> parent,
2046 std::shared_ptr<Document<Trait>> doc,
2047 typename Trait::StringList &linksToParse,
2048 const typename Trait::String &workingPath,
2049 const typename Trait::String &fileName,
2050 bool collectRefLinks);
2051
2052 bool
2053 isNewBlockIn(MdBlock<Trait> &fr,
2054 long long int startLine,
2055 long long int endLine);
2056
2057 void
2058 makeInlineCode(long long int startLine,
2059 long long int startPos,
2060 long long int lastLine,
2061 long long int lastPos,
2062 TextParsingOpts<Trait> &po,
2063 typename Delims::iterator startDelimIt,
2064 typename Delims::iterator endDelimIt);
2065
// Translates the m_fullyOptimizeParagraphs flag into an OptimizeParagraphType value:
// a set flag selects OptimizeParagraphType::Full (the other branch of the
// conditional is not part of this listing).
2067 defaultParagraphOptimization() const
2068 {
2069 return (m_fullyOptimizeParagraphs ? OptimizeParagraphType::Full :
2071 }
2072
2073private:
2074 //! Used in tests.
2075 friend struct PrivateAccess;
2076
2077private:
2078 typename Trait::StringList m_parsedFiles;
2079 TextPluginsMap<Trait> m_textPlugins;
2080 bool m_fullyOptimizeParagraphs = true;
2081
2083}; // class Parser
2084
2085//
2086// Parser
2087//
2088
2089template<class Trait>
2090inline std::shared_ptr<Document<Trait>>
2091Parser<Trait>::parse(const typename Trait::String &fileName,
2092 bool recursive,
2093 const typename Trait::StringList &ext,
2094 bool fullyOptimizeParagraphs)
2095{
2096 m_fullyOptimizeParagraphs = fullyOptimizeParagraphs;
2097
2098 std::shared_ptr<Document<Trait>> doc(new Document<Trait>);
2099
2100 parseFile(fileName, recursive, doc, ext);
2101
2102 clearCache();
2103
2104 return doc;
2105}
2106
2107template<class Trait>
2108inline std::shared_ptr<Document<Trait>>
2109Parser<Trait>::parse(typename Trait::TextStream &stream,
2110 const typename Trait::String &path,
2111 const typename Trait::String &fileName,
2112 bool fullyOptimizeParagraphs)
2113{
2114 m_fullyOptimizeParagraphs = fullyOptimizeParagraphs;
2115
2116 std::shared_ptr<Document<Trait>> doc(new Document<Trait>);
2117
2118 parseStream(stream, path, fileName, false, doc, typename Trait::StringList());
2119
2120 clearCache();
2121
2122 return doc;
2123}
2124
2125template<class Trait>
2127
2128#ifdef MD4QT_QT_SUPPORT
2129
2130//! Wrapper for QTextStream.
//! Keeps a reference to the stream and hands out its content line by line,
//! characters are fetched through an internal buffer (m_buf / m_pos).
2131template<>
2133{
2134public:
// Nothing is read here: the buffer starts empty and m_lastBuf is false.
2136 : m_stream(stream)
2137 , m_lastBuf(false)
2138 , m_pos(0)
2139 {
2140 }
2141
2142 bool
2143 atEnd() const
2144 {
2145 return (m_lastBuf && m_pos == m_buf.size());
2146 }
2147
// Reads characters up to the next line ending and returns them without the
// line ending. "\n", "\r\n" and a lone "\r" all finish a line.
2148 QString
2150 {
2151 QString line;
// Set once a '\r' was seen and it is not known yet whether '\n' follows.
2152 bool rFound = false;
2153
2154 while (!atEnd()) {
2155 const auto c = getChar();
2156
// '\r' was not followed by '\n': step back so that this character
// is read again as the beginning of the next line.
2157 if (rFound && c != QLatin1Char('\n')) {
2158 --m_pos;
2159
2160 return line;
2161 }
2162
2163 if (c == QLatin1Char('\r')) {
2164 rFound = true;
2165
2166 continue;
2167 } else if (c == QLatin1Char('\n')) {
2168 return line;
2169 }
2170
// Null characters are dropped.
2171 if (!c.isNull()) {
2172 line.push_back(c);
2173 }
2174 }
2175
// The stream ended without a line ending.
2176 return line;
2177 }
2178
2179private:
2180 void
2181 fillBuf()
2182 {
2183 m_buf = m_stream.read(512);
2184
2185 if (m_stream.atEnd()) {
2186 m_lastBuf = true;
2187 }
2188
2189 m_pos = 0;
2190 }
2191
2192 QChar
2193 getChar()
2194 {
2195 if (m_pos < m_buf.size()) {
2196 return m_buf.at(m_pos++);
2197 } else if (!atEnd()) {
2198 fillBuf();
2199
2200 return getChar();
2201 } else {
2202 return QChar();
2203 }
2204 }
2205
2206private:
2207 QTextStream &m_stream;
2208 QString m_buf;
2209 bool m_lastBuf;
2210 long long int m_pos;
2211}; // class TextStream
2212
2213#endif
2214
2215#ifdef MD4QT_ICU_STL_SUPPORT
2216
2217//! Wrapper for std::istream.
2218template<>
2220{
2221public:
2222 TextStream(std::istream &stream)
2223 : m_pos(0)
2224 {
2225 std::vector<unsigned char> content;
2226
2227 stream.seekg(0, std::ios::end);
2228 const auto ssize = stream.tellg();
2229 content.resize((size_t)ssize + 1);
2230 stream.seekg(0, std::ios::beg);
2231 stream.read((char *)&content[0], ssize);
2232 content[(size_t)ssize] = 0;
2233
2234 const auto z = std::count(content.cbegin(), content.cend(), 0);
2235
2236 if (z > 1) {
2237 std::vector<unsigned char> tmp;
2238 tmp.resize(content.size() + (z - 1) * 2);
2239
2240 for (size_t i = 0, j = 0; i < content.size() - 1; ++i, ++j) {
2241 if (content[i] == 0) {
2242 // 0xFFFD - replacement character in UTF-8.
2243 tmp[j++] = 0xEF;
2244 tmp[j++] = 0xBF;
2245 tmp[j] = 0xBD;
2246 } else {
2247 tmp[j] = content[i];
2248 }
2249 }
2250
2251 tmp[tmp.size() - 1] = 0;
2252
2253 std::swap(content, tmp);
2254 }
2255
2256 m_str = UnicodeString::fromUTF8((char *)&content[0]);
2257 }
2258
2259 bool
2260 atEnd() const
2261 {
2262 return m_pos == m_str.size();
2263 }
2264
2267 {
// Reads characters up to the next line ending and returns them without the
// line ending. "\n", "\r\n" and a lone "\r" all finish a line.
2268 UnicodeString line;
2269
// Set once a '\r' was seen and it is not known yet whether '\n' follows.
2270 bool rFound = false;
2271
2272 while (!atEnd()) {
2273 const auto c = getChar();
2274
// '\r' was not followed by '\n': step back so that this character
// is read again as the beginning of the next line.
2275 if (rFound && c != UnicodeChar('\n')) {
2276 --m_pos;
2277
2278 return line;
2279 }
2280
2281 if (c == UnicodeChar('\r')) {
2282 rFound = true;
2283
2284 continue;
2285 } else if (c == UnicodeChar('\n')) {
2286 return line;
2287 }
2288
// Null characters are dropped.
2289 if (!c.isNull()) {
2290 line.push_back(c);
2291 }
2292 }
2293
// The text ended without a line ending.
2294 return line;
2295 }
2296
2297private:
// Returns the character at the read position and advances the position.
// A default constructed UnicodeChar is returned at the end of the text,
// the position is not changed in this case.
2299 getChar()
2300 {
2301 if (!atEnd()) {
2302 return m_str[m_pos++];
2303 } else {
2304 return UnicodeChar();
2305 }
2306 }
2307
2308private:
2309 UnicodeString m_str;
2310 long long int m_pos;
2311};
2312
2313#endif
2314
2315//! \return Is HTML comment closed?
2316template<class Trait>
2317inline bool
2318checkForEndHtmlComments(const typename Trait::String &line,
2319 long long int pos)
2320{
2321 const long long int e = line.indexOf(Trait::latin1ToString("-->"), pos);
2322
2323 if (e != -1) {
2324 return isHtmlComment<Trait>(line.sliced(0, e + 3));
2325 }
2326
2327 return false;
2328}
2329
2330//! Collect information about HTML comments.
//! For every occurrence of s_startComment in \p line an entry is inserted into
//! res, the key is the virgin position of the occurrence in \p line. Values:
//! {0, true} for "<!-->", {1, true} for "<!--->", {2, true} in the other
//! positive branches and {-1, false} when addNegative stays set.
2331template<class Trait>
2332inline void
2333checkForHtmlComments(const typename Trait::InternalString &line,
2336{
// p - search position in this line, l - index of the first following line in the stream.
2337 long long int p = 0, l = stream.currentLineNumber();
2338
2339 const auto &str = line.asString();
2340
2341 while ((p = str.indexOf(Trait::latin1ToString(s_startComment), p)) != -1) {
2342 bool addNegative = false;
2343
// Text from the start of the comment to the end of the line.
2344 auto c = str.sliced(p);
2345
// The two shortest forms are handled apart, the search goes on behind them.
2346 if (c.startsWith(Trait::latin1ToString("<!-->"))) {
2347 res.insert({line.virginPos(p), {0, true}});
2348
2349 p += 5;
2350
2351 continue;
2352 } else if (c.startsWith(Trait::latin1ToString("<!--->"))) {
2353 res.insert({line.virginPos(p), {1, true}});
2354
2355 p += 6;
2356
2357 continue;
2358 }
2359
2361 res.insert({line.virginPos(p), {2, true}});
2362 } else {
2363 addNegative = true;
2364
// Following lines of the stream are appended to the text one by one,
// each separated with a space. l is not reset between occurrences.
2365 for (; l < stream.size(); ++l) {
2366 c.push_back(Trait::latin1ToChar(' '));
2367 c.push_back(stream.lineAt(l).asString());
2368
2370 res.insert({line.virginPos(p), {2, true}});
2371
2372 addNegative = false;
2373
2374 break;
2375 }
2376 }
2377 }
2378
2379 if (addNegative) {
2380 res.insert({line.virginPos(p), {-1, false}});
2381 }
2382
// Go on searching right behind the start of this occurrence.
2383 ++p;
2384 }
2385}
2386
// Parses the lines collected in ctx.m_fragment (if any) and resets the
// per-fragment state of the context.
// Returns {-1, false} when the fragment was consumed as a whole (or was empty).
// When the inner parseFragment() reports a line number > 0, only the lines in
// front of that line are stored as a block, and the line number together with
// the m_mayBreakList flag of that line is returned.
2387template<class Trait>
2388inline std::pair<long long int, bool>
2390 std::shared_ptr<Block<Trait>> parent,
2391 std::shared_ptr<Document<Trait>> doc,
2392 typename Trait::StringList &linksToParse,
2393 const typename Trait::String &workingPath,
2394 const typename Trait::String &fileName,
2395 bool collectRefLinks)
2396{
// Resets the state of the current fragment. m_splitted, m_html, m_emptyLinesBefore,
// m_htmlCommentData, m_lineType and m_prevLineType are left untouched.
2397 auto clearCtx = [&ctx] () {
2398 ctx.m_fragment.clear();
2399 ctx.m_type = BlockType::EmptyLine;
2400 ctx.m_emptyLineInList = false;
2401 ctx.m_fensedCodeInList = false;
2402 ctx.m_emptyLinesCount = 0;
2403 ctx.m_lineCounter = 0;
2404 ctx.m_indents.clear();
2405 ctx.m_indent = {-1, -1};
2406 ctx.m_startOfCode.clear();
2407 ctx.m_startOfCodeInList.clear();
2408 };
2409
2410 if (!ctx.m_fragment.empty()) {
2411 MdBlock<Trait> block = {ctx.m_fragment, ctx.m_emptyLinesBefore, ctx.m_emptyLinesCount > 0};
2412
2413 const auto line = parseFragment(block, parent, doc, linksToParse, workingPath,
2414 fileName, collectRefLinks, ctx.m_html);
2415
// The reported line is never expected to be the first line of the fragment.
2416 assert(line != ctx.m_fragment.front().second.m_lineNumber);
2417
2418 if (line > 0) {
// A pending HTML item is flushed to its parent (unless only reference
// links are collected) and the HTML state is reset.
2419 if (ctx.m_html.m_html) {
2420 if (!collectRefLinks) {
2421 ctx.m_html.m_parent->appendItem(ctx.m_html.m_html);
2422 }
2423
2424 resetHtmlTag<Trait>(ctx.m_html);
2425 }
2426
// NOTE(review): the offset is the difference of line numbers, so this assumes
// consecutive line numbers in the fragment and a line inside of it - confirm.
2427 const auto it = ctx.m_fragment.cbegin() + (line - ctx.m_fragment.cbegin()->second.m_lineNumber);
2428
// Only the lines in front of the reported line form the block.
2429 MdBlock<Trait> tmp = {{}, ctx.m_emptyLinesBefore, false};
2430 std::copy(ctx.m_fragment.cbegin(), it, std::back_inserter(tmp.m_data));
2431
2432 long long int emptyLines = 0;
2433
// Trailing blank lines are removed from the block and counted.
2434 while (!tmp.m_data.empty() && tmp.m_data.back().first.asString().simplified().isEmpty()) {
2435 tmp.m_data.pop_back();
2436 tmp.m_emptyLineAfter = true;
2437 ++emptyLines;
2438 }
2439
2440 if (!tmp.m_data.empty()) {
2441 ctx.m_splitted.push_back(tmp);
2442 }
2443
// Values are copied before clearCtx() clears the fragment "it" points into.
2444 const auto retLine = it->second.m_lineNumber;
2445 const auto retMayBreakList = it->second.m_mayBreakList;
2446
2447 clearCtx();
2448
// The removed blank lines precede whatever is collected next.
2449 ctx.m_emptyLinesBefore = emptyLines;
2450
2451 return {retLine, retMayBreakList};
2452 }
2453
// The fragment was accepted as a whole.
2454 ctx.m_splitted.push_back({ctx.m_fragment, ctx.m_emptyLinesBefore, ctx.m_emptyLinesCount > 0});
2455 }
2456
2457 clearCtx();
2458
2459 return {-1, false};
2460}
2461
//! Replace tabs with spaces (just for internal simpler use).
//! Every tab is expanded up to the next tab stop, tab stops are placed
//! each 4 columns.
template<class Trait>
inline void
replaceTabs(typename Trait::InternalString &s)
{
    const auto tab = Trait::latin1ToChar('\t');
    const auto space = Trait::latin1ToChar(' ');

    // Count of columns up to the next tab stop from the current position.
    unsigned char toStop = 4;
    long long int total = s.length();
    long long int idx = 0;

    while (idx < total) {
        if (s[idx] == tab) {
            s.replaceOne(idx, 1, typename Trait::String(toStop, space));

            // One character was replaced with toStop ones, and the position
            // behind them is exactly on a tab stop.
            total += toStop - 1;
            idx += toStop;
            toStop = 4;
        } else {
            ++idx;
            toStop = (toStop == 1 ? 4 : toStop - 1);
        }
    }
}
2484
// Reads lines from the stream into ctx.m_fragment for as long as they belong
// to the current block (presumably a footnote - see the name; confirm).
// Blank lines and lines that begin with the white space prefix checked below are
// always taken. Reading stops on the first other line that follows a blank
// line, unless that line starts a footnote.
2485template<class Trait>
2486inline void
2488 StringListStream<Trait> &stream,
2489 std::shared_ptr<Block<Trait>> parent,
2490 std::shared_ptr<Document<Trait>> doc,
2491 typename Trait::StringList &linksToParse,
2492 const typename Trait::String &workingPath,
2493 const typename Trait::String &fileName,
2494 bool collectRefLinks)
2495{
// Count of consecutive blank lines; reset by a line with the white space prefix.
2496 long long int emptyLinesCount = 0;
// Set by the first blank line; reset only when the next footnote begins.
2497 bool wasEmptyLine = false;
2498
2499 while (!stream.atEnd()) {
2500 const auto currentLineNumber = stream.currentLineNumber();
2501
2502 typename Trait::InternalString line;
2503 bool mayBreak;
2504
2505 std::tie(line, mayBreak) = readLine(ctx, stream);
2506
2507 replaceTabs<Trait>(line);
2508
// Position of the first non-space character.
2509 const auto ns = skipSpaces<Trait>(0, line.asString());
2510
2511 if (ns == line.length() || line.asString().startsWith(Trait::latin1ToString(" "))) {
2512 if (ns == line.length()) {
2513 ++emptyLinesCount;
2514 wasEmptyLine = true;
2515 } else {
2516 emptyLinesCount = 0;
2517 }
2518
2519 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData, mayBreak}});
2520 } else if (!wasEmptyLine) {
// No blank line so far: a new footnote finishes the collected fragment and
// becomes the main line, any other line is appended to the fragment.
2521 if (isFootnote<Trait>(line.sliced(ns).asString())) {
2522 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2523
2524 ctx.m_lineType = BlockType::Footnote;
2525
2526 makeLineMain(ctx, line, emptyLinesCount, ctx.m_indent, ns, currentLineNumber);
2527
2528 continue;
2529 } else {
2530 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData, mayBreak}});
2531 }
2532 } else {
// A line without the prefix after a blank line: the collected fragment is
// finished and the line is classified anew.
2533 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2534
2535 ctx.m_lineType =
2536 whatIsTheLine(line, false, false, false, &ctx.m_startOfCodeInList, &ctx.m_indent,
2537 ctx.m_lineType == BlockType::EmptyLine, true, &ctx.m_indents);
2538
2539 makeLineMain(ctx, line, emptyLinesCount, ctx.m_indent, ns, currentLineNumber);
2540
// One more footnote - go on reading, anything else is left to the caller.
2541 if (ctx.m_type == BlockType::Footnote) {
2542 wasEmptyLine = false;
2543
2544 continue;
2545 } else {
2546 break;
2547 }
2548 }
2549 }
2550
// The stream ended while lines were still collected.
2551 if (stream.atEnd() && !ctx.m_fragment.empty()) {
2552 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2553 }
2554}
2555
2556template<class Trait>
2557inline void
2558Parser<Trait>::finishHtml(ParserContext &ctx,
2559 std::shared_ptr<Block<Trait>> parent,
2560 std::shared_ptr<Document<Trait>> doc,
2561 bool collectRefLinks,
2562 bool top,
2563 bool dontProcessLastFreeHtml)
2564{
2565 if (!collectRefLinks || top) {
2566 if (ctx.m_html.m_html->isFreeTag()) {
2567 if (!dontProcessLastFreeHtml) {
2568 if (ctx.m_html.m_parent) {
2569 ctx.m_html.m_parent->appendItem(ctx.m_html.m_html);
2570
2571 updateLastPosInList(ctx.m_html);
2572 } else {
2573 parent->appendItem(ctx.m_html.m_html);
2574 }
2575 }
2576 } else {
2577 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
2578 p->appendItem(ctx.m_html.m_html);
2579 p->setStartColumn(ctx.m_html.m_html->startColumn());
2580 p->setStartLine(ctx.m_html.m_html->startLine());
2581 p->setEndColumn(ctx.m_html.m_html->endColumn());
2582 p->setEndLine(ctx.m_html.m_html->endLine());
2583 doc->appendItem(p);
2584 }
2585 }
2586
2587 if (!dontProcessLastFreeHtml) {
2588 resetHtmlTag(ctx.m_html);
2589 }
2590
2591 ctx.m_html.m_toAdjustLastPos.clear();
2592}
2593
2594template<class Trait>
2595inline void
2596Parser<Trait>::makeLineMain(ParserContext &ctx,
2597 const typename Trait::InternalString &line,
2598 long long int emptyLinesCount,
2599 const ListIndent &currentIndent,
2600 long long int ns,
2601 long long int currentLineNumber)
2602{
2603 if (ctx.m_html.m_htmlBlockType >= 6) {
2604 ctx.m_html.m_continueHtml = (emptyLinesCount <= 0);
2605 }
2606
2607 ctx.m_type = ctx.m_lineType;
2608
2609 switch (ctx.m_type) {
2610 case BlockType::List:
2611 case BlockType::ListWithFirstEmptyLine: {
2612 if (ctx.m_indents.empty())
2613 ctx.m_indents.push_back(currentIndent.m_indent);
2614
2615 ctx.m_indent = currentIndent;
2616 } break;
2617
2618 case BlockType::Code:
2619 ctx.m_startOfCode = startSequence<Trait>(line.asString());
2620 break;
2621
2622 default:
2623 break;
2624 }
2625
2626 if (!line.isEmpty() && ns < line.length()) {
2627 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2628 }
2629
2630 ctx.m_lineCounter = 1;
2631 ctx.m_emptyLinesCount = 0;
2632 ctx.m_emptyLinesBefore = emptyLinesCount;
2633}
2634
2635template<class Trait>
2636inline std::pair<long long int, bool>
2637Parser<Trait>::parseFragmentAndMakeNextLineMain(ParserContext &ctx,
2638 std::shared_ptr<Block<Trait>> parent,
2639 std::shared_ptr<Document<Trait>> doc,
2640 typename Trait::StringList &linksToParse,
2641 const typename Trait::String &workingPath,
2642 const typename Trait::String &fileName,
2643 bool collectRefLinks,
2644 const typename Trait::InternalString &line,
2645 const ListIndent &currentIndent,
2646 long long int ns,
2647 long long int currentLineNumber)
2648{
2649 const auto empty = ctx.m_emptyLinesCount;
2650
2651 const auto ret = parseFragment(ctx, parent, doc, linksToParse, workingPath,
2652 fileName, collectRefLinks);
2653
2654 makeLineMain(ctx, line, empty, currentIndent, ns, currentLineNumber);
2655
2656 return ret;
2657}
2658
2659template<class Trait>
2660inline bool
2661Parser<Trait>::isListType(BlockType t)
2662{
2663 switch (t) {
2664 case BlockType::List:
2665 case BlockType::ListWithFirstEmptyLine:
2666 return true;
2667
2668 default:
2669 return false;
2670 }
2671}
2672
2673template<class Trait>
2674std::pair<typename Trait::InternalString, bool>
2675Parser<Trait>::readLine(typename Parser<Trait>::ParserContext &ctx,
2677{
2678 ctx.m_htmlCommentData.clear();
2679
2680 auto line = stream.readLine();
2681
2682 static const char16_t c_zeroReplaceWith[2] = {0xFFFD, 0};
2683
2684 line.first.replace(typename Trait::Char(0), Trait::utf16ToString(&c_zeroReplaceWith[0]));
2685
2686 checkForHtmlComments(line.first, stream, ctx.m_htmlCommentData);
2687
2688 return line;
2689}
2690
2691template<class Trait>
2692inline std::pair<long long int, bool>
2693Parser<Trait>::parseFirstStep(ParserContext &ctx,
2695 std::shared_ptr<Block<Trait>> parent,
2696 std::shared_ptr<Document<Trait>> doc,
2697 typename Trait::StringList &linksToParse,
2698 const typename Trait::String &workingPath,
2699 const typename Trait::String &fileName,
2700 bool collectRefLinks)
2701{
2702 while (!stream.atEnd()) {
2703 const auto currentLineNumber = stream.currentLineNumber();
2704
2705 typename Trait::InternalString line;
2706 bool mayBreak;
2707
2708 std::tie(line, mayBreak) = readLine(ctx, stream);
2709
2710 if (ctx.m_lineType != BlockType::Unknown) {
2711 ctx.m_prevLineType = ctx.m_lineType;
2712 }
2713
2714 ctx.m_lineType = whatIsTheLine(line,
2715 (ctx.m_emptyLineInList || isListType(ctx.m_type)),
2716 ctx.m_prevLineType == BlockType::ListWithFirstEmptyLine,
2717 ctx.m_fensedCodeInList,
2718 &ctx.m_startOfCodeInList,
2719 &ctx.m_indent,
2720 ctx.m_lineType == BlockType::EmptyLine,
2721 true,
2722 &ctx.m_indents);
2723
2724 if (isListType(ctx.m_type) && ctx.m_lineType == BlockType::FensedCodeInList) {
2725 ctx.m_fensedCodeInList = !ctx.m_fensedCodeInList;
2726 }
2727
2728 const auto currentIndent = ctx.m_indent;
2729
2730 const auto ns = skipSpaces<Trait>(0, line.asString());
2731
2732 const auto indentInListValue = indentInList(&ctx.m_indents, ns, true);
2733
2734 if (isListType(ctx.m_lineType) && !ctx.m_fensedCodeInList && ctx.m_indent.m_level > -1) {
2735 if (ctx.m_indent.m_level < (long long int)ctx.m_indents.size()) {
2736 ctx.m_indents.erase(ctx.m_indents.cbegin() + ctx.m_indent.m_level, ctx.m_indents.cend());
2737 }
2738
2739 ctx.m_indents.push_back(ctx.m_indent.m_indent);
2740 }
2741
2742 if (ctx.m_type == BlockType::CodeIndentedBySpaces && ns > 3) {
2743 ctx.m_lineType = BlockType::CodeIndentedBySpaces;
2744 }
2745
2746 if (ctx.m_type == BlockType::ListWithFirstEmptyLine && ctx.m_lineCounter == 2 &&
2747 !isListType(ctx.m_lineType)) {
2748 if (ctx.m_emptyLinesCount > 0) {
2749 const auto l = parseFragmentAndMakeNextLineMain(ctx,
2750 parent,
2751 doc,
2752 linksToParse,
2753 workingPath,
2754 fileName,
2755 collectRefLinks,
2756 line,
2757 currentIndent,
2758 ns,
2759 currentLineNumber);
2760
2761 if (l.first != -1) {
2762 return l;
2763 }
2764
2765 continue;
2766 } else {
2767 ctx.m_emptyLineInList = false;
2768 ctx.m_emptyLinesCount = 0;
2769 }
2770 }
2771
2772 if (ctx.m_type == BlockType::ListWithFirstEmptyLine && ctx.m_lineCounter == 2) {
2773 ctx.m_type = BlockType::List;
2774 }
2775
2776 // Footnote.
2777 if (ctx.m_lineType == BlockType::Footnote) {
2778 const auto l = parseFragmentAndMakeNextLineMain(ctx,
2779 parent,
2780 doc,
2781 linksToParse,
2782 workingPath,
2783 fileName,
2784 collectRefLinks,
2785 line,
2786 currentIndent,
2787 ns,
2788 currentLineNumber);
2789
2790 if (l.first != -1) {
2791 return l;
2792 }
2793
2794 eatFootnote(ctx, stream, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2795
2796 continue;
2797 }
2798
2799 // First line of the fragment.
2800 if (ns != line.length() && ctx.m_type == BlockType::EmptyLine) {
2801 makeLineMain(ctx, line, ctx.m_emptyLinesCount, currentIndent, ns, currentLineNumber);
2802
2803 continue;
2804 } else if (ns == line.length() && ctx.m_type == BlockType::EmptyLine) {
2805 continue;
2806 }
2807
2808 ++ctx.m_lineCounter;
2809
2810 // Got new empty line.
2811 if (ns == line.length()) {
2812 ++ctx.m_emptyLinesCount;
2813
2814 switch (ctx.m_type) {
2815 case BlockType::Blockquote: {
2816 const auto l = parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName,
2817 collectRefLinks);
2818
2819 if (l.first != -1) {
2820 return l;
2821 }
2822
2823 continue;
2824 }
2825
2826 case BlockType::Text:
2827 case BlockType::CodeIndentedBySpaces:
2828 continue;
2829 break;
2830
2831 case BlockType::Code: {
2832 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData, mayBreak}});
2833 ctx.m_emptyLinesCount = 0;
2834
2835 continue;
2836 }
2837
2838 case BlockType::List:
2839 case BlockType::ListWithFirstEmptyLine: {
2840 ctx.m_emptyLineInList = true;
2841
2842 continue;
2843 }
2844
2845 default:
2846 break;
2847 }
2848 }
2849 // Empty new line in list.
2850 else if (ctx.m_emptyLineInList) {
2851 if (indentInListValue || isListType(ctx.m_lineType) || ctx.m_lineType == BlockType::SomethingInList) {
2852 for (long long int i = 0; i < ctx.m_emptyLinesCount; ++i) {
2853 ctx.m_fragment.push_back({typename Trait::String(),
2854 {currentLineNumber - ctx.m_emptyLinesCount + i, {}, false}});
2855 }
2856
2857 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData, mayBreak}});
2858
2859 ctx.m_emptyLineInList = false;
2860 ctx.m_emptyLinesCount = 0;
2861
2862 continue;
2863 } else {
2864 const auto empty = ctx.m_emptyLinesCount;
2865
2866 const auto l = parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName,
2867 collectRefLinks);
2868
2869 if (l.first != -1) {
2870 return l;
2871 }
2872
2873 ctx.m_lineType = whatIsTheLine(line, false, false, false, nullptr, nullptr,
2874 true, false, &ctx.m_indents);
2875
2876 makeLineMain(ctx, line, empty, currentIndent, ns, currentLineNumber);
2877
2878 continue;
2879 }
2880 } else if (ctx.m_emptyLinesCount > 0) {
2881 if (ctx.m_type == BlockType::CodeIndentedBySpaces &&
2882 ctx.m_lineType == BlockType::CodeIndentedBySpaces) {
2883 const auto indent = skipSpaces<Trait>(0, ctx.m_fragment.front().first.asString());
2884
2885 for (long long int i = 0; i < ctx.m_emptyLinesCount; ++i) {
2886 ctx.m_fragment.push_back({typename Trait::String(indent, Trait::latin1ToChar(' ')),
2887 {currentLineNumber - ctx.m_emptyLinesCount + i, {}, false}});
2888 }
2889
2890 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData, mayBreak}});
2891 ctx.m_emptyLinesCount = 0;
2892 } else {
2893 const auto l = parseFragmentAndMakeNextLineMain(ctx,
2894 parent,
2895 doc,
2896 linksToParse,
2897 workingPath,
2898 fileName,
2899 collectRefLinks,
2900 line,
2901 currentIndent,
2902 ns,
2903 currentLineNumber);
2904
2905 if (l.first != -1) {
2906 return l;
2907 }
2908 }
2909
2910 continue;
2911 }
2912
2913 // Something new and first block is not a code block or a list, blockquote.
2914 if (ctx.m_type != ctx.m_lineType && ctx.m_type != BlockType::Code &&
2915 !isListType(ctx.m_type) && ctx.m_type != BlockType::Blockquote) {
2916 if (ctx.m_type == BlockType::Text && ctx.m_lineType == BlockType::CodeIndentedBySpaces) {
2917 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData, mayBreak}});
2918 }
2919 else {
2920 if (ctx.m_type == BlockType::Text && isListType(ctx.m_lineType)) {
2921 if (ctx.m_lineType != BlockType::ListWithFirstEmptyLine) {
2922 int num = 0;
2923
2924 if (isOrderedList<Trait>(line.asString(), &num)) {
2925 if (num != 1) {
2926 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData, mayBreak}});
2927
2928 continue;
2929 }
2930 }
2931 } else {
2932 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData, mayBreak}});
2933
2934 continue;
2935 }
2936 }
2937
2938 const auto l = parseFragmentAndMakeNextLineMain(ctx,
2939 parent,
2940 doc,
2941 linksToParse,
2942 workingPath,
2943 fileName,
2944 collectRefLinks,
2945 line,
2946 currentIndent,
2947 ns,
2948 currentLineNumber);
2949
2950 if (l.first != -1) {
2951 return l;
2952 }
2953 }
2954 }
2955 // End of code block.
2956 else if (ctx.m_type == BlockType::Code && ctx.m_type == ctx.m_lineType &&
2957 !ctx.m_startOfCode.isEmpty() &&
2958 startSequence<Trait>(line.asString()).contains(ctx.m_startOfCode) &&
2959 isCodeFences<Trait>(line.asString(), true)) {
2960 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData, mayBreak}});
2961
2962 const auto l = parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName,
2963 collectRefLinks);
2964
2965 if (l.first != -1) {
2966 return l;
2967 }
2968 }
2969 // Not a continue of list.
2970 else if (ctx.m_type != ctx.m_lineType && isListType(ctx.m_type) &&
2971 ctx.m_lineType != BlockType::SomethingInList &&
2972 ctx.m_lineType != BlockType::FensedCodeInList && !isListType(ctx.m_lineType)) {
2973 const auto l = parseFragmentAndMakeNextLineMain(ctx,
2974 parent,
2975 doc,
2976 linksToParse,
2977 workingPath,
2978 fileName,
2979 collectRefLinks,
2980 line,
2981 currentIndent,
2982 ns,
2983 currentLineNumber);
2984
2985 if (l.first != -1) {
2986 return l;
2987 }
2988 } else if (ctx.m_type == BlockType::Heading) {
2989 const auto l = parseFragmentAndMakeNextLineMain(ctx,
2990 parent,
2991 doc,
2992 linksToParse,
2993 workingPath,
2994 fileName,
2995 collectRefLinks,
2996 line,
2997 currentIndent,
2998 ns,
2999 currentLineNumber);
3000
3001 if (l.first != -1) {
3002 return l;
3003 }
3004 } else {
3005 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData, mayBreak}});
3006 }
3007
3008 ctx.m_emptyLinesCount = 0;
3009 }
3010
3011 if (!ctx.m_fragment.empty()) {
3012 if (ctx.m_type == BlockType::Code) {
3013 ctx.m_fragment.push_back({ctx.m_startOfCode, {-1, {}, false}});
3014 }
3015
3016 const auto l = parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName,
3017 collectRefLinks);
3018
3019 if (l.first != -1) {
3020 return l;
3021 }
3022 }
3023
3024 return {-1, false};
3025}
3026
3027template<class Trait>
3028inline void
3029Parser<Trait>::parseSecondStep(ParserContext &ctx,
3030 std::shared_ptr<Block<Trait>> parent,
3031 std::shared_ptr<Document<Trait>> doc,
3032 typename Trait::StringList &linksToParse,
3033 const typename Trait::String &workingPath,
3034 const typename Trait::String &fileName,
3035 bool collectRefLinks,
3036 bool top,
3037 bool dontProcessLastFreeHtml)
3038{
3039 if (top) {
3040 resetHtmlTag(ctx.m_html);
3041
3042 for (long long int i = 0; i < (long long int)ctx.m_splitted.size(); ++i) {
3043 parseFragment(ctx.m_splitted[i], parent, doc, linksToParse, workingPath, fileName, false,
3044 ctx.m_html);
3045
3046 if (ctx.m_html.m_htmlBlockType >= 6) {
3047 ctx.m_html.m_continueHtml = (!ctx.m_splitted[i].m_emptyLineAfter);
3048 }
3049
3050 if (ctx.m_html.m_html && !ctx.m_html.m_continueHtml) {
3051 finishHtml(ctx, parent, doc, collectRefLinks, top, dontProcessLastFreeHtml);
3052 } else if (!ctx.m_html.m_html) {
3053 ctx.m_html.m_toAdjustLastPos.clear();
3054 }
3055 }
3056 }
3057
3058 if (ctx.m_html.m_html) {
3059 finishHtml(ctx, parent, doc, collectRefLinks, top, dontProcessLastFreeHtml);
3060 }
3061}
3062
3063template<class Trait>
3064inline std::pair<RawHtmlBlock<Trait>, long long int>
3066 std::shared_ptr<Block<Trait>> parent,
3067 std::shared_ptr<Document<Trait>> doc,
3068 typename Trait::StringList &linksToParse,
3069 const typename Trait::String &workingPath,
3070 const typename Trait::String &fileName,
3071 bool collectRefLinks,
3072 bool top,
3073 bool dontProcessLastFreeHtml,
3074 bool stopOnMayBreakList)
3075{
3076 ParserContext ctx;
3077
3078 auto line = parseFirstStep(ctx, stream, parent, doc, linksToParse, workingPath, fileName,
3079 collectRefLinks);
3080
3081 while (line.first != -1 && !(stopOnMayBreakList && line.second)) {
3082 stream.setLineNumber(line.first);
3083
3084 line = parseFirstStep(ctx, stream, parent, doc, linksToParse, workingPath, fileName,
3085 collectRefLinks);
3086 }
3087
3088 parseSecondStep(ctx, parent, doc, linksToParse, workingPath, fileName,
3089 collectRefLinks, top, dontProcessLastFreeHtml);
3090
3091 return {ctx.m_html, line.first};
3092}
3093
3094#ifdef MD4QT_QT_SUPPORT
3095
3096template<>
3097inline void
3098Parser<QStringTrait>::parseFile(const QString &fileName,
3099 bool recursive,
3100 std::shared_ptr<Document<QStringTrait>> doc,
3101 const QStringList &ext,
3102 QStringList *parentLinks)
3103{
3104 QFileInfo fi(fileName);
3105
3106 if (fi.exists() && ext.contains(fi.suffix().toLower())) {
3107 QFile f(fileName);
3108
3109 if (f.open(QIODevice::ReadOnly)) {
3110 QTextStream s(f.readAll());
3111 f.close();
3112
3113 parseStream(s, fi.absolutePath(), fi.fileName(), recursive, doc, ext, parentLinks);
3114 }
3115 }
3116}
3117
3118#endif
3119
3120#ifdef MD4QT_ICU_STL_SUPPORT
3121
3122template<>
3123inline void
3124Parser<UnicodeStringTrait>::parseFile(const UnicodeString &fileName,
3125 bool recursive,
3126 std::shared_ptr<Document<UnicodeStringTrait>> doc,
3127 const std::vector<UnicodeString> &ext,
3128 std::vector<UnicodeString> *parentLinks)
3129{
3130 if (UnicodeStringTrait::fileExists(fileName)) {
3131 std::string fn;
3132 fileName.toUTF8String(fn);
3133
3134 try {
3135 auto e = UnicodeString::fromUTF8(std::filesystem::u8path(fn).extension().u8string());
3136
3137 if (!e.isEmpty()) {
3138 e.remove(0, 1);
3139 }
3140
3141 if (std::find(ext.cbegin(), ext.cend(), e.toLower()) != ext.cend()) {
3142 auto path = std::filesystem::canonical(std::filesystem::u8path(fn));
3143 std::ifstream file(path.c_str(), std::ios::in | std::ios::binary);
3144
3145 if (file.good()) {
3146 const auto fileNameS = path.filename().u8string();
3147 auto workingDirectory = path.remove_filename().u8string();
3148
3149 if (!workingDirectory.empty()) {
3150 workingDirectory.erase(workingDirectory.size() - 1, 1);
3151 }
3152
3153 std::replace(workingDirectory.begin(), workingDirectory.end(), '\\', '/');
3154
3155 parseStream(file, UnicodeString::fromUTF8(workingDirectory),
3156 UnicodeString::fromUTF8(fileNameS), recursive, doc, ext, parentLinks);
3157
3158 file.close();
3159 }
3160 }
3161 } catch (const std::exception &) {
3162 }
3163 }
3164}
3165
3166#endif
3167
3168//! Resolve links in the document.
3169template<class Trait>
3170void
3171resolveLinks(typename Trait::StringList &linksToParse,
3172 std::shared_ptr<Document<Trait>> doc)
3173{
3174 for (auto it = linksToParse.begin(), last = linksToParse.end(); it != last; ++it) {
3175 auto nextFileName = *it;
3176
3177 if (nextFileName.startsWith(Trait::latin1ToString("#"))) {
3178 const auto lit = doc->labeledLinks().find(nextFileName);
3179
3180 if (lit != doc->labeledLinks().cend()) {
3181 nextFileName = lit->second->url();
3182 } else {
3183 continue;
3184 }
3185 }
3186
3187 if (Trait::fileExists(nextFileName)) {
3188 *it = Trait::absoluteFilePath(nextFileName);
3189 }
3190 }
3191}
3192
3193template<class Trait>
3194inline void
3195Parser<Trait>::parseStream(typename Trait::TextStream &s,
3196 const typename Trait::String &workingPath,
3197 const typename Trait::String &fileName,
3198 bool recursive,
3199 std::shared_ptr<Document<Trait>> doc,
3200 const typename Trait::StringList &ext,
3201 typename Trait::StringList *parentLinks)
3202{
3203 typename Trait::StringList linksToParse;
3204
3205 const auto path = workingPath.isEmpty() ? typename Trait::String(fileName) :
3206 typename Trait::String(workingPath + Trait::latin1ToString("/") + fileName);
3207
3208 doc->appendItem(std::shared_ptr<Anchor<Trait>>(new Anchor<Trait>(path)));
3209
3210 typename MdBlock<Trait>::Data data;
3211
3212 {
3213 TextStream<Trait> stream(s);
3214
3215 long long int i = 0;
3216
3217 while (!stream.atEnd()) {
3218 data.push_back(std::pair<typename Trait::InternalString, MdLineData>(stream.readLine(), {i}));
3219 ++i;
3220 }
3221 }
3222
3223 StringListStream<Trait> stream(data);
3224
3225 parse(stream, doc, doc, linksToParse, workingPath, fileName, true, true);
3226
3227 m_parsedFiles.push_back(path);
3228
3229 resolveLinks<Trait>(linksToParse, doc);
3230
3231 // Parse all links if parsing is recursive.
3232 if (recursive && !linksToParse.empty()) {
3233 const auto tmpLinks = linksToParse;
3234
3235 while (!linksToParse.empty()) {
3236 auto nextFileName = linksToParse.front();
3237 linksToParse.erase(linksToParse.cbegin());
3238
3239 if (parentLinks) {
3240 const auto pit = std::find(parentLinks->cbegin(), parentLinks->cend(), nextFileName);
3241
3242 if (pit != parentLinks->cend()) {
3243 continue;
3244 }
3245 }
3246
3247 if (nextFileName.startsWith(Trait::latin1ToString("#"))) {
3248 continue;
3249 }
3250
3251 const auto pit = std::find(m_parsedFiles.cbegin(), m_parsedFiles.cend(), nextFileName);
3252
3253 if (pit == m_parsedFiles.cend()) {
3254 if (!doc->isEmpty() && doc->items().back()->type() != ItemType::PageBreak) {
3255 doc->appendItem(std::shared_ptr<PageBreak<Trait>>(new PageBreak<Trait>));
3256 }
3257
3258 parseFile(nextFileName, recursive, doc, ext, &linksToParse);
3259 }
3260 }
3261
3262 if (parentLinks) {
3263 std::copy(tmpLinks.cbegin(), tmpLinks.cend(), std::back_inserter(*parentLinks));
3264 }
3265 }
3266}
3267
//! \return Position of first character in list item.
/*!
    Leading spaces are skipped, then digits if \p ordered is set, then one
    more character (the marker), then spaces after the marker. If the line
    ends there or more than 4 spaces follow the marker, the position right
    after the first of them is returned. If no space follows the marker, the
    position one character further is returned.
*/
template<class Trait>
inline long long int
posOfListItem(const typename Trait::String &s,
              bool ordered)
{
    long long int pos = 0;

    while (pos < s.size() && s[pos].isSpace()) {
        ++pos;
    }

    if (ordered) {
        while (pos < s.size() && s[pos].isDigit()) {
            ++pos;
        }
    }

    // Skip the marker itself.
    ++pos;

    long long int spaces = 0;

    while (pos < s.size() && s[pos].isSpace()) {
        ++pos;
        ++spaces;
    }

    if (pos == s.length() || spaces > 4) {
        return pos - spaces + 1;
    }

    if (spaces == 0) {
        return pos + 1;
    }

    return pos;
}
3310
//! \return Level in indents for the given position.
/*!
    Indents are checked from the last one to the first one, the level is the
    one-based index of the first indent found that is not greater than \p pos,
    or 0 if there is no such indent.
*/
inline long long int
listLevel(const std::vector<long long int> &indents,
          long long int pos)
{
    auto level = static_cast<long long int>(indents.size());

    while (level > 0 && pos < indents[static_cast<std::size_t>(level - 1)]) {
        --level;
    }

    return level;
}
3328
3329template<class Trait>
3330inline typename Parser<Trait>::BlockType
3331Parser<Trait>::whatIsTheLine(typename Trait::InternalString &str,
3332 bool inList,
3333 bool inListWithFirstEmptyLine,
3334 bool fensedCodeInList,
3335 typename Trait::String *startOfCode,
3336 ListIndent *indent,
3337 bool emptyLinePreceded,
3338 bool calcIndent,
3339 const std::vector<long long int> *indents)
3340{
3341 replaceTabs<Trait>(str);
3342
3343 const auto first = skipSpaces<Trait>(0, str.asString());
3344
3345 if (first < str.length()) {
3346 auto s = str.sliced(first);
3347
3348 const bool isBlockquote = s.asString().startsWith(Trait::latin1ToString(">"));
3349 const bool indentIn = indentInList(indents, first, false);
3350 bool isHeading = false;
3351
3352 if (first < 4 && isFootnote<Trait>(s.asString())) {
3353 return BlockType::Footnote;
3354 }
3355
3356 if (s.asString().startsWith(Trait::latin1ToString("#")) &&
3357 (indent ? first - indent->m_indent < 4 : first < 4)) {
3358 long long int c = 0;
3359
3360 while (c < s.length() && s[c] == Trait::latin1ToChar('#')) {
3361 ++c;
3362 }
3363
3364 if (c <= 6 && ((c < s.length() && s[c].isSpace()) || c == s.length())) {
3365 isHeading = true;
3366 }
3367 }
3368
3369 if (inList) {
3370 bool isFirstLineEmpty = false;
3371 const auto orderedList = isOrderedList<Trait>(str.asString(), nullptr, nullptr, nullptr,
3372 &isFirstLineEmpty);
3373 const bool fensedCode = isCodeFences<Trait>(s.asString());
3374 const auto codeIndentedBySpaces = emptyLinePreceded && first >= 4 &&
3375 !indentInList(indents, first, true);
3376
3377 if (fensedCodeInList) {
3378 if (indentInList(indents, first, true)) {
3379 if (fensedCode) {
3380 if (startOfCode && startSequence<Trait>(s.asString()).contains(*startOfCode)) {
3381 return BlockType::FensedCodeInList;
3382 }
3383 }
3384
3385 return BlockType::SomethingInList;
3386 }
3387 }
3388
3389 if (fensedCode && indentIn) {
3390 if (startOfCode) {
3391 *startOfCode = startSequence<Trait>(s.asString());
3392 }
3393
3394 return BlockType::FensedCodeInList;
3395 } else if ((((s.asString().startsWith(Trait::latin1ToString("-")) ||
3396 s.asString().startsWith(Trait::latin1ToString("+")) ||
3397 s.asString().startsWith(Trait::latin1ToString("*"))) &&
3398 ((s.length() > 1 && s[1] == Trait::latin1ToChar(' ')) || s.length() == 1)) ||
3399 orderedList) && (first < 4 || indentIn)) {
3400 if (codeIndentedBySpaces) {
3401 return BlockType::CodeIndentedBySpaces;
3402 }
3403
3404 if (indent && calcIndent) {
3405 indent->m_indent = posOfListItem<Trait>(str.asString(), orderedList);
3406 indent->m_level = (indents ? listLevel(*indents, first) : -1);
3407 }
3408
3409 if (s.simplified().length() == 1 || isFirstLineEmpty) {
3410 return BlockType::ListWithFirstEmptyLine;
3411 } else {
3412 return BlockType::List;
3413 }
3414 } else if (indentInList(indents, first, true)) {
3415 return BlockType::SomethingInList;
3416 }
3417 else {
3418 if (!isHeading && !isBlockquote &&
3419 !(fensedCode && first < 4) && !emptyLinePreceded && !inListWithFirstEmptyLine) {
3420 return BlockType::SomethingInList;
3421 }
3422 }
3423 } else {
3424 bool isFirstLineEmpty = false;
3425
3426 const auto orderedList = isOrderedList<Trait>(str.asString(), nullptr, nullptr, nullptr,
3427 &isFirstLineEmpty);
3428 const bool isHLine = first < 4 && isHorizontalLine<Trait>(s.asString());
3429
3430 if (!isHLine &&
3431 (((s.asString().startsWith(Trait::latin1ToString("-")) || s.asString().startsWith(Trait::latin1ToString("+")) ||
3432 s.asString().startsWith(Trait::latin1ToString("*"))) &&
3433 ((s.length() > 1 && s[1] == Trait::latin1ToChar(' ')) || s.length() == 1)) ||
3434 orderedList) && first < 4) {
3435 if (indent && calcIndent) {
3436 indent->m_indent = posOfListItem<Trait>(str.asString(), orderedList);
3437 indent->m_level = (indents ? listLevel(*indents, first) : -1);
3438 }
3439
3440 if (s.simplified().length() == 1 || isFirstLineEmpty) {
3441 return BlockType::ListWithFirstEmptyLine;
3442 } else {
3443 return BlockType::List;
3444 }
3445 }
3446 }
3447
3448 if (str.asString().startsWith(typename Trait::String(4, Trait::latin1ToChar(' ')))) {
3449 return BlockType::CodeIndentedBySpaces;
3450 } else if (isCodeFences<Trait>(str.asString())) {
3451 return BlockType::Code;
3452 } else if (isBlockquote) {
3453 return BlockType::Blockquote;
3454 } else if (isHeading) {
3455 return BlockType::Heading;
3456 }
3457 } else {
3458 return BlockType::EmptyLine;
3459 }
3460
3461 return BlockType::Text;
3462}
3463
3464template<class Trait>
3465inline long long int
3466Parser<Trait>::parseFragment(MdBlock<Trait> &fr,
3467 std::shared_ptr<Block<Trait>> parent,
3468 std::shared_ptr<Document<Trait>> doc,
3469 typename Trait::StringList &linksToParse,
3470 const typename Trait::String &workingPath,
3471 const typename Trait::String &fileName,
3472 bool collectRefLinks,
3473 RawHtmlBlock<Trait> &html)
3474{
3475 if (html.m_continueHtml) {
3476 parseText(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3477 } else {
3478 if (html.m_html) {
3479 if (!collectRefLinks) {
3480 parent->appendItem(html.m_html);
3481 }
3482
3483 resetHtmlTag(html);
3484 }
3485
3486 switch (whatIsTheLine(fr.m_data.front().first)) {
3487 case BlockType::Footnote:
3488 parseFootnote(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
3489 break;
3490
3491 case BlockType::Text:
3492 parseText(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3493 break;
3494
3495 case BlockType::Blockquote:
3496 return parseBlockquote(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3497 break;
3498
3499 case BlockType::Code:
3500 return parseCode(fr, parent, collectRefLinks);
3501 break;
3502
3503 case BlockType::CodeIndentedBySpaces: {
3504 int indent = 1;
3505
3506 if (fr.m_data.front().first.asString().startsWith(Trait::latin1ToString(" "))) {
3507 indent = 4;
3508 }
3509
3510 return parseCodeIndentedBySpaces(fr, parent, collectRefLinks, indent, {}, -1, -1, false);
3511 } break;
3512
3513 case BlockType::Heading:
3514 parseHeading(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
3515 break;
3516
3517 case BlockType::List:
3518 case BlockType::ListWithFirstEmptyLine:
3519 return parseList(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3520
3521 default:
3522 break;
3523 }
3524 }
3525
3526 return -1;
3527}
3528
3529template<class Trait>
3530inline void
3531Parser<Trait>::clearCache()
3532{
3533 m_parsedFiles.clear();
3534}
3535
//! \return Number of columns in table, if the given string is a table header.
/*!
    0 is returned if there is no "|" in \p s. Otherwise the string is
    simplified, one leading and one trailing "|" are dropped, and the number of
    the rest of "|", that are not escaped with a backslash, plus one is returned.
*/
template<class Trait>
inline int
isTableHeader(const typename Trait::String &s)
{
    if (!s.contains(Trait::latin1ToChar('|'))) {
        return 0;
    }

    const auto simplified = s.simplified();
    const auto from = simplified.startsWith(Trait::latin1ToString("|")) ? 1 : 0;
    const auto count = simplified.size() - from -
        (simplified.endsWith(Trait::latin1ToString("|")) && simplified.size() > 1 ? 1 : 0);
    const auto cells = simplified.sliced(from, count);

    // N separators give N + 1 columns.
    int columns = 1;
    bool escaped = false;

    for (long long int i = 0; i < cells.size(); ++i) {
        if (escaped) {
            // Character after a backslash is never a separator.
            escaped = false;
        } else if (cells[i] == Trait::latin1ToChar('\\')) {
            escaped = true;
        } else if (cells[i] == Trait::latin1ToChar('|')) {
            ++columns;
        }
    }

    return columns;
}
3573
3574template<class Trait>
3575inline void
3576Parser<Trait>::parseText(MdBlock<Trait> &fr,
3577 std::shared_ptr<Block<Trait>> parent,
3578 std::shared_ptr<Document<Trait>> doc,
3579 typename Trait::StringList &linksToParse,
3580 const typename Trait::String &workingPath,
3581 const typename Trait::String &fileName,
3582 bool collectRefLinks,
3583 RawHtmlBlock<Trait> &html)
3584{
3585 const auto h = isTableHeader<Trait>(fr.m_data.front().first.asString());
3586 const auto c = fr.m_data.size() > 1 ? isTableAlignment<Trait>(fr.m_data[1].first.asString()) : 0;
3587
3588 if (c && h && c == h && !html.m_continueHtml) {
3589 parseTable(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, c);
3590
3591 if (!fr.m_data.empty()) {
3592 StringListStream<Trait> stream(fr.m_data);
3593
3594 Parser<Trait>::parse(stream, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
3595 }
3596 } else {
3597 parseParagraph(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3598 }
3599}
3600
3601//! Find and remove heading label.
3602template<class Trait>
3603inline std::pair<typename Trait::String, WithPosition>
3604findAndRemoveHeaderLabel(typename Trait::InternalString &s)
3605{
3606 const auto start = s.asString().indexOf(Trait::latin1ToString("{#"));
3607
3608 if (start >= 0) {
3609 long long int p = start + 2;
3610
3611 for (; p < s.length(); ++p) {
3612 if (s[p] == Trait::latin1ToChar('}')) {
3613 break;
3614 }
3615 }
3616
3617 if (p < s.length() && s[p] == Trait::latin1ToChar('}')) {
3618 WithPosition pos;
3619 pos.setStartColumn(s.virginPos(start));
3620 pos.setEndColumn(s.virginPos(p));
3621
3622 const auto label = s.sliced(start, p - start + 1).asString();
3623 s.remove(start, p - start + 1);
3624 return {label, pos};
3625 }
3626 }
3627
3628 return {};
3629}
3630
//! Convert string to label.
/*!
    Letters, digits, "-" and "_" are kept (in lower case), every space
    character is replaced with "-", all other characters are dropped.
*/
template<class Trait>
inline typename Trait::String
stringToLabel(const typename Trait::String &s)
{
    typename Trait::String label;

    const auto isKept = [](const auto &ch) {
        return ch.isLetter() || ch.isDigit() || ch == Trait::latin1ToChar('-') ||
            ch == Trait::latin1ToChar('_');
    };

    long long int i = 0;

    while (i < s.length()) {
        const auto ch = s[i];

        if (isKept(ch)) {
            label.push_back(ch.toLower());
        } else if (ch.isSpace()) {
            label.push_back(Trait::latin1ToString("-"));
        }

        ++i;
    }

    return label;
}
3651
3652//! Convert Paragraph to label.
//! Concatenates, in item order, the labels produced for Text, Image, Link and
//! Code items of the paragraph; items of any other type contribute nothing.
//! \return Empty string when the paragraph pointer is null.
//! NOTE(review): the declaration line (function name and parameter list) is
//! absent from this listing - the embedded numbering jumps from 3654 to 3656.
//! The recursive calls below suggest the name is paragraphToLabel and that the
//! parameter is a paragraph pointer named p - confirm against the real header.
3653template<class Trait>
3654inline typename Trait::String
3656{
3657 typename Trait::String l;
3658
3659 if (!p) {
3660 return l;
3661 }
3662
3663 for (auto it = p->items().cbegin(), last = p->items().cend(); it != last; ++it) {
3664 switch ((*it)->type()) {
 // Plain text: its label form is appended as is.
3665 case ItemType::Text: {
3666 auto t = static_cast<Text<Trait> *>(it->get());
3667 const auto text = t->text();
3668 l.push_back(stringToLabel<Trait>(text));
3669 } break;
3670
 // Image: prefer the nested paragraph, fall back to the plain text.
3671 case ItemType::Image: {
3672 auto i = static_cast<Image<Trait> *>(it->get());
3673
3674 if (!i->p()->isEmpty()) {
3675 l.push_back(paragraphToLabel(i->p().get()));
3676 } else if (!i->text().isEmpty()) {
3677 l.push_back(stringToLabel<Trait>(i->text()));
3678 }
3679 } break;
3680
 // Link: same rule as for images.
3681 case ItemType::Link: {
3682 auto link = static_cast<Link<Trait> *>(it->get());
3683
3684 if (!link->p()->isEmpty()) {
3685 l.push_back(paragraphToLabel(link->p().get()));
3686 } else if (!link->text().isEmpty()) {
3687 l.push_back(stringToLabel<Trait>(link->text()));
3688 }
3689 } break;
3690
 // Code: only non-empty code text is used.
3691 case ItemType::Code: {
3692 auto c = static_cast<Code<Trait> *>(it->get());
3693
3694 if (!c->text().isEmpty()) {
3695 l.push_back(stringToLabel<Trait>(c->text()));
3696 }
3697 } break;
3698
3699 default:
3700 break;
3701 }
3702 }
3703
3704 return l;
3705}
3706
3707//! Find and remove closing sequence of "#" in heading.
3708template<class Trait>
3709inline WithPosition
3710findAndRemoveClosingSequence(typename Trait::InternalString &s)
3711{
3712 long long int end = -1;
3713 long long int start = -1;
3714
3715 for (long long int i = s.length() - 1; i >= 0; --i) {
3716 if (!s[i].isSpace() && s[i] != Trait::latin1ToChar('#') && end == -1) {
3717 return {};
3718 }
3719
3720 if (s[i] == Trait::latin1ToChar('#')) {
3721 if (end == -1) {
3722 end = i;
3723 }
3724
3725 if (i - 1 >= 0) {
3726 if (s[i - 1].isSpace()) {
3727 start = i;
3728 break;
3729 } else if (s[i - 1] != Trait::latin1ToChar('#')) {
3730 return {};
3731 }
3732 } else {
3733 start = 0;
3734 }
3735 }
3736 }
3737
3738 WithPosition ret;
3739
3740 if (start != -1 && end != -1) {
3741 ret.setStartColumn(s.virginPos(start));
3742 ret.setEndColumn(s.virginPos(end));
3743
3744 s.remove(start, end - start + 1);
3745 }
3746
3747 return ret;
3748}
3749
//! Parse an ATX heading taken from the first line of \p fr and append it to \p parent.
//! Nothing is done when \p fr has no lines or when \p collectRefLinks is true.
//! The first line of \p fr is consumed (erased) by this method.
//! NOTE(review): this listing lacks the lines numbered 3824 and 3828; the
//! variable html used in the parseFormattedTextLinksImages() call below is not
//! declared in any visible line - presumably it is declared on one of them.
3750template<class Trait>
3751inline void
3752Parser<Trait>::parseHeading(MdBlock<Trait> &fr,
3753 std::shared_ptr<Block<Trait>> parent,
3754 std::shared_ptr<Document<Trait>> doc,
3755 typename Trait::StringList &linksToParse,
3756 const typename Trait::String &workingPath,
3757 const typename Trait::String &fileName,
3758 bool collectRefLinks)
3759{
3760 if (!fr.m_data.empty() && !collectRefLinks) {
3761 auto line = fr.m_data.front().first;
3762
 // The heading spans the first line only: from the first non-space
 // character to the last character of that line.
3763 std::shared_ptr<Heading<Trait>> h(new Heading<Trait>);
3764 h->setStartColumn(line.virginPos(skipSpaces<Trait>(0, line.asString())));
3765 h->setStartLine(fr.m_data.front().second.m_lineNumber);
3766 h->setEndColumn(line.virginPos(line.length() - 1));
3767 h->setEndLine(h->startLine());
3768
3769 long long int pos = 0;
3770 pos = skipSpaces<Trait>(pos, line.asString());
3771
3772 if (pos > 0) {
3773 line = line.sliced(pos);
3774 }
3775
3776 pos = 0;
3777 int lvl = 0;
3778
 // Heading level is the number of leading '#' characters.
3779 while (pos < line.length() && line[pos] == Trait::latin1ToChar('#')) {
3780 ++lvl;
3781 ++pos;
3782 }
3783
 // Opening delimiter covers the run of '#' counted above.
3784 WithPosition startDelim = {h->startColumn(), h->startLine(),
3785 line.virginPos(pos - 1), h->startLine()};
3786
3787 pos = skipSpaces<Trait>(pos, line.asString());
3788
 // Keep only the heading text in the block's first line.
3789 if (pos > 0) {
3790 fr.m_data.front().first = line.sliced(pos);
3791 }
3792
3793 auto label = findAndRemoveHeaderLabel<Trait>(fr.m_data.front().first);
3794
3795 typename Heading<Trait>::Delims delims = {startDelim};
3796
3797 auto endDelim = findAndRemoveClosingSequence<Trait>(fr.m_data.front().first);
3798
 // A start column other than -1 is treated as "closing sequence was found".
3799 if (endDelim.startColumn() != -1) {
3800 endDelim.setStartLine(fr.m_data.front().second.m_lineNumber);
3801 endDelim.setEndLine(endDelim.startLine());
3802
3803 delims.push_back(endDelim);
3804 }
3805
3806 h->setDelims(delims);
3807
3808 h->setLevel(lvl);
3809
 // Explicit label: the surrounding braces are cut with sliced(1, length - 2),
 // then "/", the optional working path and the file name are appended.
3810 if (!label.first.isEmpty()) {
3811 h->setLabel(label.first.sliced(1, label.first.length() - 2) + Trait::latin1ToString("/") +
3812 (!workingPath.isEmpty() ? workingPath + Trait::latin1ToString("/") :
3813 Trait::latin1ToString("")) + fileName);
3814
3815 label.second.setStartLine(fr.m_data.front().second.m_lineNumber);
3816 label.second.setEndLine(label.second.startLine());
3817
3818 h->setLabelPos(label.second);
3819 }
3820
3821 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
3822
 // The heading text is parsed as a one-line block of its own.
3823 typename MdBlock<Trait>::Data tmp;
3825 tmp.push_back(fr.m_data.front());
3826 MdBlock<Trait> block = {tmp, 0};
3827
3829
3830 parseFormattedTextLinksImages(block, p, doc, linksToParse, workingPath, fileName,
3831 false, false, html, false);
3832
3833 fr.m_data.erase(fr.m_data.cbegin());
3834
 // If parsing wrapped the text into a nested paragraph, use that one directly.
3835 if (p->items().size() && p->items().at(0)->type() == ItemType::Paragraph) {
3836 h->setText(std::static_pointer_cast<Paragraph<Trait>>(p->items().at(0)));
3837 } else {
3838 h->setText(p);
3839 }
3840
3841 if (h->isLabeled()) {
3842 doc->insertLabeledHeading(h->label(), h);
3843 } else {
 // No explicit label: generate "#" + label built from the heading text,
 // followed by "/", the optional working path and the file name.
3844 typename Trait::String label = Trait::latin1ToString("#") +
3845 paragraphToLabel(h->text().get());
3846
3847 label += Trait::latin1ToString("/") +
3848 (!workingPath.isEmpty() ? workingPath + Trait::latin1ToString("/") :
3849 Trait::latin1ToString("")) + fileName;
3850
3851 h->setLabel(label);
3852
3853 doc->insertLabeledHeading(label, h);
3854 }
3855
3856 parent->appendItem(h);
3857 }
3858}
3859
//! Prepare data in table cell for parsing.
//! \return Copy of \p s in which every escaped separator "\|" is replaced with "|".
template<class Trait>
inline typename Trait::InternalString
prepareTableData(typename Trait::InternalString s)
{
    const auto escapedSeparator = Trait::latin1ToString("\\|");
    const auto plainSeparator = Trait::latin1ToString("|");

    s.replace(escapedSeparator, plainSeparator);

    return s;
}
3869
3870//! Split table's row on cells.
3871template<class Trait>
3872inline std::pair<typename Trait::InternalStringList, std::vector<long long int>>
3873splitTableRow(const typename Trait::InternalString &s)
3874{
3875 typename Trait::InternalStringList res;
3876 std::vector<long long int> columns;
3877
3878 bool backslash = false;
3879 long long int start = 0;
3880
3881 for (long long int i = 0; i < s.length(); ++i) {
3882 bool now = false;
3883
3884 if (s[i] == Trait::latin1ToChar('\\') && !backslash) {
3885 backslash = true;
3886 now = true;
3887 } else if (s[i] == Trait::latin1ToChar('|') && !backslash) {
3888 res.push_back(prepareTableData<Trait>(s.sliced(start, i - start)));
3889 columns.push_back(s.virginPos(i));
3890 start = i + 1;
3891 }
3892
3893 if (!now) {
3894 backslash = false;
3895 }
3896 }
3897
3898 res.push_back(prepareTableData<Trait>(s.sliced(start, s.length() - start)));
3899
3900 return {res, columns};
3901}
3902
//! Parse a table from \p fr and append it to \p parent.
//! The block must have at least two lines: line 0 is the first row and line 1
//! is the column alignment line. Rows are read until the first line that
//! parseTableRow rejects; all consumed lines are erased from \p fr. The table is
//! appended only when it is not empty and \p collectRefLinks is false.
//! NOTE(review): this listing lacks the lines numbered 3987, 4034, 4038 and
//! 4040. The variables html (used in the cell parsing call) and a (passed to
//! setColumnAlignment()) are not declared in any visible line, and both
//! alignment branches below have no visible body - confirm against the real header.
3903template<class Trait>
3904inline void
3905Parser<Trait>::parseTable(MdBlock<Trait> &fr,
3906 std::shared_ptr<Block<Trait>> parent,
3907 std::shared_ptr<Document<Trait>> doc,
3908 typename Trait::StringList &linksToParse,
3909 const typename Trait::String &workingPath,
3910 const typename Trait::String &fileName,
3911 bool collectRefLinks,
3912 int columnsCount)
3913{
3914 static const char sep = '|';
3915
3916 if (fr.m_data.size() >= 2) {
3917 std::shared_ptr<Table<Trait>> table(new Table<Trait>);
3918 table->setStartColumn(fr.m_data.front().first.virginPos(0));
3919 table->setStartLine(fr.m_data.front().second.m_lineNumber);
3920 table->setEndColumn(fr.m_data.back().first.virginPos(fr.m_data.back().first.length() - 1));
3921 table->setEndLine(fr.m_data.back().second.m_lineNumber);
3922
 // Parses one line as a table row and appends it to the table.
 // Returns false when the line can not be a row, which stops row reading.
3923 auto parseTableRow = [&](const typename MdBlock<Trait>::Line &lineData) -> bool {
3924 const auto &row = lineData.first;
3925
 // NOTE(review): as shown, a row starting with a single space is rejected;
 // the literal may have lost spaces in this listing - verify its real length.
3926 if (row.asString().startsWith(Trait::latin1ToString(" "))) {
3927 return false;
3928 }
3929
3930 auto line = row;
3931 auto p = skipSpaces<Trait>(0, line.asString());
3932
 // Line of spaces only.
3933 if (p == line.length()) {
3934 return false;
3935 }
3936
 // Drop the leading separator together with the spaces before it.
3937 if (line[p] == Trait::latin1ToChar(sep)) {
3938 line.remove(0, p + 1);
3939 }
3940
3941 for (p = line.length() - 1; p >= 0; --p) {
3942 if (!line[p].isSpace()) {
3943 break;
3944 }
3945 }
3946
3947 if (p < 0) {
3948 return false;
3949 }
3950
 // Drop the trailing separator together with the spaces after it.
3951 if (line[p] == Trait::latin1ToChar(sep)) {
3952 line.remove(p, line.length() - p);
3953 }
3954
 // Cell boundaries: row start, every inner separator, row end.
3955 auto columns = splitTableRow<Trait>(line);
3956 columns.second.insert(columns.second.begin(), row.virginPos(0));
3957 columns.second.push_back(row.virginPos(row.length() - 1));
3958
3959 std::shared_ptr<TableRow<Trait>> tr(new TableRow<Trait>);
3960 tr->setStartColumn(row.virginPos(0));
3961 tr->setStartLine(lineData.second.m_lineNumber);
3962 tr->setEndColumn(row.virginPos(row.length() - 1));
3963 tr->setEndLine(lineData.second.m_lineNumber);
3964
3965 int col = 0;
3966
3967 for (auto it = columns.first.begin(), last = columns.first.end(); it != last; ++it, ++col) {
 // Cells beyond the declared number of columns are ignored.
3968 if (col == columnsCount) {
3969 break;
3970 }
3971
3972 std::shared_ptr<TableCell<Trait>> c(new TableCell<Trait>);
3973 c->setStartColumn(columns.second.at(col));
3974 c->setStartLine(lineData.second.m_lineNumber);
3975 c->setEndColumn(columns.second.at(col + 1));
3976 c->setEndLine(lineData.second.m_lineNumber);
3977
3978 if (!it->isEmpty()) {
 // The "&#124;" entity stands for a literal separator inside a cell.
3979 it->replace(Trait::latin1ToString("&#124;"), Trait::latin1ToChar(sep));
3980
 // Cell content is parsed as a one-line block of its own.
3981 typename MdBlock<Trait>::Data fragment;
3982 fragment.push_back({*it, lineData.second});
3983 MdBlock<Trait> block = {fragment, 0};
3984
3985 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
3986
3988
3989 parseFormattedTextLinksImages(block, p, doc, linksToParse, workingPath, fileName,
3990 collectRefLinks, false, html, false);
3991
3992 if (!p->isEmpty()) {
 // Note: the iterators below shadow the ones of the enclosing loops.
3993 for (auto it = p->items().cbegin(), last = p->items().cend(); it != last; ++it ) {
3994 switch ((*it)->type()) {
 // Nested paragraphs are flattened: their items go directly into the cell.
3995 case ItemType::Paragraph: {
3996 const auto pp = std::static_pointer_cast<Paragraph<Trait>>(*it);
3997
3998 for (auto it = pp->items().cbegin(), last = pp->items().cend(); it != last; ++it) {
3999 c->appendItem((*it));
4000 }
4001 }
4002 break;
4003
4004 default:
4005 c->appendItem((*it));
4006 break;
4007 }
4008 }
4009 }
4010
4011 if (html.m_html.get()) {
4012 c->appendItem(html.m_html);
4013 }
4014 }
4015
4016 tr->appendCell(c);
4017 }
4018
4019 if (!tr->isEmpty())
4020 table->appendRow(tr);
4021
4022 return true;
4023 };
4024
 // Column alignments are read from the second line of the block.
4025 {
4026 auto fmt = fr.m_data.at(1).first;
4027
4028 auto columns = fmt.split(typename Trait::InternalString(Trait::latin1ToChar(sep)));
4029
4030 for (auto it = columns.begin(), last = columns.end(); it != last; ++it) {
4031 *it = it->simplified();
4032
4033 if (!it->isEmpty()) {
4035
4036 if (it->asString().endsWith(Trait::latin1ToString(":")) &&
4037 it->asString().startsWith(Trait::latin1ToString(":"))) {
4039 } else if (it->asString().endsWith(Trait::latin1ToString(":"))) {
4041 }
4042
4043 table->setColumnAlignment(table->columnsCount(), a);
4044 }
4045 }
4046 }
4047
 // The alignment line is not a row, remove it before reading rows.
4048 fr.m_data.erase(fr.m_data.cbegin() + 1);
4049
 // Number of lines successfully parsed as rows.
4050 long long int r = 0;
4051
4052 for (const auto &line : std::as_const(fr.m_data)) {
4053 if (!parseTableRow(line)) {
4054 break;
4055 }
4056
4057 ++r;
4058 }
4059
4060 fr.m_data.erase(fr.m_data.cbegin(), fr.m_data.cbegin() + r);
4061
4062 if (!table->isEmpty() && !collectRefLinks) {
4063 parent->appendItem(table);
4064 }
4065 }
4066}
4067
4068//! \return Is the given string a heading's service sequence?
4069template<class Trait>
4070inline bool
4071isH(const typename Trait::String &s,
4072 const typename Trait::Char &c)
4073{
4074 long long int p = skipSpaces<Trait>(0, s);
4075
4076 if (p > 3) {
4077 return false;
4078 }
4079
4080 const auto start = p;
4081
4082 for (; p < s.size(); ++p) {
4083 if (s[p] != c) {
4084 break;
4085 }
4086 }
4087
4088 if (p - start < 1) {
4089 return false;
4090 }
4091
4092 for (; p < s.size(); ++p) {
4093 if (!s[p].isSpace()) {
4094 return false;
4095 }
4096 }
4097
4098 return true;
4099}
4100
4101//! \return Is the given string a heading's service sequence of level 1?
//! Thin wrapper: checks \p s with isH() using '=' as the sequence character.
4102template<class Trait>
4103inline bool
4104isH1(const typename Trait::String &s)
4105{
4106 return isH<Trait>(s, Trait::latin1ToChar('='));
4107}
4108
4109//! \return Is the given string a heading's service sequence of level 2?
//! Thin wrapper: checks \p s with isH() using '-' as the sequence character.
4110template<class Trait>
4111inline bool
4112isH2(const typename Trait::String &s)
4113{
4114 return isH<Trait>(s, Trait::latin1ToChar('-'));
4115}
4116
4117//! \return Previous position in the block.
//! The result is a (position, line) pair. Inside a line the position is simply
//! decremented. At position 0 the result is the virgin position of the last
//! character of the preceding block line; when there is no preceding line the
//! input pair is returned unchanged.
//! NOTE(review): the declaration line (function name and first parameter) is
//! absent from this listing - the embedded numbering jumps from 4119 to 4121.
//! The body uses a block named fr that is not declared in any visible line.
4118template<class Trait>
4119inline std::pair<long long int, long long int>
4121 long long int pos,
4122 long long int line)
4123{
4124 if (pos > 0) {
4125 return {pos - 1, line};
4126 }
4127
 // Find the block line whose line number equals the given one.
4128 for (long long int i = 0; i < static_cast<long long int>(fr.m_data.size()); ++i) {
4129 if (fr.m_data.at(i).second.m_lineNumber == line) {
4130 if (i > 0) {
 // NOTE(review): the returned line is computed as line - 1, while the code
 // for the next position reads m_lineNumber of the neighbour line; the two
 // agree only if line numbers in the block are consecutive - TODO confirm.
4131 return {fr.m_data.at(i - 1).first.virginPos(fr.m_data.at(i - 1).first.length() - 1),
4132 line - 1};
4133 }
4134 }
4135 }
4136
4137 return {pos, line};
4138}
4139
4140//! \return Next position in the block.
//! The result is a (position, line) pair. If pos + 1 does not exceed the virgin
//! position of the last character of the line, the position is incremented.
//! Otherwise the result is the virgin position 0 of the following block line
//! together with its line number; when there is no following line, or the line
//! is not found in the block, the input pair is returned unchanged.
//! NOTE(review): the declaration line (function name and first parameter) is
//! absent from this listing - the embedded numbering jumps from 4142 to 4144.
//! The body uses a block named fr that is not declared in any visible line.
4141template<class Trait>
4142inline std::pair<long long int, long long int>
4144 long long int pos,
4145 long long int line)
4146{
4147 for (long long int i = 0; i < static_cast<long long int>(fr.m_data.size()); ++i) {
4148 if (fr.m_data.at(i).second.m_lineNumber == line) {
 // Still inside the current line.
4149 if (fr.m_data.at(i).first.virginPos(fr.m_data.at(i).first.length() - 1) >= pos + 1) {
4150 return {pos + 1, line};
 // End of line reached: jump to the beginning of the following line.
4151 } else if (i + 1 < static_cast<long long int>(fr.m_data.size())) {
4152 return {fr.m_data.at(i + 1).first.virginPos(0), fr.m_data.at(i + 1).second.m_lineNumber};
4153 } else {
4154 return {pos, line};
4155 }
4156 }
4157 }
4158
4159 return {pos, line};
4160}
4161
//! Parse paragraph.
//! Thin wrapper: forwards all arguments to parseFormattedTextLinksImages(),
//! passing false for the two flags that are not parameters of this method.
4162template<class Trait>
4163inline void
4164Parser<Trait>::parseParagraph(MdBlock<Trait> &fr,
4165 std::shared_ptr<Block<Trait>> parent,
4166 std::shared_ptr<Document<Trait>> doc,
4167 typename Trait::StringList &linksToParse,
4168 const typename Trait::String &workingPath,
4169 const typename Trait::String &fileName,
4170 bool collectRefLinks,
4171 RawHtmlBlock<Trait> &html)
4172{
4173 parseFormattedTextLinksImages(fr, parent, doc, linksToParse, workingPath, fileName,
4174 collectRefLinks, false, html, false);
4175}
4176
//! Helper type with static accessors for the "free tag" flag of RawHtml.
//! NOTE(review): the line with the struct/class head is absent from this
//! listing - the embedded numbering jumps from 4177 to 4179, so the type name
//! can not be read here. Confirm against the real header.
4177template<class Trait>
 //! \return Value of isFreeTag() of the given raw HTML item.
4179 static bool
4180 isFreeTag(std::shared_ptr<RawHtml<Trait>> html)
4181 {
4182 return html->isFreeTag();
4183 }
4184
 //! Forward \p on to setFreeTag() of the given raw HTML item.
4185 static void
4186 setFreeTag(std::shared_ptr<RawHtml<Trait>> html, bool on)
4187 {
4188 html->setFreeTag(on);
4189 }
4190};
4191
//! Collect delimiters of inline constructs from all lines of a block.
//! A line that, after leading spaces (fewer than 4), is a horizontal line or an
//! H1/H2 service sequence yields one whole-line delimiter. Any other line is
//! scanned character by character, a backslash escapes the character after it.
//! NOTE(review): the declaration line (function name and parameters) is absent
//! from this listing - the embedded numbering jumps from 4193 to 4195. The body
//! iterates a container named fr that is not declared in any visible line.
4192template<class Trait>
4193inline typename Parser<Trait>::Delims
4195{
4196 Delims d;
4197
4198 for (long long int line = 0; line < (long long int)fr.size(); ++line) {
4199 const typename Trait::String &str = fr.at(line).first.asString();
4200 const auto p = skipSpaces<Trait>(0, str);
4201 const auto withoutSpaces = str.sliced(p);
4202
4203 if (isHorizontalLine<Trait>(withoutSpaces) && p < 4) {
4204 d.push_back({Delimiter::HorizontalLine, line, 0, str.length(), false, false, false});
4205 } else if (isH1<Trait>(withoutSpaces) && p < 4) {
4206 d.push_back({Delimiter::H1, line, 0, str.length(), false, false, false});
4207 } else if (isH2<Trait>(withoutSpaces) && p < 4) {
4208 d.push_back({Delimiter::H2, line, 0, str.length(), false, false, false});
4209 } else {
 // backslash: previous character was an unescaped backslash.
 // word: ordinary text was seen since the last pushed delimiter.
4210 bool backslash = false;
4211 bool word = false;
4212
4213 for (long long int i = p; i < str.size(); ++i) {
 // now: the backslash flag was raised on this very iteration and
 // must survive until the next character.
4214 bool now = false;
4215
4216 if (str[i] == Trait::latin1ToChar('\\') && !backslash) {
4217 backslash = true;
4218 now = true;
4219 }
4220 // * or _
4221 else if ((str[i] == Trait::latin1ToChar('_') || str[i] == Trait::latin1ToChar('*')) && !backslash) {
4222 typename Trait::String style;
4223
 // Context before the run; line start counts as whitespace and punctuation.
4224 const bool punctBefore = (i > 0 ? str[i - 1].isPunct() || str[i - 1].isSymbol() : true);
4225 const bool uWhitespaceBefore = (i > 0 ? Trait::isUnicodeWhitespace(str[i - 1]) : true);
4226 const bool uWhitespaceOrPunctBefore = uWhitespaceBefore || punctBefore;
4227 const bool alNumBefore = (i > 0 ? str[i - 1].isLetterOrNumber() : false);
4228
4229 const auto ch = str[i];
4230
 // Consume the whole run of the same character.
4231 while (i < str.length() && str[i] == ch) {
4232 style.push_back(str[i]);
4233 ++i;
4234 }
4235
4236 typename Delimiter::DelimiterType dt = Delimiter::Unknown;
4237
4238 if (ch == Trait::latin1ToChar('*')) {
4239 dt = Delimiter::Emphasis1;
4240 } else {
4241 dt = Delimiter::Emphasis2;
4242 }
4243
 // Context after the run; line end counts as whitespace and punctuation.
4244 const bool punctAfter = (i < str.length() ? str[i].isPunct() || str[i].isSymbol() : true);
4245 const bool uWhitespaceAfter = (i < str.length() ? Trait::isUnicodeWhitespace(str[i]) : true);
4246 const bool alNumAfter = (i < str.length() ? str[i].isLetterOrNumber() : false);
 // A run of '_' between two alphanumeric characters is neither left- nor right-flanking.
4247 const bool leftFlanking = !uWhitespaceAfter && (!punctAfter || (punctAfter && uWhitespaceOrPunctBefore))
4248 && !(ch == Trait::latin1ToChar('_') && alNumBefore && alNumAfter);
4249 const bool rightFlanking = !uWhitespaceBefore && (!punctBefore || (punctBefore && (uWhitespaceAfter || punctAfter)))
4250 && !(ch == Trait::latin1ToChar('_') && alNumBefore && alNumAfter);
4251
4252 if (leftFlanking || rightFlanking) {
 // Every character of the run becomes a separate delimiter of length 1.
4253 for (auto j = 0; j < style.length(); ++j) {
4254 d.push_back({dt, line, i - style.length() + j, 1,
4255 word, false, leftFlanking, rightFlanking});
4256 }
4257
4258 word = false;
4259 } else {
4260 word = true;
4261 }
4262
 // Step back: the loop increment will move to the first character after the run.
4263 --i;
4264 }
4265 // ~
4266 else if (str[i] == Trait::latin1ToChar('~') && !backslash) {
4267 typename Trait::String style;
4268
4269 const bool punctBefore = (i > 0 ? str[i - 1].isPunct() || str[i - 1].isSymbol() : true);
4270 const bool uWhitespaceBefore = (i > 0 ? Trait::isUnicodeWhitespace(str[i - 1]) : true);
4271 const bool uWhitespaceOrPunctBefore = uWhitespaceBefore || punctBefore;
4272
4273 while (i < str.length() && str[i] == Trait::latin1ToChar('~')) {
4274 style.push_back(str[i]);
4275 ++i;
4276 }
4277
 // Only runs of one or two '~' can be a strikethrough delimiter;
 // the run is pushed as a single delimiter.
4278 if (style.length() <= 2) {
4279 const bool punctAfter = (i < str.length() ? str[i].isPunct() || str[i].isSymbol() : true);
4280 const bool uWhitespaceAfter = (i < str.length() ? Trait::isUnicodeWhitespace(str[i]) : true);
4281 const bool leftFlanking = !uWhitespaceAfter && (!punctAfter || (punctAfter && uWhitespaceOrPunctBefore));
4282 const bool rightFlanking = !uWhitespaceBefore && (!punctBefore || (punctBefore && (uWhitespaceAfter || punctAfter)));
4283
4284 if (leftFlanking || rightFlanking) {
4285 d.push_back({Delimiter::Strikethrough,
4286 line,
4287 i - style.length(),
4288 style.length(),
4289 word,
4290 false,
4291 leftFlanking,
4292 rightFlanking});
4293
4294 word = false;
4295 } else {
4296 word = true;
4297 }
4298 } else {
4299 word = true;
4300 }
4301
4302 --i;
4303 }
4304 // [
4305 else if (str[i] == Trait::latin1ToChar('[') && !backslash) {
4306 d.push_back({Delimiter::SquareBracketsOpen, line, i, 1, word, false});
4307
4308 word = false;
4309 }
4310 // !
4311 else if (str[i] == Trait::latin1ToChar('!') && !backslash) {
 // Only "![" is a delimiter (of length 2); a lone '!' is ordinary text.
4312 if (i + 1 < str.length()) {
4313 if (str[i + 1] == Trait::latin1ToChar('[')) {
4314 d.push_back({Delimiter::ImageOpen, line, i, 2, word, false});
4315
4316 ++i;
4317
4318 word = false;
4319 } else {
4320 word = true;
4321 }
4322 } else {
4323 word = true;
4324 }
4325 }
4326 // (
4327 else if (str[i] == Trait::latin1ToChar('(') && !backslash) {
4328 d.push_back({Delimiter::ParenthesesOpen, line, i, 1, word, false});
4329
4330 word = false;
4331 }
4332 // ]
4333 else if (str[i] == Trait::latin1ToChar(']') && !backslash) {
4334 d.push_back({Delimiter::SquareBracketsClose, line, i, 1, word, false});
4335
4336 word = false;
4337 }
4338 // )
4339 else if (str[i] == Trait::latin1ToChar(')') && !backslash) {
4340 d.push_back({Delimiter::ParenthesesClose, line, i, 1, word, false});
4341
4342 word = false;
4343 }
4344 // <
4345 else if (str[i] == Trait::latin1ToChar('<') && !backslash) {
4346 d.push_back({Delimiter::Less, line, i, 1, word, false});
4347
4348 word = false;
4349 }
4350 // >
4351 else if (str[i] == Trait::latin1ToChar('>') && !backslash) {
4352 d.push_back({Delimiter::Greater, line, i, 1, word, false});
4353
4354 word = false;
4355 }
4356 // `
 // Unlike the branches above, a backtick run is recorded even after a
 // backslash: the delimiter then also covers the backslash and carries
 // the backslash flag.
4357 else if (str[i] == Trait::latin1ToChar('`')) {
4358 typename Trait::String code;
4359
4360 while (i < str.length() && str[i] == Trait::latin1ToChar('`')) {
4361 code.push_back(str[i]);
4362 ++i;
4363 }
4364
4365 d.push_back({Delimiter::InlineCode,
4366 line,
4367 i - code.length() - (backslash ? 1 : 0),
4368 code.length() + (backslash ? 1 : 0),
4369 word,
4370 backslash});
4371
4372 word = false;
4373
4374 --i;
4375 }
4376 // $
4377 else if (str[i] == Trait::latin1ToChar('$')) {
4378 typename Trait::String m;
4379
4380 while (i < str.length() && str[i] == Trait::latin1ToChar('$')) {
4381 m.push_back(str[i]);
4382 ++i;
4383 }
4384
 // Only unescaped runs of one or two '$' are math delimiters.
4385 if (m.length() <= 2 && !backslash) {
4386 d.push_back({Delimiter::Math, line, i - m.length(), m.length(),
4387 false, false, false, false});
4388 }
4389
4390 word = false;
4391
4392 --i;
4393 } else {
4394 word = true;
4395 }
4396
 // The escape applies to exactly one following character.
4397 if (!now) {
4398 backslash = false;
4399 }
4400 }
4401 }
4402 }
4403
4404 return d;
4405}
4406
//! \return Is the given string a line break.
//! A line is a hard line break when it ends with at least two spaces, or with
//! an odd number of backslashes (an even number means every backslash is itself
//! escaped). A single trailing space is not a line break.
template<class Trait>
inline bool
isLineBreak(const typename Trait::String &s)
{
    long long int count = 0;
    long long int pos = s.length() - 1;
    long long int end = s.length() - 1;

    // Count the backslashes that form an uninterrupted run at the very end
    // of the string: each found backslash must sit exactly at the current end.
    while ((pos = Trait::lastIndexOf(s, Trait::latin1ToString("\\"), pos)) != -1 && pos == end) {
        --end;
        --pos;
        ++count;
    }

    // Two trailing spaces are required, the same amount lineBreakLength() reports.
    return (s.endsWith(Trait::latin1ToString("  ")) || (count % 2 != 0));
}
4422
//! \return Length of line break.
//! 2 when the string ends with two spaces (the space form of a hard line
//! break), 1 otherwise (the single trailing backslash form). A single trailing
//! space is not counted as a two-character line break.
template<class Trait>
inline long long int
lineBreakLength(const typename Trait::String &s)
{
    const bool endsWithTwoSpaces = s.endsWith(Trait::latin1ToString("  "));

    return (endsWithTwoSpaces ? 2 : 1);
}
4430
//! Remove line break from the end of string.
//! Only the backslash form is handled: a single trailing backslash is cut off.
//! \return \p s without its last character when it ends with a backslash,
//! otherwise an unchanged copy of \p s.
template<class Trait>
inline typename Trait::String
removeLineBreak(const typename Trait::String &s)
{
    const bool endsWithBackslash = s.endsWith(Trait::latin1ToString("\\"));

    if (!endsWithBackslash) {
        return s;
    }

    return s.sliced(0, s.size() - 1);
}
4442
4443//! Initialize item with style information and set it as last item.
//! The currently open styles are handed over to \p item, the list of open
//! styles in the parsing options is cleared, and \p item is remembered there as
//! the last item with style.
//! NOTE(review): the declaration line (function name and first parameter) is
//! absent from this listing - the embedded numbering jumps from 4445 to 4447.
//! The body uses an object named po that is not declared in any visible line.
4444template<class Trait>
4445inline void
4447 std::shared_ptr<ItemWithOpts<Trait>> item)
4448{
4449 item->openStyles() = po.m_openStyles;
4450 po.m_openStyles.clear();
4451 po.m_lastItemWithStyle = item;
4452}
4453
4454//! Make text item.
//! Creates a Text item for the given text and appends it to the current parent
//! stored in the parsing options; when the resulting string is empty no item
//! is created and only the current position is reset to \p startPos.
//! \p startPos / \p endPos are positions in the block lines with indexes
//! \p startLine / \p endLine.
//! NOTE(review): this listing lacks the lines numbered 4458, 4474, 4477 and
//! 4501. The parameter po and the local string s are not declared in any
//! visible line, and the branch for doRemoveSpacesAtEnd has no visible body -
//! confirm against the real header.
4455template<class Trait>
4456inline void
4457makeTextObject(const typename Trait::String &text,
4459 long long int startPos,
4460 long long int startLine,
4461 long long int endPos,
4462 long long int endLine,
4463 bool doRemoveSpacesAtEnd = false)
4464{
 // A negative end position means the text ended on the previous line.
4465 if (endPos < 0 && endLine - 1 >= 0) {
4466 endPos = po.m_fr.m_data.at(endLine - 1).first.length() - 1;
4467 --endLine;
4468 }
4469
 // Text that reaches the end of its line always gets the end-of-line treatment.
4470 if (endPos == po.m_fr.m_data.at(endLine).first.length() - 1) {
4471 doRemoveSpacesAtEnd = true;
4472 }
4473
4475
4476 if (doRemoveSpacesAtEnd) {
4478 }
4479
 // Text starting at the beginning of a line loses its leading spaces.
4480 if (startPos == 0) {
4481 if (s.length()) {
4482 const auto p = skipSpaces<Trait>(0, s);
4483
4484 if (p > 0) {
4485 s.remove(0, p);
4486 }
4487 }
4488 }
4489
4490 if (!s.isEmpty()) {
 // The raw (unprocessed) text is recorded together with its start position.
4491 po.m_rawTextData.push_back({text, startPos, startLine});
4492
4493 std::shared_ptr<Text<Trait>> t(new Text<Trait>);
4494 t->setText(s);
4495 t->setOpts(po.m_opts);
4496 t->setStartColumn(po.m_fr.m_data.at(startLine).first.virginPos(startPos));
4497 t->setStartLine(po.m_fr.m_data.at(startLine).second.m_lineNumber);
4498 t->setEndColumn(po.m_fr.m_data.at(endLine).first.virginPos(endPos, true));
4499 t->setEndLine(po.m_fr.m_data.at(endLine).second.m_lineNumber);
4500
4502
 // The parent now ends where this text ends.
4503 po.m_parent->setEndColumn(t->endColumn());
4504 po.m_parent->setEndLine(t->endLine());
4505
4506 po.m_wasRefLink = false;
4507 po.m_firstInParagraph = false;
4508 po.m_parent->appendItem(t);
4509
4510 po.m_lastText = t;
4511 } else {
4512 po.m_pos = startPos;
4513 }
4514}
4515
4516//! Make text item with line break.
//! First creates the text item via makeTextObject() with removal of spaces at
//! the end requested, then appends a LineBreak item that covers the rest of the
//! line with index \p endLine, starting right after \p endPos.
//! NOTE(review): the line numbered 4520 is absent from this listing; the
//! parameter po used in the body is not declared in any visible line.
4517template<class Trait>
4518inline void
4519makeTextObjectWithLineBreak(const typename Trait::String &text,
4521 long long int startPos,
4522 long long int startLine,
4523 long long int endPos,
4524 long long int endLine)
4525{
4526 makeTextObject(text, po, startPos, startLine, endPos, endLine, true);
4527
 // The line break item holds everything after endPos up to the end of the line.
4528 std::shared_ptr<LineBreak<Trait>> hr(new LineBreak<Trait>);
4529 hr->setText(po.m_fr.m_data.at(endLine).first.asString().sliced(endPos + 1));
4530 hr->setStartColumn(po.m_fr.m_data.at(endLine).first.virginPos(endPos + 1));
4531 hr->setStartLine(po.m_fr.m_data.at(endLine).second.m_lineNumber);
4532 hr->setEndColumn(po.m_fr.m_data.at(endLine).first.virginPos(po.m_fr.m_data.at(endLine).first.length() - 1));
4533 hr->setEndLine(po.m_fr.m_data.at(endLine).second.m_lineNumber);
 // The parent now ends where the line break ends.
4534 po.m_parent->setEndColumn(hr->endColumn());
4535 po.m_parent->setEndLine(hr->endLine());
4536 po.m_wasRefLink = false;
4537 po.m_firstInParagraph = false;
4538 po.m_parent->appendItem(hr);
4539}
4540
4541//! Check for table in paragraph.
//! When no text options are active, looks from the current line (or the next
//! one if the current position is not 0) up to \p lastLine for a line that is a
//! table header directly followed by an alignment line with the same number of
//! columns. On success the table start line and columns count are stored and
//! the last text position is set to the end of the line before the table.
//! Otherwise the last text position is set to the end of the last block line.
//! NOTE(review): this listing lacks the lines numbered 4544 and 4556; the
//! function name and the parameter po are not visible, and something may be
//! set on the missing line inside the success branch - confirm against the real header.
4542template<class Trait>
4543inline void
4545 long long int lastLine)
4546{
4547 if (!po.m_opts) {
4548 long long int i = po.m_pos > 0 ? po.m_line + 1 : po.m_line;
4549
4550 for (; i <= lastLine; ++i) {
 // h: result of the header check for line i; c: result of the alignment
 // check for line i + 1 (0 when there is no such line).
4551 const auto h = isTableHeader<Trait>(po.m_fr.m_data[i].first.asString());
4552 const auto c = i + 1 < static_cast<long long int>(po.m_fr.m_data.size()) ?
4553 isTableAlignment<Trait>(po.m_fr.m_data[i + 1].first.asString()) : 0;
4554
4555 if (h && c && c == h) {
4557 po.m_startTableLine = i;
4558 po.m_columnsCount = c;
 // NOTE(review): for i == 0 this becomes -1 and is used as an index on
 // the next line - presumably unreachable or handled by the caller; verify.
4559 po.m_lastTextLine = i - 1;
4560 po.m_lastTextPos = po.m_fr.m_data[po.m_lastTextLine].first.length();
4561
4562 return;
4563 }
4564 }
4565 }
4566
4567 po.m_lastTextLine = po.m_fr.m_data.size() - 1;
4568 po.m_lastTextPos = po.m_fr.m_data.back().first.length();
4569}
4570
4571//! Make text item.
//! Turns everything between the current position stored in the parsing options
//! and the given last position into text items, one per line, inserting line
//! break items where a line ends with a line break. Does nothing when the
//! current position is already at or behind the given last position.
//! NOTE(review): this listing lacks the lines numbered 4574, 4582 and 4676; the
//! function name, the first parameter (an object used as po in the body) and
//! possibly a trailing parameter are not visible, and one statement of the last
//! else branch is missing - confirm against the real header.
4572template<class Trait>
4573inline void
4575 // Inclusive. Don't pass lastLine > actual line position with 0 lastPos. Pass as is,
4576 // i.e. if line length is 18 and you need whole line then pass lastLine = index of line,
4577 // and lastPos = 18, or you may crash here if you will pass lastLine = index of line + 1
4578 // and lastPos = 0...
4579 long long int lastLine,
4580 // Not inclusive
4581 long long int lastPos,
4583{
4584 if (po.m_line > lastLine) {
4585 return;
4586 } else if (po.m_line == lastLine && po.m_pos >= lastPos) {
4587 return;
4588 }
4589
4590 typename Trait::String text;
4591
 // If the current position is past the end of the current line, the text
 // effectively starts at the beginning of the next line.
4592 const auto isLastChar = po.m_pos >= po.m_fr.m_data.at(po.m_line).first.length();
4593 long long int startPos = (isLastChar ? 0 : po.m_pos);
4594 long long int startLine = (isLastChar ? po.m_line + 1 : po.m_line);
4595
 // A line break is considered only when line breaks are not ignored, the line
 // is not the last one of the block and, on the last requested line, the
 // requested range reaches the end of that line.
4596 bool lineBreak =
4597 (!po.m_ignoreLineBreak && po.m_line != (long long int)(po.m_fr.m_data.size() - 1) &&
4598 (po.m_line == lastLine ? (lastPos == po.m_fr.m_data.at(po.m_line).first.length() &&
4599 isLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.virginSubString())) :
4600 isLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.virginSubString())));
4601
 // Emits the collected text followed by a line break item for the current
 // line, then restarts text collection at the beginning of the next line.
4602 // makeTOWLB
4603 auto makeTOWLB = [&]() {
4604 if (po.m_line != (long long int)(po.m_fr.m_data.size() - 1)) {
4605 const auto &line = po.m_fr.m_data.at(po.m_line).first.asString();
4606
4607 makeTextObjectWithLineBreak(text, po, startPos, startLine,
4608 line.length() - lineBreakLength<Trait>(line) - 1, po.m_line);
4609
4610 startPos = 0;
4611 startLine = po.m_line + 1;
4612
4613 text.clear();
4614 }
4615 }; // makeTOWLB
4616
 // First line: from the current position to the end of line or to lastPos.
4617 if (lineBreak) {
4618 text.push_back(removeLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.virginSubString(po.m_pos)));
4619
4620 makeTOWLB();
4621 } else {
4622 const auto length = (po.m_line == lastLine ?
4623 lastPos - po.m_pos : po.m_fr.m_data.at(po.m_line).first.length() - po.m_pos);
4624 const auto s = po.m_fr.m_data.at(po.m_line).first.virginSubString(po.m_pos, length);
4625 text.push_back(s);
4626
4627 po.m_pos = (po.m_line == lastLine ? lastPos : po.m_fr.m_data.at(po.m_line).first.length());
4628
4629 makeTextObject(text,
4630 po,
4631 startPos,
4632 startLine,
4633 po.m_line == lastLine ? lastPos - 1 : po.m_fr.m_data.at(po.m_line).first.length() - 1,
4634 po.m_line);
4635
4636 text.clear();
4637 }
4638
4639 if (po.m_line != lastLine) {
4640 ++po.m_line;
4641
 // Whole lines strictly between the first and the last one.
4642 for (; po.m_line < lastLine; ++po.m_line) {
4643 startPos = 0;
4644 startLine = po.m_line;
4645
4646 lineBreak = (!po.m_ignoreLineBreak && po.m_line != (long long int)(po.m_fr.m_data.size() - 1) &&
4647 isLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.asString()));
4648
4649 const auto s = (lineBreak ? removeLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.virginSubString()) :
4650 po.m_fr.m_data.at(po.m_line).first.virginSubString());
4651 text.push_back(s);
4652
4653 if (lineBreak) {
4654 makeTOWLB();
4655 } else {
4656 makeTextObject(text, po, 0, po.m_line,
4657 po.m_fr.m_data.at(po.m_line).first.length() - 1, po.m_line);
4658 }
4659
4660 text.clear();
4661 }
4662
 // Last line: from its beginning up to lastPos (not inclusive).
4663 lineBreak = (!po.m_ignoreLineBreak && po.m_line != (long long int)(po.m_fr.m_data.size() - 1) &&
4664 lastPos == po.m_fr.m_data.at(po.m_line).first.length() &&
4665 isLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.asString()));
4666
4667 auto s = po.m_fr.m_data.at(po.m_line).first.virginSubString(0, lastPos);
4668
4669 po.m_pos = lastPos;
4670
4671 if (!lineBreak) {
4672 text.push_back(s);
4673
4674 makeTextObject(text, po, 0, lastLine, lastPos - 1, lastLine);
4675 } else {
4677 text.push_back(s);
4678
4679 makeTOWLB();
4680 }
4681 }
4682}
4683
4684//! Skip spaces.
4685template<class Trait>
4686inline void
4687skipSpacesInHtml(long long int &l,
4688 long long int &p,
4689 const typename MdBlock<Trait>::Data &fr)
4690{
4691 while (l < (long long int)fr.size()) {
4692 p = skipSpaces<Trait>(p, fr[l].first.asString());
4693
4694 if (p < fr[l].first.length()) {
4695 return;
4696 }
4697
4698 p = 0;
4699 ++l;
4700 }
4701}
4702
4703//! Read HTML attribute value.
//! Reads a value that is not enclosed in quotes: the position \p p is advanced
//! on the current line until a space, '>' or the end of the line. Meeting any
//! of the characters " ` = < ' makes the value invalid.
//! \return {false, false} for an invalid value; otherwise both flags tell
//! whether at least one character was read.
//! NOTE(review): the declaration line (function name and first parameter) is
//! absent from this listing - the embedded numbering jumps from 4705 to 4707.
//! The body uses a line index named l that is not declared in any visible
//! line; presumably this is the unquoted-value reader called from
//! readHtmlAttrValue() - confirm against the real header.
4704template<class Trait>
4705inline std::pair<bool, bool>
4707 long long int &p,
4708 const typename MdBlock<Trait>::Data &fr)
4709{
4710 static const typename Trait::String notAllowed = Trait::latin1ToString("\"`=<'");
4711
4712 const auto start = p;
4713
4714 for (; p < fr[l].first.length(); ++p) {
4715 if (fr[l].first[p].isSpace()) {
4716 break;
4717 } else if (notAllowed.contains(fr[l].first[p])) {
4718 return {false, false};
4719 } else if (fr[l].first[p] == Trait::latin1ToChar('>')) {
 // '>' ends the value and is left unconsumed.
4720 return {p - start > 0, p - start > 0};
4721 }
4722 }
4723
4724 return {p - start > 0, p - start > 0};
4725}
4726
4727//! Read HTML attribute value.
4728template<class Trait>
4729inline std::pair<bool, bool>
4730readHtmlAttrValue(long long int &l,
4731 long long int &p,
4732 const typename MdBlock<Trait>::Data &fr)
4733{
4734 if (p < fr[l].first.length() && fr[l].first[p] != Trait::latin1ToChar('"') &&
4735 fr[l].first[p] != Trait::latin1ToChar('\'')) {
4736 return readUnquotedHtmlAttrValue<Trait>(l, p, fr);
4737 }
4738
4739 const auto s = fr[l].first[p];
4740
4741 ++p;
4742
4743 if (p >= fr[l].first.length()) {
4744 return {false, false};
4745 }
4746
4747 for (; l < (long long int)fr.size(); ++l) {
4748 bool doBreak = false;
4749
4750 for (; p < fr[l].first.length(); ++p) {
4751 const auto ch = fr[l].first[p];
4752
4753 if (ch == s) {
4754 doBreak = true;
4755
4756 break;
4757 }
4758 }
4759
4760 if (doBreak) {
4761 break;
4762 }
4763
4764 p = 0;
4765 }
4766
4767 if (l >= (long long int)fr.size()) {
4768 return {false, false};
4769 }
4770
4771 if (p >= fr[l].first.length()) {
4772 return {false, false};
4773 }
4774
4775 if (fr[l].first[p] != s) {
4776 return {false, false};
4777 }
4778
4779 ++p;
4780
4781 return {true, true};
4782}
4783
4784//! Read HTML attribute.
4785template<class Trait>
4786inline std::pair<bool, bool>
4787readHtmlAttr(long long int &l,
4788 long long int &p,
4789 const typename MdBlock<Trait>::Data &fr,
4790 bool checkForSpace)
4791{
4792 long long int tl = l, tp = p;
4793
4794 skipSpacesInHtml<Trait>(l, p, fr);
4795
4796 if (l >= (long long int)fr.size()) {
4797 return {false, false};
4798 }
4799
4800 // /
4801 if (p < fr[l].first.length() && fr[l].first[p] == Trait::latin1ToChar('/')) {
4802 return {false, true};
4803 }
4804
4805 // >
4806 if (p < fr[l].first.length() && fr[l].first[p] == Trait::latin1ToChar('>')) {
4807 return {false, true};
4808 }
4809
4810 if (checkForSpace) {
4811 if (tl == l && tp == p) {
4812 return {false, false};
4813 }
4814 }
4815
4816 const auto start = p;
4817
4818 for (; p < fr[l].first.length(); ++p) {
4819 const auto ch = fr[l].first[p];
4820
4821 if (ch.isSpace() || ch == Trait::latin1ToChar('>') || ch == Trait::latin1ToChar('=')) {
4822 break;
4823 }
4824 }
4825
4826 const typename Trait::String name = fr[l].first.asString().sliced(start, p - start).toLower();
4827
4828 if (!name.startsWith(Trait::latin1ToString("_")) && !name.startsWith(Trait::latin1ToString(":")) &&
4829 !name.isEmpty() && !(name[0].unicode() >= 97 && name[0].unicode() <= 122)) {
4830 return {false, false};
4831 }
4832
4833 static const typename Trait::String allowedInName =
4834 Trait::latin1ToString("abcdefghijklmnopqrstuvwxyz0123456789_.:-");
4835
4836 for (long long int i = 1; i < name.length(); ++i) {
4837 if (!allowedInName.contains(name[i])) {
4838 return {false, false};
4839 }
4840 }
4841
4842 // >
4843 if (p < fr[l].first.length() && fr[l].first[p] == Trait::latin1ToChar('>')) {
4844 return {false, true};
4845 }
4846
4847 tl = l;
4848 tp = p;
4849
4850 skipSpacesInHtml<Trait>(l, p, fr);
4851
4852 if (l >= (long long int)fr.size()) {
4853 return {false, false};
4854 }
4855
4856 // =
4857 if (p < fr[l].first.length()) {
4858 if (fr[l].first[p] != Trait::latin1ToChar('=')) {
4859 l = tl;
4860 p = tp;
4861
4862 return {true, true};
4863 } else {
4864 ++p;
4865 }
4866 } else {
4867 return {true, false};
4868 }
4869
4870 skipSpacesInHtml<Trait>(l, p, fr);
4871
4872 if (l >= (long long int)fr.size()) {
4873 return {false, false};
4874 }
4875
4876 return readHtmlAttrValue<Trait>(l, p, fr);
4877}
4878
4879//! \return Is HTML tag at the given position?
4880template<class Trait>
4881inline std::tuple<bool, long long int, long long int, bool, typename Trait::String>
4882isHtmlTag(long long int line, long long int pos, TextParsingOpts<Trait> &po, int rule);
4883
4884//! \return Is after the given position only HTML tags?
4885template<class Trait>
4886inline bool
4888 long long int pos,
4890 int rule)
4891{
4892 static const std::set<typename Trait::String> s_rule1Finish = {Trait::latin1ToString("/pre"),
4893 Trait::latin1ToString("/script"),
4894 Trait::latin1ToString("/style"),
4895 Trait::latin1ToString("/textarea")};
4896
4897 auto p = skipSpaces<Trait>(pos, po.m_fr.m_data[line].first.asString());
4898
4899 while (p < po.m_fr.m_data[line].first.length()) {
4900 bool ok = false;
4901
4902 long long int l;
4903 typename Trait::String tag;
4904
4905 std::tie(ok, l, p, std::ignore, tag) = isHtmlTag(line, p, po, rule);
4906
4907 ++p;
4908
4909 if (rule != 1) {
4910 if (!ok) {
4911 return false;
4912 }
4913
4914 if (l > line) {
4915 return true;
4916 }
4917 } else {
4918 if (s_rule1Finish.find(tag.toLower()) != s_rule1Finish.cend() && l == line) {
4919 return true;
4920 }
4921
4922 if (l > line) {
4923 return false;
4924 }
4925 }
4926
4927 p = skipSpaces<Trait>(p, po.m_fr.m_data[line].first.asString());
4928 }
4929
4930 if (p >= po.m_fr.m_data[line].first.length()) {
4931 return true;
4932 }
4933
4934 return false;
4935}
4936
4937//! \return Is setext heading in the lines?
4938template<class Trait>
4939inline bool
4941 long long int startLine,
4942 long long int endLine)
4943{
4944 for (; startLine <= endLine; ++startLine) {
4945 const auto pos = skipSpaces<Trait>(0, po.m_fr.m_data.at(startLine).first.asString());
4946 const auto line = po.m_fr.m_data.at(startLine).first.asString().sliced(pos);
4947
4948 if ((isH1<Trait>(line) || isH2<Trait>(line)) && pos < 4) {
4949 return true;
4950 }
4951 }
4952
4953 return false;
4954}
4955
4956//! \return Is HTML tag at the given position?
4957template<class Trait>
4958inline std::tuple<bool, long long int, long long int, bool, typename Trait::String>
4959isHtmlTag(long long int line,
4960 long long int pos,
4962 int rule)
4963{
4964 if (po.m_fr.m_data[line].first[pos] != Trait::latin1ToChar('<')) {
4965 return {false, line, pos, false, {}};
4966 }
4967
4968 typename Trait::String tag;
4969
4970 long long int l = line;
4971 long long int p = pos + 1;
4972 bool first = false;
4973
4974 {
4975 const auto tmp = skipSpaces<Trait>(0, po.m_fr.m_data[l].first.asString());
4976 first = (tmp == pos);
4977 }
4978
4979 if (p >= po.m_fr.m_data[l].first.length()) {
4980 return {false, line, pos, first, tag};
4981 }
4982
4983 bool closing = false;
4984
4985 if (po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('/')) {
4986 closing = true;
4987
4988 tag.push_back(Trait::latin1ToChar('/'));
4989
4990 ++p;
4991 }
4992
4993 const auto start = p;
4994
4995 // tag
4996 for (; p < po.m_fr.m_data[l].first.length(); ++p) {
4997 const auto ch = po.m_fr.m_data[l].first[p];
4998
4999 if (ch.isSpace() || ch == Trait::latin1ToChar('>') || ch == Trait::latin1ToChar('/')) {
5000 break;
5001 }
5002 }
5003
5004 tag.push_back(po.m_fr.m_data[l].first.asString().sliced(start, p - start));
5005
5006 if (p < po.m_fr.m_data[l].first.length() && po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('/')) {
5007 if (p + 1 < po.m_fr.m_data[l].first.length() &&
5008 po.m_fr.m_data[l].first[p + 1] == Trait::latin1ToChar('>')) {
5009 long long int tmp = 0;
5010
5011 if (rule == 7) {
5012 tmp = skipSpaces<Trait>(p + 2, po.m_fr.m_data[l].first.asString());
5013 }
5014
5015 bool onLine = (first && (rule == 7 ? tmp == po.m_fr.m_data[l].first.length() :
5016 isOnlyHtmlTagsAfterOrClosedRule1(l, p + 2, po, rule == 1)));
5017
5018 if (!isSetextHeadingBetween(po, line, l)) {
5019 return {true, l, p + 1, onLine, tag};
5020 } else {
5021 return {false, line, pos, first, tag};
5022 }
5023 } else {
5024 return {false, line, pos, first, tag};
5025 }
5026 }
5027
5028 if (p < po.m_fr.m_data[l].first.length() && po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('>')) {
5029 long long int tmp = 0;
5030
5031 if (rule == 7) {
5032 tmp = skipSpaces<Trait>(p + 1, po.m_fr.m_data[l].first.asString());
5033 }
5034
5035 bool onLine = (first && (rule == 7 ? tmp == po.m_fr.m_data[l].first.length() :
5036 isOnlyHtmlTagsAfterOrClosedRule1(l, p + 1, po, rule == 1)));
5037
5038 if (!isSetextHeadingBetween(po, line, l)) {
5039 return {true, l, p, onLine, tag};
5040 } else {
5041 return {false, line, pos, first, tag};
5042 }
5043 }
5044
5045 skipSpacesInHtml<Trait>(l, p, po.m_fr.m_data);
5046
5047 if (l >= (long long int)po.m_fr.m_data.size()) {
5048 return {false, line, pos, first, tag};
5049 }
5050
5051 if (po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('>')) {
5052 long long int tmp = 0;
5053
5054 if (rule == 7) {
5055 tmp = skipSpaces<Trait>(p + 1, po.m_fr.m_data[l].first.asString());
5056 }
5057
5058 bool onLine = (first && (rule == 7 ? tmp == po.m_fr.m_data[l].first.length() :
5059 isOnlyHtmlTagsAfterOrClosedRule1(l, p + 1, po, rule == 1)));
5060
5061 if (!isSetextHeadingBetween(po, line, l)) {
5062 return {true, l, p, onLine, tag};
5063 } else {
5064 return {false, line, pos, first, tag};
5065 }
5066 }
5067
5068 bool attr = true;
5069 bool firstAttr = true;
5070
5071 while (attr) {
5072 bool ok = false;
5073
5074 std::tie(attr, ok) = readHtmlAttr<Trait>(l, p, po.m_fr.m_data, !firstAttr);
5075
5076 firstAttr = false;
5077
5078 if (closing && attr) {
5079 return {false, line, pos, first, tag};
5080 }
5081
5082 if (!ok) {
5083 return {false, line, pos, first, tag};
5084 }
5085 }
5086
5087 if (po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('/')) {
5088 ++p;
5089 } else {
5090 skipSpacesInHtml<Trait>(l, p, po.m_fr.m_data);
5091
5092 if (l >= (long long int)po.m_fr.m_data.size()) {
5093 return {false, line, pos, first, tag};
5094 }
5095 }
5096
5097 if (po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('>')) {
5098 long long int tmp = 0;
5099
5100 if (rule == 7) {
5101 tmp = skipSpaces<Trait>(p + 1, po.m_fr.m_data[l].first.asString());
5102 }
5103
5104 bool onLine = (first && (rule == 7 ? tmp == po.m_fr.m_data[l].first.length() :
5105 isOnlyHtmlTagsAfterOrClosedRule1(l, p + 1, po, rule == 1)));
5106
5107 if (!isSetextHeadingBetween(po, line, l)) {
5108 return {true, l, p, onLine, tag};
5109 } else {
5110 return {false, line, pos, first, tag};
5111 }
5112 }
5113
5114 return {false, line, pos, first, {}};
5115}
5116
5117//! Read HTML tag.
5118template<class Trait>
5119inline std::pair<typename Trait::String, bool>
5120Parser<Trait>::readHtmlTag(typename Delims::iterator it,
5121 TextParsingOpts<Trait> &po)
5122{
5123 long long int i = it->m_pos + 1;
5124 const auto start = i;
5125
5126 if (start >= po.m_fr.m_data[it->m_line].first.length()) {
5127 return {{}, false};
5128 }
5129
5130 for (; i < po.m_fr.m_data[it->m_line].first.length(); ++i) {
5131 const auto ch = po.m_fr.m_data[it->m_line].first[i];
5132
5133 if (ch.isSpace() || ch == Trait::latin1ToChar('>')) {
5134 break;
5135 }
5136 }
5137
5138 return {po.m_fr.m_data[it->m_line].first.asString().sliced(start, i - start),
5139 i < po.m_fr.m_data[it->m_line].first.length() ?
5140 po.m_fr.m_data[it->m_line].first[i] == Trait::latin1ToChar('>') : false};
5141}
5142
5143template<class Trait>
5144inline typename Parser<Trait>::Delims::iterator
5145Parser<Trait>::findIt(typename Delims::iterator it,
5146 typename Delims::iterator last,
5147 TextParsingOpts<Trait> &po)
5148{
5149 auto ret = it;
5150
5151 for (; it != last; ++it) {
5152 if ((it->m_line == po.m_line && it->m_pos < po.m_pos) || it->m_line < po.m_line) {
5153 ret = it;
5154 } else {
5155 break;
5156 }
5157 }
5158
5159 return ret;
5160}
5161
5162//! Read HTML data.
//!
//! Appends text of the block from (\p line, \p pos) till (\p toLine, \p toPos)
//! to the current RawHtml item (po.m_html.m_html), updates its end position and
//! moves the current position of \p po behind the consumed text.
//! Negative \p toPos means "till the end of line \p toLine".
//! When \p finish is true the item may be appended to the parent (or stored in
//! po.m_tmpHtml when reference links are collected), otherwise the HTML is
//! marked as continued.
//! \p continueEating disables inserting of empty lines before the appended text.
//!
//! NOTE(review): numbering of this listing jumps over 5169 (in the parameter list)
//! and over 5234 (between the two top-level "if" statements), so a parameter
//! and a statement seem to be lost here. Call sites pass "po" as the fifth
//! argument; \p onLine is not used by any visible line. Confirm against
//! the original header.
5163template<class Trait>
5164inline void
5165eatRawHtml(long long int line,
5166 long long int pos,
5167 long long int toLine,
5168 long long int toPos,
5170 bool finish,
5171 int htmlRule,
5172 bool onLine,
5173 bool continueEating = false)
5174{
5175 if (line <= toLine) {
5176 typename Trait::String h = po.m_html.m_html->text();
5177
// Restore empty lines that were before this fragment (not when eating is continued).
5178 if (!h.isEmpty() && !continueEating) {
5179 for (long long int i = 0; i < po.m_fr.m_emptyLinesBefore; ++i) {
5180 h.push_back(Trait::latin1ToChar('\n'));
5181 }
5182 }
5183
// Part of the first line: till toPos if everything is on one line and toPos
// is not negative, otherwise till the end of the line.
5184 const auto first = po.m_fr.m_data[line].first.asString().sliced(
5185 pos,
5186 (line == toLine ? (toPos >= 0 ? toPos - pos : po.m_fr.m_data[line].first.length() - pos) :
5187 po.m_fr.m_data[line].first.length() - pos));
5188
// New line is added only if already collected HTML ended on another line.
5189 if (!h.isEmpty() && !first.isEmpty() && po.m_html.m_html->endLine() != po.m_fr.m_data[line].second.m_lineNumber) {
5190 h.push_back(Trait::latin1ToChar('\n'));
5191 }
5192
5193 if (!first.isEmpty()) {
5194 h.push_back(first);
5195 }
5196
5197 ++line;
5198
// Lines between the first and the last ones are taken completely.
5199 for (; line < toLine; ++line) {
5200 h.push_back(Trait::latin1ToChar('\n'));
5201 h.push_back(po.m_fr.m_data[line].first.asString());
5202 }
5203
// Beginning of the last line (the whole line for negative toPos).
// Nothing is taken from the last line if toPos is 0.
5204 if (line == toLine && toPos != 0) {
5205 h.push_back(Trait::latin1ToChar('\n'));
5206 h.push_back(po.m_fr.m_data[line].first.asString().sliced(0, toPos > 0 ?
5207 toPos : po.m_fr.m_data[line].first.length()));
5208 }
5209
5210 auto endColumn = toPos;
5211 auto endLine = toLine;
5212
// Zero toPos means that HTML ended at the end of the previous line.
5213 if (endColumn == 0 && endLine > 0) {
5214 --endLine;
5215 endColumn = po.m_fr.m_data.at(endLine).first.length();
5216 }
5217
5218 po.m_html.m_html->setEndColumn(po.m_fr.m_data.at(endLine).first.virginPos(endColumn >= 0 ?
5219 endColumn - 1 : po.m_fr.m_data.at(endLine).first.length() - 1));
5220 po.m_html.m_html->setEndLine(po.m_fr.m_data.at(endLine).second.m_lineNumber);
5221
// Move the current position behind the consumed text.
5222 po.m_line = (toPos >= 0 ? toLine : toLine + 1);
5223 po.m_pos = (toPos >= 0 ? toPos : 0);
5224
// Go to the start of the next line if the current one is consumed completely
// and there is a next line.
5225 if (po.m_line + 1 < static_cast<long long int>(po.m_fr.m_data.size()) &&
5226 po.m_pos >= po.m_fr.m_data.at(po.m_line).first.length()) {
5227 ++po.m_line;
5228 po.m_pos = 0;
5229 }
5230
5231 po.m_html.m_html->setText(h);
5232 }
5233
5235
5236 if (finish) {
// HTML is emitted if it is on its own line, or it is rule 7, or not the whole
// block was consumed.
5237 if (po.m_html.m_onLine || htmlRule == 7 || po.m_line < (long long int)po.m_fr.m_data.size()) {
5238 if (!po.m_collectRefLinks) {
5239 po.m_parent->appendItem(po.m_html.m_html);
5240 po.m_parent->setEndColumn(po.m_html.m_html->endColumn());
5241 po.m_parent->setEndLine(po.m_html.m_html->endLine());
5242 initLastItemWithOpts<Trait>(po, po.m_html.m_html);
5243 po.m_html.m_html->setOpts(po.m_opts);
5244 po.m_lastText = nullptr;
5245 } else {
// While reference links are collected the item is only remembered.
5246 po.m_tmpHtml = po.m_html.m_html;
5247 }
5248
5249 resetHtmlTag(po.m_html);
5250 }
5251 } else {
5252 po.m_html.m_continueHtml = true;
5253 }
5254}
5255
//! \return Is a line that starts a new block among lines of \p fr
//! from \p startLine + 1 to \p endLine (both inclusive)?
//!
//! A line is treated as a start of a new block if whatIsTheLine() reports one of
//! the types listed in the switch, or if the line is indented with less than
//! 4 spaces and the rest of it is accepted by isHorizontalLine(), isH1() or isH2().
//!
//! NOTE(review): numbering of this listing jumps from 5265 to 5275, so "case"
//! labels of the switch are lost here. Confirm the list of block types against
//! the original header.
5256template<class Trait>
5257inline bool
5258Parser<Trait>::isNewBlockIn(MdBlock<Trait> &fr,
5259 long long int startLine,
5260 long long int endLine)
5261{
// startLine itself is not checked.
5262 for (auto i = startLine + 1; i <= endLine; ++i) {
5263 const auto type = whatIsTheLine(fr.m_data[i].first);
5264
5265 switch (type) {
5275 return true;
5276
5277 default:
5278 break;
5279 }
5280
// Count of leading spaces on the line.
5281 const auto ns = skipSpaces<Trait>(0, fr.m_data[i].first.asString());
5282
5283 if (ns < 4) {
5284 const auto s = fr.m_data[i].first.asString().sliced(ns);
5285
5286 if (isHorizontalLine<Trait>(s) || isH1<Trait>(s) || isH2<Trait>(s)) {
5287 return true;
5288 }
5289 }
5290 }
5291
5292 return false;
5293}
5294
5295template<class Trait>
5296inline void
5297Parser<Trait>::finishRule1HtmlTag(typename Delims::iterator it,
5298 typename Delims::iterator last,
5299 TextParsingOpts<Trait> &po,
5300 bool skipFirst)
5301{
5302 static const std::set<typename Trait::String> s_finish = {Trait::latin1ToString("/pre"),
5303 Trait::latin1ToString("/script"),
5304 Trait::latin1ToString("/style"),
5305 Trait::latin1ToString("/textarea")};
5306
5307 if (it != last) {
5308 bool ok = false;
5309 long long int l = -1, p = -1;
5310
5311 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less && skipFirst) {
5312 std::tie(ok, l, p, po.m_html.m_onLine, std::ignore) =
5313 isHtmlTag(it->m_line, it->m_pos, po, 1);
5314 }
5315
5316 if (po.m_html.m_onLine) {
5317 for (it = (skipFirst && it != last ? std::next(it) : it); it != last; ++it) {
5318 if (it->m_type == Delimiter::Less) {
5319 typename Trait::String tag;
5320 bool closed = false;
5321
5322 std::tie(tag, closed) = readHtmlTag(it, po);
5323
5324 if (closed) {
5325 if (s_finish.find(tag.toLower()) != s_finish.cend()) {
5326 eatRawHtml(po.m_line, po.m_pos, it->m_line, -1, po,
5327 true, 1, po.m_html.m_onLine);
5328
5329 return;
5330 }
5331 }
5332 }
5333 }
5334 } else if (ok && !isNewBlockIn(po.m_fr, it->m_line, l)) {
5335 eatRawHtml(po.m_line, po.m_pos, l, p + 1, po, true, 1, false);
5336
5337 return;
5338 } else {
5339 resetHtmlTag(po.m_html);
5340
5341 return;
5342 }
5343 }
5344
5345 if (po.m_html.m_onLine) {
5346 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 1, po.m_html.m_onLine);
5347 } else {
5348 resetHtmlTag(po.m_html);
5349 }
5350}
5351
5352template<class Trait>
5353inline void
5354Parser<Trait>::finishRule2HtmlTag(typename Delims::iterator it,
5355 typename Delims::iterator last,
5356 TextParsingOpts<Trait> &po)
5357{
5358 if (it != last) {
5359 const auto start = it;
5360
5361 MdLineData::CommentData commentData = {2, true};
5362 bool onLine = po.m_html.m_onLine;
5363
5364 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less) {
5365 long long int i = po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos);
5366
5367 commentData = po.m_fr.m_data[it->m_line].second.m_htmlCommentData[i];
5368
5369 onLine = (it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()));
5370 po.m_html.m_onLine = onLine;
5371 }
5372
5373 if (commentData.first != -1 && commentData.second) {
5374 for (; it != last; ++it) {
5375 if (it->m_type == Delimiter::Greater) {
5376 auto p = it->m_pos;
5377
5378 bool doContinue = false;
5379
5380 for (char i = 0; i < commentData.first; ++i) {
5381 if (!(p > 0 && po.m_fr.m_data[it->m_line].first[p - 1] == Trait::latin1ToChar('-'))) {
5382 doContinue = true;
5383
5384 break;
5385 }
5386
5387 --p;
5388 }
5389
5390 if (doContinue) {
5391 continue;
5392 }
5393
5394 if (onLine || !isNewBlockIn(po.m_fr, start->m_line, it->m_line)) {
5395 eatRawHtml(po.m_line, po.m_pos, it->m_line,
5396 onLine ? po.m_fr.m_data[it->m_line].first.length() : it->m_pos + 1,
5397 po, true, 2, onLine);
5398 } else {
5399 resetHtmlTag(po.m_html);
5400 }
5401
5402 return;
5403 }
5404 }
5405 }
5406 }
5407
5408 if (po.m_html.m_onLine) {
5409 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 2, po.m_html.m_onLine);
5410 } else {
5411 resetHtmlTag(po.m_html);
5412 }
5413}
5414
5415template<class Trait>
5416inline void
5417Parser<Trait>::finishRule3HtmlTag(typename Delims::iterator it,
5418 typename Delims::iterator last,
5419 TextParsingOpts<Trait> &po)
5420{
5421 bool onLine = po.m_html.m_onLine;
5422
5423 if (it != last) {
5424 const auto start = it;
5425
5426 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less) {
5427 onLine = (it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()));
5428 po.m_html.m_onLine = onLine;
5429 }
5430
5431 for (; it != last; ++it) {
5432 if (it->m_type == Delimiter::Greater) {
5433 if (it->m_pos > 0 && po.m_fr.m_data[it->m_line].first[it->m_pos - 1] == Trait::latin1ToChar('?')) {
5434 long long int i = it->m_pos + 1;
5435
5436 for (; i < po.m_fr.m_data[it->m_line].first.length(); ++i) {
5437 if (po.m_fr.m_data[it->m_line].first[i] == Trait::latin1ToChar('<')) {
5438 break;
5439 }
5440 }
5441
5442 if (onLine || !isNewBlockIn(po.m_fr, start->m_line, it->m_line)) {
5443 eatRawHtml(po.m_line, po.m_pos, it->m_line, i, po, true, 3, onLine);
5444 } else {
5445 resetHtmlTag(po.m_html);
5446 }
5447
5448 return;
5449 }
5450 }
5451 }
5452 }
5453
5454 if (po.m_html.m_onLine) {
5455 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 3, onLine);
5456 } else {
5457 resetHtmlTag(po.m_html);
5458 }
5459}
5460
5461template<class Trait>
5462inline void
5463Parser<Trait>::finishRule4HtmlTag(typename Delims::iterator it,
5464 typename Delims::iterator last,
5465 TextParsingOpts<Trait> &po)
5466{
5467 if (it != last) {
5468 const auto start = it;
5469
5470 bool onLine = po.m_html.m_onLine;
5471
5472 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less) {
5473 onLine = (it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()));
5474 po.m_html.m_onLine = onLine;
5475 }
5476
5477 for (; it != last; ++it) {
5478 if (it->m_type == Delimiter::Greater) {
5479 long long int i = it->m_pos + 1;
5480
5481 for (; i < po.m_fr.m_data[it->m_line].first.length(); ++i) {
5482 if (po.m_fr.m_data[it->m_line].first[i] == Trait::latin1ToChar('<')) {
5483 break;
5484 }
5485 }
5486
5487 if (onLine || !isNewBlockIn(po.m_fr, start->m_line, it->m_line)) {
5488 eatRawHtml(po.m_line, po.m_pos, it->m_line, i, po, true, 4, onLine);
5489 } else {
5490 resetHtmlTag(po.m_html);
5491 }
5492
5493 return;
5494 }
5495 }
5496 }
5497
5498 if (po.m_html.m_onLine) {
5499 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 4, true);
5500 } else {
5501 resetHtmlTag(po.m_html);
5502 }
5503}
5504
5505template<class Trait>
5506inline void
5507Parser<Trait>::finishRule5HtmlTag(typename Delims::iterator it,
5508 typename Delims::iterator last,
5509 TextParsingOpts<Trait> &po)
5510{
5511 if (it != last) {
5512 const auto start = it;
5513
5514 bool onLine = po.m_html.m_onLine;
5515
5516 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less) {
5517 onLine = (it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()));
5518 po.m_html.m_onLine = onLine;
5519 }
5520
5521 for (; it != last; ++it) {
5522 if (it->m_type == Delimiter::Greater) {
5523 if (it->m_pos > 1 && po.m_fr.m_data[it->m_line].first[it->m_pos - 1] == Trait::latin1ToChar(']') &&
5524 po.m_fr.m_data[it->m_line].first[it->m_pos - 2] == Trait::latin1ToChar(']')) {
5525 long long int i = it->m_pos + 1;
5526
5527 for (; i < po.m_fr.m_data[it->m_line].first.length(); ++i) {
5528 if (po.m_fr.m_data[it->m_line].first[i] == Trait::latin1ToChar('<')) {
5529 break;
5530 }
5531 }
5532
5533 if (onLine || !isNewBlockIn(po.m_fr, start->m_line, it->m_line)) {
5534 eatRawHtml(po.m_line, po.m_pos, it->m_line, i, po, true, 5, onLine);
5535 } else {
5536 resetHtmlTag(po.m_html);
5537 }
5538
5539 return;
5540 }
5541 }
5542 }
5543 }
5544
5545 if (po.m_html.m_onLine) {
5546 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 5, true);
5547 } else {
5548 resetHtmlTag(po.m_html);
5549 }
5550}
5551
5552template<class Trait>
5553inline void
5554Parser<Trait>::finishRule6HtmlTag(typename Delims::iterator it,
5555 typename Delims::iterator last,
5556 TextParsingOpts<Trait> &po)
5557{
5558 po.m_html.m_onLine = (it != last ?
5559 it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()) : true);
5560
5561 if (po.m_html.m_onLine) {
5562 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po,
5563 false, 6, po.m_html.m_onLine);
5564 } else {
5565 const auto nit = std::find_if(std::next(it), last, [](const auto &d) {
5566 return (d.m_type == Delimiter::Greater);
5567 });
5568
5569 if (nit != last && !isNewBlockIn(po.m_fr, it->m_line, nit->m_line)) {
5570 eatRawHtml(po.m_line, po.m_pos, nit->m_line, nit->m_pos + nit->m_len, po,
5571 true, 6, false);
5572 }
5573 }
5574
5575 if (po.m_fr.m_emptyLineAfter && po.m_html.m_html) {
5576 po.m_html.m_continueHtml = false;
5577 }
5578}
5579
5580template<class Trait>
5581inline typename Parser<Trait>::Delims::iterator
5582Parser<Trait>::finishRawHtmlTag(typename Delims::iterator it,
5583 typename Delims::iterator last,
5584 TextParsingOpts<Trait> &po,
5585 bool skipFirst)
5586{
5587 po.m_detected = TextParsingOpts<Trait>::Detected::HTML;
5588
5589 switch (po.m_html.m_htmlBlockType) {
5590 case 1:
5591 finishRule1HtmlTag(it, last, po, skipFirst);
5592 break;
5593
5594 case 2:
5595 finishRule2HtmlTag(it, last, po);
5596 break;
5597
5598 case 3:
5599 finishRule3HtmlTag(it, last, po);
5600 break;
5601
5602 case 4:
5603 finishRule4HtmlTag(it, last, po);
5604 break;
5605
5606 case 5:
5607 finishRule5HtmlTag(it, last, po);
5608 break;
5609
5610 case 6:
5611 finishRule6HtmlTag(it, last, po);
5612 break;
5613
5614 case 7:
5615 return finishRule7HtmlTag(it, last, po);
5616
5617 default:
5618 po.m_detected = TextParsingOpts<Trait>::Detected::Nothing;
5619 break;
5620 }
5621
5622 return findIt(it, last, po);
5623}
5624
5625template<class Trait>
5626inline int
5627Parser<Trait>::htmlTagRule(typename Delims::iterator it,
5628 typename Delims::iterator last,
5629 TextParsingOpts<Trait> &po)
5630{
5631 MD_UNUSED(last)
5632
5633 typename Trait::String tag;
5634
5635 std::tie(tag, std::ignore) = readHtmlTag(it, po);
5636
5637 if (tag.startsWith(Trait::latin1ToString("![CDATA["))) {
5638 return 5;
5639 }
5640
5641 tag = tag.toLower();
5642
5643 static const typename Trait::String s_validHtmlTagLetters =
5644 Trait::latin1ToString("abcdefghijklmnopqrstuvwxyz0123456789-");
5645
5646 bool closing = false;
5647
5648 if (tag.startsWith(Trait::latin1ToString("/"))) {
5649 tag.remove(0, 1);
5650 closing = true;
5651 }
5652
5653 if (tag.endsWith(Trait::latin1ToString("/"))) {
5654 tag.remove(tag.size() - 1, 1);
5655 }
5656
5657 if (tag.isEmpty()) {
5658 return -1;
5659 }
5660
5661 if (!tag.startsWith(Trait::latin1ToString("!")) &&
5662 !tag.startsWith(Trait::latin1ToString("?")) &&
5663 !(tag[0].unicode() >= 97 && tag[0].unicode() <= 122)) {
5664 return -1;
5665 }
5666
5667 static const std::set<typename Trait::String> s_rule1 = {Trait::latin1ToString("pre"),
5668 Trait::latin1ToString("script"),
5669 Trait::latin1ToString("style"),
5670 Trait::latin1ToString("textarea")};
5671
5672 if (!closing && s_rule1.find(tag) != s_rule1.cend()) {
5673 return 1;
5674 } else if (tag.startsWith(Trait::latin1ToString("!--"))) {
5675 return 2;
5676 } else if (tag.startsWith(Trait::latin1ToString("?"))) {
5677 return 3;
5678 } else if (tag.startsWith(Trait::latin1ToString("!")) && tag.size() > 1 &&
5679 ((tag[1].unicode() >= 65 && tag[1].unicode() <= 90) ||
5680 (tag[1].unicode() >= 97 && tag[1].unicode() <= 122))) {
5681 return 4;
5682 } else {
5683 static const std::set<typename Trait::String> s_rule6 = {
5684 Trait::latin1ToString("address"), Trait::latin1ToString("article"), Trait::latin1ToString("aside"), Trait::latin1ToString("base"),
5685 Trait::latin1ToString("basefont"), Trait::latin1ToString("blockquote"), Trait::latin1ToString("body"), Trait::latin1ToString("caption"),
5686 Trait::latin1ToString("center"), Trait::latin1ToString("col"), Trait::latin1ToString("colgroup"), Trait::latin1ToString("dd"),
5687 Trait::latin1ToString("details"), Trait::latin1ToString("dialog"), Trait::latin1ToString("dir"), Trait::latin1ToString("div"),
5688 Trait::latin1ToString("dl"), Trait::latin1ToString("dt"), Trait::latin1ToString("fieldset"), Trait::latin1ToString("figcaption"),
5689 Trait::latin1ToString("figure"), Trait::latin1ToString("footer"), Trait::latin1ToString("form"), Trait::latin1ToString("frame"),
5690 Trait::latin1ToString("frameset"), Trait::latin1ToString("h1"), Trait::latin1ToString("h2"), Trait::latin1ToString("h3"),
5691 Trait::latin1ToString("h4"), Trait::latin1ToString("h5"), Trait::latin1ToString("h6"), Trait::latin1ToString("head"),
5692 Trait::latin1ToString("header"), Trait::latin1ToString("hr"), Trait::latin1ToString("html"), Trait::latin1ToString("iframe"),
5693 Trait::latin1ToString("legend"), Trait::latin1ToString("li"), Trait::latin1ToString("link"), Trait::latin1ToString("main"),
5694 Trait::latin1ToString("menu"), Trait::latin1ToString("menuitem"), Trait::latin1ToString("nav"), Trait::latin1ToString("noframes"),
5695 Trait::latin1ToString("ol"), Trait::latin1ToString("optgroup"), Trait::latin1ToString("option"), Trait::latin1ToString("p"),
5696 Trait::latin1ToString("param"), Trait::latin1ToString("section"), Trait::latin1ToString("search"), Trait::latin1ToString("summary"),
5697 Trait::latin1ToString("table"), Trait::latin1ToString("tbody"), Trait::latin1ToString("td"), Trait::latin1ToString("tfoot"),
5698 Trait::latin1ToString("th"), Trait::latin1ToString("thead"), Trait::latin1ToString("title"), Trait::latin1ToString("tr"),
5699 Trait::latin1ToString("track"), Trait::latin1ToString("ul")};
5700
5701 for (long long int i = 1; i < tag.size(); ++i) {
5702 if (!s_validHtmlTagLetters.contains(tag[i])) {
5703 return -1;
5704 }
5705 }
5706
5707 if (s_rule6.find(tag) != s_rule6.cend()) {
5708 return 6;
5709 } else {
5710 bool tag = false;
5711
5712 std::tie(tag, std::ignore, std::ignore, std::ignore, std::ignore) =
5713 isHtmlTag(it->m_line, it->m_pos, po, 7);
5714
5715 if (tag) {
5716 return 7;
5717 }
5718 }
5719 }
5720
5721 return -1;
5722}
5723
5724template<class Trait>
5725inline typename Parser<Trait>::Delims::iterator
5726Parser<Trait>::checkForRawHtml(typename Delims::iterator it,
5727 typename Delims::iterator last,
5728 TextParsingOpts<Trait> &po)
5729{
5730 const auto rule = htmlTagRule(it, last, po);
5731
5732 if (rule == -1) {
5733 resetHtmlTag(po.m_html);
5734
5735 po.m_firstInParagraph = false;
5736
5737 return it;
5738 }
5739
5740 po.m_html.m_htmlBlockType = rule;
5741 po.m_html.m_html.reset(new RawHtml<Trait>);
5742 po.m_html.m_html->setStartColumn(po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos));
5743 po.m_html.m_html->setStartLine(po.m_fr.m_data.at(it->m_line).second.m_lineNumber);
5744
5745 return finishRawHtmlTag(it, last, po, true);
5746}
5747
5748template<class Trait>
5749inline typename Parser<Trait>::Delims::iterator
5750Parser<Trait>::finishRule7HtmlTag(typename Delims::iterator it,
5751 typename Delims::iterator last,
5752 TextParsingOpts<Trait> &po)
5753{
5754 if (it != last) {
5755 const auto start = it;
5756 long long int l = -1, p = -1;
5757 bool onLine = false;
5758 bool ok = false;
5759
5760 std::tie(ok, l, p, onLine, std::ignore) = isHtmlTag(it->m_line, it->m_pos, po, 7);
5761
5762 onLine = onLine && it->m_line == 0 && l == start->m_line;
5763
5764 if (ok) {
5765 eatRawHtml(po.m_line, po.m_pos, l, ++p, po, !onLine, 7, onLine);
5766
5767 po.m_html.m_onLine = onLine;
5768
5769 it = findIt(it, last, po);
5770
5771 if (onLine) {
5772 for (; it != last; ++it) {
5773 if (it->m_type == Delimiter::Less) {
5774 const auto rule = htmlTagRule(it, last, po);
5775
5776 if (rule != -1 && rule != 7) {
5777 eatRawHtml(po.m_line, po.m_pos, it->m_line, it->m_pos, po, true, 7, onLine, true);
5778
5779 return std::prev(it);
5780 }
5781 }
5782 }
5783
5784 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 7, onLine, true);
5785
5786 return std::prev(last);
5787 } else {
5788 return it;
5789 }
5790 } else {
5791 return it;
5792 }
5793 } else {
5794 if (po.m_html.m_onLine) {
5795 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, true, 7, true);
5796
5797 return last;
5798 } else {
5799 resetHtmlTag(po.m_html);
5800 }
5801 }
5802
5803 return it;
5804}
5805
5806template<class Trait>
5807inline typename Parser<Trait>::Delims::iterator
5808Parser<Trait>::checkForMath(typename Delims::iterator it,
5809 typename Delims::iterator last,
5810 TextParsingOpts<Trait> &po)
5811{
5812 po.m_wasRefLink = false;
5813 po.m_firstInParagraph = false;
5814
5815 const auto end = std::find_if(std::next(it), last, [&](const auto &d) {
5816 return (d.m_type == Delimiter::Math && d.m_len == it->m_len);
5817 });
5818
5819 if (end != last && end->m_line <= po.m_lastTextLine) {
5820 typename Trait::String math;
5821
5822 if (it->m_line == end->m_line) {
5823 math = po.m_fr.m_data[it->m_line].first.asString().sliced(
5824 it->m_pos + it->m_len, end->m_pos - (it->m_pos + it->m_len));
5825 } else {
5826 math = po.m_fr.m_data[it->m_line].first.asString().sliced(it->m_pos + it->m_len);
5827
5828 for (long long int i = it->m_line + 1; i < end->m_line; ++i) {
5829 math.push_back(Trait::latin1ToChar('\n'));
5830 math.push_back(po.m_fr.m_data[i].first.asString());
5831 }
5832
5833 math.push_back(Trait::latin1ToChar('\n'));
5834 math.push_back(po.m_fr.m_data[end->m_line].first.asString().sliced(0, end->m_pos));
5835 }
5836
5837 if (!po.m_collectRefLinks) {
5838 std::shared_ptr<Math<Trait>> m(new Math<Trait>);
5839
5840 auto startLine = po.m_fr.m_data.at(it->m_line).second.m_lineNumber;
5841 auto startColumn = po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len);
5842
5843 if (it->m_pos + it->m_len >= po.m_fr.m_data.at(it->m_line).first.length()) {
5844 std::tie(startColumn, startLine) = nextPosition(po.m_fr, startColumn, startLine);
5845 }
5846
5847 auto endColumn = po.m_fr.m_data.at(end->m_line).first.virginPos(end->m_pos);
5848 auto endLine = po.m_fr.m_data.at(end->m_line).second.m_lineNumber;
5849
5850 if (endColumn == 0) {
5851 std::tie(endColumn, endLine) = prevPosition(po.m_fr, endColumn, endLine);
5852 } else {
5853 --endColumn;
5854 }
5855
5856 m->setStartColumn(startColumn);
5857 m->setStartLine(startLine);
5858 m->setEndColumn(endColumn);
5859 m->setEndLine(endLine);
5860 m->setInline(it->m_len == 1);
5861 m->setStartDelim({po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos),
5862 po.m_fr.m_data[it->m_line].second.m_lineNumber,
5863 po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos + it->m_len - 1),
5864 po.m_fr.m_data[it->m_line].second.m_lineNumber});
5865 m->setEndDelim({po.m_fr.m_data[end->m_line].first.virginPos(end->m_pos),
5866 po.m_fr.m_data[end->m_line].second.m_lineNumber,
5867 po.m_fr.m_data[end->m_line].first.virginPos(end->m_pos + end->m_len - 1),
5868 po.m_fr.m_data[end->m_line].second.m_lineNumber});
5869 m->setFensedCode(false);
5870
5871 initLastItemWithOpts<Trait>(po, m);
5872
5873 if (math.startsWith(Trait::latin1ToString("`")) &&
5874 math.endsWith(Trait::latin1ToString("`")) &&
5875 !math.endsWith(Trait::latin1ToString("\\`")) &&
5876 math.length() > 1) {
5877 math = math.sliced(1, math.length() - 2);
5878 }
5879
5880 m->setExpr(math);
5881
5882 po.m_parent->appendItem(m);
5883
5884 po.m_pos = end->m_pos + end->m_len;
5885 po.m_line = end->m_line;
5886 po.m_lastText = nullptr;
5887 }
5888
5889 return end;
5890 }
5891
5892 return it;
5893}
5894
5895template<class Trait>
5896inline typename Parser<Trait>::Delims::iterator
5897Parser<Trait>::checkForAutolinkHtml(typename Delims::iterator it,
5898 typename Delims::iterator last,
5899 TextParsingOpts<Trait> &po,
5900 bool updatePos)
5901{
5902 const auto nit = std::find_if(std::next(it), last, [](const auto &d) {
5903 return (d.m_type == Delimiter::Greater);
5904 });
5905
5906 if (nit != last) {
5907 if (nit->m_line == it->m_line) {
5908 const auto url = po.m_fr.m_data.at(it->m_line).first.asString().sliced(
5909 it->m_pos + 1, nit->m_pos - it->m_pos - 1);
5910
5911 bool isUrl = true;
5912
5913 for (long long int i = 0; i < url.size(); ++i) {
5914 if (url[i].isSpace()) {
5915 isUrl = false;
5916
5917 break;
5918 }
5919 }
5920
5921 if (isUrl) {
5922 if (!isValidUrl<Trait>(url) && !isEmail<Trait>(url)) {
5923 isUrl = false;
5924 }
5925 }
5926
5927 if (isUrl) {
5928 if (!po.m_collectRefLinks) {
5929 std::shared_ptr<Link<Trait>> lnk(new Link<Trait>);
5930 lnk->setStartColumn(po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos));
5931 lnk->setStartLine(po.m_fr.m_data.at(it->m_line).second.m_lineNumber);
5932 lnk->setEndColumn(po.m_fr.m_data.at(nit->m_line).first.virginPos(nit->m_pos + nit->m_len - 1));
5933 lnk->setEndLine(po.m_fr.m_data.at(nit->m_line).second.m_lineNumber);
5934 lnk->setUrl(url);
5935 lnk->setOpts(po.m_opts);
5936 lnk->setTextPos({po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos + 1),
5937 po.m_fr.m_data[it->m_line].second.m_lineNumber,
5938 po.m_fr.m_data[nit->m_line].first.virginPos(nit->m_pos - 1),
5939 po.m_fr.m_data[nit->m_line].second.m_lineNumber});
5940 lnk->setUrlPos(lnk->textPos());
5941 po.m_parent->appendItem(lnk);
5942 }
5943
5944 po.m_wasRefLink = false;
5945 po.m_firstInParagraph = false;
5946 po.m_lastText = nullptr;
5947
5948 if (updatePos) {
5949 po.m_pos = nit->m_pos + nit->m_len;
5950 po.m_line = nit->m_line;
5951 }
5952
5953 return nit;
5954 } else {
5955 return checkForRawHtml(it, last, po);
5956 }
5957 } else {
5958 return checkForRawHtml(it, last, po);
5959 }
5960 } else {
5961 return checkForRawHtml(it, last, po);
5962 }
5963}
5964
5965template<class Trait>
5966inline void
5967Parser<Trait>::makeInlineCode(long long int startLine,
5968 long long int startPos,
5969 long long int lastLine,
5970 long long int lastPos,
5971 TextParsingOpts<Trait> &po,
5972 typename Delims::iterator startDelimIt,
5973 typename Delims::iterator endDelimIt)
5974{
5975 typename Trait::String c;
5976
5977 for (; po.m_line <= lastLine; ++po.m_line) {
5978 c.push_back(po.m_fr.m_data.at(po.m_line).first.asString().sliced(
5979 po.m_pos, (po.m_line == lastLine ? lastPos - po.m_pos :
5980 po.m_fr.m_data.at(po.m_line).first.length() - po.m_pos)));
5981
5982 if (po.m_line < lastLine) {
5983 c.push_back(Trait::latin1ToChar(' '));
5984 }
5985
5986 po.m_pos = 0;
5987 }
5988
5989 po.m_line = lastLine;
5990
5991 if (c[0] == Trait::latin1ToChar(' ') && c[c.size() - 1] == Trait::latin1ToChar(' ') &&
5992 skipSpaces<Trait>(0, c) < c.size()) {
5993 c.remove(0, 1);
5994 c.remove(c.size() - 1, 1);
5995 ++startPos;
5996 --lastPos;
5997 }
5998
5999 if (!c.isEmpty()) {
6000 auto code = std::make_shared<Code<Trait>>(c, false, true);
6001
6002 code->setStartColumn(po.m_fr.m_data.at(startLine).first.virginPos(startPos));
6003 code->setStartLine(po.m_fr.m_data.at(startLine).second.m_lineNumber);
6004 code->setEndColumn(po.m_fr.m_data.at(lastLine).first.virginPos(lastPos - 1));
6005 code->setEndLine(po.m_fr.m_data.at(lastLine).second.m_lineNumber);
6006 code->setStartDelim({po.m_fr.m_data.at(startDelimIt->m_line).first.virginPos(
6007 startDelimIt->m_pos + (startDelimIt->m_backslashed ? 1 : 0)),
6008 po.m_fr.m_data.at(startDelimIt->m_line).second.m_lineNumber,
6009 po.m_fr.m_data.at(startDelimIt->m_line).first.virginPos(
6010 startDelimIt->m_pos + (startDelimIt->m_backslashed ? 1 : 0)) +
6011 startDelimIt->m_len - 1 - (startDelimIt->m_backslashed ? 1 : 0),
6012 po.m_fr.m_data.at(startDelimIt->m_line).second.m_lineNumber});
6013 code->setEndDelim(
6014 {po.m_fr.m_data.at(endDelimIt->m_line).first.virginPos(
6015 endDelimIt->m_pos + (endDelimIt->m_backslashed ? 1 : 0)),
6016 po.m_fr.m_data.at(endDelimIt->m_line).second.m_lineNumber,
6017 po.m_fr.m_data.at(endDelimIt->m_line).first.virginPos(
6018 endDelimIt->m_pos + (endDelimIt->m_backslashed ? 1 : 0) +
6019 endDelimIt->m_len - 1 - (endDelimIt->m_backslashed ? 1 : 0)),
6020 po.m_fr.m_data.at(endDelimIt->m_line).second.m_lineNumber});
6021 code->setOpts(po.m_opts);
6022
6023 initLastItemWithOpts<Trait>(po, code);
6024
6025 po.m_parent->appendItem(code);
6026 }
6027
6028 po.m_wasRefLink = false;
6029 po.m_firstInParagraph = false;
6030 po.m_lastText = nullptr;
6031}
6032
6033template<class Trait>
6034inline typename Parser<Trait>::Delims::iterator
6035Parser<Trait>::checkForInlineCode(typename Delims::iterator it,
6036 typename Delims::iterator last,
6037 TextParsingOpts<Trait> &po)
6038{
6039 const auto len = it->m_len;
6040 const auto start = it;
6041
6042 po.m_wasRefLink = false;
6043 po.m_firstInParagraph = false;
6044
6045 ++it;
6046
6047 for (; it != last; ++it) {
6048 if (it->m_line <= po.m_lastTextLine) {
6049 const auto p = skipSpaces<Trait>(0, po.m_fr.m_data.at(it->m_line).first.asString());
6050 const auto withoutSpaces = po.m_fr.m_data.at(it->m_line).first.asString().sliced(p);
6051
6052 if ((it->m_type == Delimiter::HorizontalLine && withoutSpaces[0] == Trait::latin1ToChar('-')) ||
6053 it->m_type == Delimiter::H1 || it->m_type == Delimiter::H2) {
6054 break;
6055 } else if (it->m_type == Delimiter::InlineCode && (it->m_len - (it->m_backslashed ? 1 : 0)) == len) {
6056 if (!po.m_collectRefLinks) {
6057 makeText(start->m_line, start->m_pos, po);
6058
6059 po.m_pos = start->m_pos + start->m_len;
6060
6061 makeInlineCode(start->m_line, start->m_pos + start->m_len, it->m_line,
6062 it->m_pos + (it->m_backslashed ? 1 : 0), po, start, it);
6063
6064 po.m_line = it->m_line;
6065 po.m_pos = it->m_pos + it->m_len;
6066 }
6067
6068 return it;
6069 }
6070 } else {
6071 break;
6072 }
6073 }
6074
6075 if (!po.m_collectRefLinks) {
6076 makeText(start->m_line, start->m_pos + start->m_len, po);
6077 }
6078
6079 return start;
6080}
6081
6082template<class Trait>
6083inline std::pair<typename MdBlock<Trait>::Data, typename Parser<Trait>::Delims::iterator>
6084Parser<Trait>::readTextBetweenSquareBrackets(typename Delims::iterator start,
6085 typename Delims::iterator it,
6086 typename Delims::iterator last,
6087 TextParsingOpts<Trait> &po,
6088 bool doNotCreateTextOnFail,
6089 WithPosition *pos)
6090{
6091 if (it != last && it->m_line <= po.m_lastTextLine) {
6092 if (start->m_line == it->m_line) {
6093 const auto p = start->m_pos + start->m_len;
6094 const auto n = it->m_pos - p;
6095
6096 if (pos) {
6097 long long int startPos, startLine, endPos, endLine;
6098 std::tie(startPos, startLine) = nextPosition(po.m_fr,
6099 po.m_fr.m_data[start->m_line].first.virginPos(
6100 start->m_pos + start->m_len - 1),
6101 po.m_fr.m_data[start->m_line].second.m_lineNumber);
6102 std::tie(endPos, endLine) =
6103 prevPosition(po.m_fr, po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos),
6104 po.m_fr.m_data[it->m_line].second.m_lineNumber);
6105
6106 *pos = {startPos, startLine, endPos, endLine};
6107 }
6108
6109 return {{{po.m_fr.m_data.at(start->m_line).first.sliced(p, n),
6110 {po.m_fr.m_data.at(start->m_line).second.m_lineNumber}}}, it};
6111 } else {
6112 if (it->m_line - start->m_line < 3) {
6113 typename MdBlock<Trait>::Data res;
6114 res.push_back({po.m_fr.m_data.at(start->m_line).first.sliced(
6115 start->m_pos + start->m_len), po.m_fr.m_data.at(start->m_line).second});
6116
6117 long long int i = start->m_line + 1;
6118
6119 for (; i <= it->m_line; ++i) {
6120 if (i == it->m_line) {
6121 res.push_back({po.m_fr.m_data.at(i).first.sliced(0, it->m_pos),
6122 po.m_fr.m_data.at(i).second});
6123 } else {
6124 res.push_back({po.m_fr.m_data.at(i).first, po.m_fr.m_data.at(i).second});
6125 }
6126 }
6127
6128 if (pos) {
6129 long long int startPos, startLine, endPos, endLine;
6130 std::tie(startPos, startLine) = nextPosition(po.m_fr,
6131 po.m_fr.m_data[start->m_line].first.virginPos(
6132 start->m_pos + start->m_len - 1),
6133 po.m_fr.m_data[start->m_line].second.m_lineNumber);
6134 std::tie(endPos, endLine) =
6135 prevPosition(po.m_fr, po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos),
6136 po.m_fr.m_data[it->m_line].second.m_lineNumber);
6137
6138 *pos = {startPos, startLine, endPos, endLine};
6139 }
6140
6141 return {res, it};
6142 } else {
6143 if (!po.m_collectRefLinks && !doNotCreateTextOnFail) {
6144 makeText(start->m_line, start->m_pos + start->m_len, po);
6145 }
6146
6147 return {{}, start};
6148 }
6149 }
6150 } else {
6151 if (!po.m_collectRefLinks && !doNotCreateTextOnFail) {
6152 makeText(start->m_line, start->m_pos + start->m_len, po);
6153 }
6154
6155 return {{}, start};
6156 }
6157}
6158
6159template<class Trait>
6160inline std::pair<typename MdBlock<Trait>::Data, typename Parser<Trait>::Delims::iterator>
6161Parser<Trait>::checkForLinkText(typename Delims::iterator it,
6162 typename Delims::iterator last,
6163 TextParsingOpts<Trait> &po,
6164 WithPosition *pos)
6165{
6166 const auto start = it;
6167
6168 long long int brackets = 0;
6169
6170 const bool collectRefLinks = po.m_collectRefLinks;
6171 po.m_collectRefLinks = true;
6172 long long int l = po.m_line, p = po.m_pos;
6173
6174 for (it = std::next(it); it != last; ++it) {
6175 bool quit = false;
6176
6177 switch (it->m_type) {
6178 case Delimiter::SquareBracketsClose: {
6179 if (!brackets)
6180 quit = true;
6181 else
6182 --brackets;
6183 } break;
6184
6185 case Delimiter::SquareBracketsOpen:
6186 case Delimiter::ImageOpen:
6187 ++brackets;
6188 break;
6189
6190 case Delimiter::InlineCode:
6191 it = checkForInlineCode(it, last, po);
6192 break;
6193
6194 case Delimiter::Less:
6195 it = checkForAutolinkHtml(it, last, po, false);
6196 break;
6197
6198 default:
6199 break;
6200 }
6201
6202 if (quit) {
6203 break;
6204 }
6205 }
6206
6207 const auto r = readTextBetweenSquareBrackets(start, it, last, po, false, pos);
6208
6209 po.m_collectRefLinks = collectRefLinks;
6210 resetHtmlTag(po.m_html);
6211 po.m_line = l;
6212 po.m_pos = p;
6213
6214 return r;
6215}
6216
6217template<class Trait>
6218inline std::pair<typename MdBlock<Trait>::Data, typename Parser<Trait>::Delims::iterator>
6219Parser<Trait>::checkForLinkLabel(typename Delims::iterator it,
6220 typename Delims::iterator last,
6221 TextParsingOpts<Trait> &po,
6222 WithPosition *pos)
6223{
6224 const auto start = it;
6225
6226 for (it = std::next(it); it != last; ++it) {
6227 bool quit = false;
6228
6229 switch (it->m_type) {
6230 case Delimiter::SquareBracketsClose: {
6231 quit = true;
6232 } break;
6233
6234 case Delimiter::SquareBracketsOpen:
6235 case Delimiter::ImageOpen: {
6236 it = last;
6237 quit = true;
6238 } break;
6239
6240 default:
6241 break;
6242 }
6243
6244 if (quit)
6245 break;
6246 }
6247
6248 return readTextBetweenSquareBrackets(start, it, last, po, true, pos);
6249}
6250
6251template<class Trait>
6252inline typename Trait::String
6253Parser<Trait>::toSingleLine(const typename MdBlock<Trait>::Data &d)
6254{
6255 typename Trait::String res;
6256 bool first = true;
6257
6258 for (const auto &s : d) {
6259 if (!first) {
6260 res.push_back(Trait::latin1ToChar(' '));
6261 }
6262 res.push_back(s.first.asString().simplified());
6263 first = false;
6264 }
6265
6266 return res;
6267}
6268
6269template<class Trait>
6270inline std::shared_ptr<Link<Trait>>
6271Parser<Trait>::makeLink(const typename Trait::String &url,
6272 const typename MdBlock<Trait>::Data &text,
6273 TextParsingOpts<Trait> &po,
6274 bool doNotCreateTextOnFail,
6275 long long int startLine,
6276 long long int startPos,
6277 long long int lastLine,
6278 long long int lastPos,
6279 const WithPosition &textPos,
6280 const WithPosition &urlPos)
6281{
6282 MD_UNUSED(doNotCreateTextOnFail)
6283
6284 typename Trait::String u = (url.startsWith(Trait::latin1ToString("#")) ?
6285 url : removeBackslashes<typename Trait::String, Trait>(replaceEntity<Trait>(url)));
6286
6287 if (!u.isEmpty()) {
6288 if (!u.startsWith(Trait::latin1ToString("#"))) {
6289 const auto checkForFile = [&](typename Trait::String &url,
6290 const typename Trait::String &ref = {}) -> bool {
6291 if (Trait::fileExists(url)) {
6292 url = Trait::absoluteFilePath(url);
6293
6294 if (!po.m_collectRefLinks) {
6295 po.m_linksToParse.push_back(url);
6296 }
6297
6298 if (!ref.isEmpty()) {
6299 url = ref + Trait::latin1ToString("/") + url;
6300 }
6301
6302 return true;
6303 } else if (Trait::fileExists(url, po.m_workingPath)) {
6304 url = Trait::absoluteFilePath(po.m_workingPath + Trait::latin1ToString("/") + url);
6305
6306 if (!po.m_collectRefLinks) {
6307 po.m_linksToParse.push_back(url);
6308 }
6309
6310 if (!ref.isEmpty()) {
6311 url = ref + Trait::latin1ToString("/") + url;
6312 }
6313
6314 return true;
6315 } else {
6316 return false;
6317 }
6318 };
6319
6320 if (!checkForFile(u) && u.contains(Trait::latin1ToChar('#'))) {
6321 const auto i = u.indexOf(Trait::latin1ToChar('#'));
6322 const auto ref = u.sliced(i);
6323 u = u.sliced(0, i);
6324
6325 if (!checkForFile(u, ref)) {
6326 u = u + ref;
6327 }
6328 }
6329 } else
6330 u = u + (po.m_workingPath.isEmpty() ? typename Trait::String() :
6331 Trait::latin1ToString("/") + po.m_workingPath) + Trait::latin1ToString("/") +
6332 po.m_fileName;
6333 }
6334
6335 std::shared_ptr<Link<Trait>> link(new Link<Trait>);
6336 link->setUrl(u);
6337 link->setOpts(po.m_opts);
6338 link->setTextPos(textPos);
6339 link->setUrlPos(urlPos);
6340
6341 MdBlock<Trait> block = {text, 0};
6342
6343 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
6344
6345 RawHtmlBlock<Trait> html;
6346
6347 parseFormattedTextLinksImages(block,
6348 std::static_pointer_cast<Block<Trait>>(p),
6349 po.m_doc,
6350 po.m_linksToParse,
6351 po.m_workingPath,
6352 po.m_fileName,
6353 po.m_collectRefLinks,
6354 true,
6355 html,
6356 true);
6357
6358 if (!p->isEmpty()) {
6359 std::shared_ptr<Image<Trait>> img;
6360
6361 if (p->items().size() == 1 && p->items().at(0)->type() == ItemType::Paragraph) {
6362 const auto ip = std::static_pointer_cast<Paragraph<Trait>>(p->items().at(0));
6363
6364 for (auto it = ip->items().cbegin(), last = ip->items().cend(); it != last; ++it) {
6365 switch ((*it)->type()) {
6366 case ItemType::Link:
6367 return {};
6368
6369 case ItemType::Image: {
6370 img = std::static_pointer_cast<Image<Trait>>(*it);
6371 } break;
6372
6373 default:
6374 break;
6375 }
6376 }
6377
6378 if (img.get()) {
6379 link->setImg(img);
6380 }
6381
6382 link->setP(ip);
6383 }
6384 }
6385
6386 if (html.m_html.get()) {
6387 link->p()->appendItem(html.m_html);
6388 }
6389
6390 link->setText(toSingleLine(text));
6391 link->setStartColumn(po.m_fr.m_data.at(startLine).first.virginPos(startPos));
6392 link->setStartLine(po.m_fr.m_data.at(startLine).second.m_lineNumber);
6393 link->setEndColumn(po.m_fr.m_data.at(lastLine).first.virginPos(lastPos - 1));
6394 link->setEndLine(po.m_fr.m_data.at(lastLine).second.m_lineNumber);
6395
6396 initLastItemWithOpts<Trait>(po, link);
6397
6398 po.m_lastText = nullptr;
6399
6400 return link;
6401}
6402
6403template<class Trait>
6404inline bool
6405Parser<Trait>::createShortcutLink(const typename MdBlock<Trait>::Data &text,
6406 TextParsingOpts<Trait> &po,
6407 long long int startLine,
6408 long long int startPos,
6409 long long int lastLineForText,
6410 long long int lastPosForText,
6411 typename Delims::iterator lastIt,
6412 const typename MdBlock<Trait>::Data &linkText,
6413 bool doNotCreateTextOnFail,
6414 const WithPosition &textPos,
6415 const WithPosition &linkTextPos)
6416{
6417 const auto u = Trait::latin1ToString("#") + toSingleLine(text).toCaseFolded().toUpper();
6418 const auto url = u + Trait::latin1ToString("/") + (po.m_workingPath.isEmpty() ?
6419 typename Trait::String() : po.m_workingPath + Trait::latin1ToString("/")) + po.m_fileName;
6420
6421 po.m_wasRefLink = false;
6422 po.m_firstInParagraph = false;
6423
6424 if (po.m_doc->labeledLinks().find(url) != po.m_doc->labeledLinks().cend()) {
6425 if (!po.m_collectRefLinks) {
6426 const auto isLinkTextEmpty = toSingleLine(linkText).isEmpty();
6427
6428 const auto link = makeLink(u,
6429 removeBackslashes<Trait>(isLinkTextEmpty ? text : linkText),
6430 po,
6431 doNotCreateTextOnFail,
6432 startLine,
6433 startPos,
6434 lastIt->m_line,
6435 lastIt->m_pos + lastIt->m_len,
6436 (isLinkTextEmpty ? textPos : linkTextPos),
6437 textPos);
6438
6439 if (link.get()) {
6440 po.m_linksToParse.push_back(url);
6441 po.m_parent->appendItem(link);
6442
6443 po.m_line = lastIt->m_line;
6444 po.m_pos = lastIt->m_pos + lastIt->m_len;
6445 } else {
6446 if (!po.m_collectRefLinks && !doNotCreateTextOnFail) {
6447 makeText(lastLineForText, lastPosForText, po);
6448 }
6449
6450 return false;
6451 }
6452 }
6453
6454 return true;
6455 } else if (!po.m_collectRefLinks && !doNotCreateTextOnFail) {
6456 makeText(lastLineForText, lastPosForText, po);
6457 }
6458
6459 return false;
6460}
6461
6462template<class Trait>
6463inline std::shared_ptr<Image<Trait>>
6464Parser<Trait>::makeImage(const typename Trait::String &url,
6465 const typename MdBlock<Trait>::Data &text,
6466 TextParsingOpts<Trait> &po,
6467 bool doNotCreateTextOnFail,
6468 long long int startLine,
6469 long long int startPos,
6470 long long int lastLine,
6471 long long int lastPos,
6472 const WithPosition &textPos,
6473 const WithPosition &urlPos)
6474{
6475 MD_UNUSED(doNotCreateTextOnFail)
6476
6477 std::shared_ptr<Image<Trait>> img(new Image<Trait>);
6478
6479 typename Trait::String u = (url.startsWith(Trait::latin1ToString("#")) ? url :
6480 removeBackslashes<typename Trait::String, Trait>(replaceEntity<Trait>(url)));
6481
6482 if (Trait::fileExists(u)) {
6483 img->setUrl(u);
6484 } else if (Trait::fileExists(u, po.m_workingPath)) {
6485 img->setUrl(po.m_workingPath + Trait::latin1ToString("/") + u);
6486 } else {
6487 img->setUrl(u);
6488 }
6489
6490 MdBlock<Trait> block = {text, 0};
6491
6492 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
6493
6494 RawHtmlBlock<Trait> html;
6495
6496 parseFormattedTextLinksImages(block,
6497 std::static_pointer_cast<Block<Trait>>(p),
6498 po.m_doc,
6499 po.m_linksToParse,
6500 po.m_workingPath,
6501 po.m_fileName,
6502 po.m_collectRefLinks,
6503 true,
6504 html,
6505 true);
6506
6507 if (!p->isEmpty()) {
6508 if (p->items().size() == 1 && p->items().at(0)->type() == ItemType::Paragraph) {
6509 img->setP(std::static_pointer_cast<Paragraph<Trait>>(p->items().at(0)));
6510 }
6511 }
6512
6513 img->setText(toSingleLine(removeBackslashes<Trait>(text)));
6514 img->setStartColumn(po.m_fr.m_data.at(startLine).first.virginPos(startPos));
6515 img->setStartLine(po.m_fr.m_data.at(startLine).second.m_lineNumber);
6516 img->setEndColumn(po.m_fr.m_data.at(lastLine).first.virginPos(lastPos - 1));
6517 img->setEndLine(po.m_fr.m_data.at(lastLine).second.m_lineNumber);
6518 img->setTextPos(textPos);
6519 img->setUrlPos(urlPos);
6520
6521 initLastItemWithOpts<Trait>(po, img);
6522
6523 po.m_lastText = nullptr;
6524
6525 return img;
6526}
6527
6528template<class Trait>
6529inline bool
6530Parser<Trait>::createShortcutImage(const typename MdBlock<Trait>::Data &text,
6531 TextParsingOpts<Trait> &po,
6532 long long int startLine,
6533 long long int startPos,
6534 long long int lastLineForText,
6535 long long int lastPosForText,
6536 typename Delims::iterator lastIt,
6537 const typename MdBlock<Trait>::Data &linkText,
6538 bool doNotCreateTextOnFail,
6539 const WithPosition &textPos,
6540 const WithPosition &linkTextPos)
6541{
6542 const auto url = Trait::latin1ToString("#") + toSingleLine(text).toCaseFolded().toUpper() +
6543 Trait::latin1ToString("/") + (po.m_workingPath.isEmpty() ? typename Trait::String() :
6544 po.m_workingPath + Trait::latin1ToString("/")) + po.m_fileName;
6545
6546 po.m_wasRefLink = false;
6547 po.m_firstInParagraph = false;
6548
6549 const auto iit = po.m_doc->labeledLinks().find(url);
6550
6551 if (iit != po.m_doc->labeledLinks().cend()) {
6552 if (!po.m_collectRefLinks) {
6553 const auto isLinkTextEmpty = toSingleLine(linkText).isEmpty();
6554
6555 const auto img = makeImage(iit->second->url(),
6556 (isLinkTextEmpty ? text : linkText),
6557 po,
6558 doNotCreateTextOnFail,
6559 startLine,
6560 startPos,
6561 lastIt->m_line,
6562 lastIt->m_pos + lastIt->m_len,
6563 (isLinkTextEmpty ? textPos : linkTextPos),
6564 textPos);
6565
6566 po.m_parent->appendItem(img);
6567
6568 po.m_line = lastIt->m_line;
6569 po.m_pos = lastIt->m_pos + lastIt->m_len;
6570 }
6571
6572 return true;
6573 } else if (!po.m_collectRefLinks && !doNotCreateTextOnFail) {
6574 makeText(lastLineForText, lastPosForText, po);
6575 }
6576
6577 return false;
6578}
6579
6580//! Skip space in the block up to 1 new line.
6581template<class Trait>
6582inline void
6583skipSpacesUpTo1Line(long long int &line,
6584 long long int &pos,
6585 const typename MdBlock<Trait>::Data &fr)
6586{
6587 pos = skipSpaces<Trait>(pos, fr.at(line).first.asString());
6588
6589 if (pos == fr.at(line).first.length() && line + 1 < (long long int)fr.size()) {
6590 ++line;
6591 pos = skipSpaces<Trait>(0, fr.at(line).first.asString());
6592 }
6593}
6594
6595//! Read link's destination.
6596template<class Trait>
6597inline std::tuple<long long int, long long int, bool, typename Trait::String, long long int>
6598readLinkDestination(long long int line,
6599 long long int pos,
6600 const TextParsingOpts<Trait> &po,
6601 WithPosition *urlPos = nullptr)
6602{
6603 skipSpacesUpTo1Line<Trait>(line, pos, po.m_fr.m_data);
6604
6605 const auto destLine = line;
6606 const auto &s = po.m_fr.m_data.at(line).first.asString();
6607 bool backslash = false;
6608
6609 if (pos < s.length() && line <= po.m_lastTextLine) {
6610 if (s[pos] == Trait::latin1ToChar('<')) {
6611 ++pos;
6612
6613 if (urlPos) {
6614 urlPos->setStartColumn(po.m_fr.m_data[line].first.virginPos(pos));
6615 urlPos->setStartLine(po.m_fr.m_data[line].second.m_lineNumber);
6616 }
6617
6618 const auto start = pos;
6619
6620 while (pos < s.size()) {
6621 bool now = false;
6622
6623 if (s[pos] == Trait::latin1ToChar('\\') && !backslash) {
6624 backslash = true;
6625 now = true;
6626 } else if (!backslash && s[pos] == Trait::latin1ToChar('<')) {
6627 return {line, pos, false, {}, destLine};
6628 } else if (!backslash && s[pos] == Trait::latin1ToChar('>')) {
6629 break;
6630 }
6631
6632 if (!now) {
6633 backslash = false;
6634 }
6635
6636 ++pos;
6637 }
6638
6639 if (pos < s.size() && s[pos] == Trait::latin1ToChar('>')) {
6640 if (urlPos) {
6641 urlPos->setEndColumn(po.m_fr.m_data[line].first.virginPos(pos - 1));
6642 urlPos->setEndLine(po.m_fr.m_data[line].second.m_lineNumber);
6643 }
6644
6645 ++pos;
6646
6647 return {line, pos, true, s.sliced(start, pos - start - 1), destLine};
6648 } else {
6649 return {line, pos, false, {}, destLine};
6650 }
6651 } else {
6652 long long int pc = 0;
6653
6654 const auto start = pos;
6655
6656 if (urlPos) {
6657 urlPos->setStartColumn(po.m_fr.m_data[line].first.virginPos(pos));
6658 urlPos->setStartLine(po.m_fr.m_data[line].second.m_lineNumber);
6659 }
6660
6661 while (pos < s.size()) {
6662 bool now = false;
6663
6664 if (s[pos] == Trait::latin1ToChar('\\') && !backslash) {
6665 backslash = true;
6666 now = true;
6667 } else if (!backslash && s[pos] == Trait::latin1ToChar(' ')) {
6668 if (!pc) {
6669 if (urlPos) {
6670 urlPos->setEndColumn(po.m_fr.m_data[line].first.virginPos(pos - 1));
6671 urlPos->setEndLine(po.m_fr.m_data[line].second.m_lineNumber);
6672 }
6673
6674 return {line, pos, true, s.sliced(start, pos - start), destLine};
6675 } else {
6676 return {line, pos, false, {}, destLine};
6677 }
6678 } else if (!backslash && s[pos] == Trait::latin1ToChar('(')) {
6679 ++pc;
6680 } else if (!backslash && s[pos] == Trait::latin1ToChar(')')) {
6681 if (!pc) {
6682 if (urlPos) {
6683 urlPos->setEndColumn(po.m_fr.m_data[line].first.virginPos(pos - 1));
6684 urlPos->setEndLine(po.m_fr.m_data[line].second.m_lineNumber);
6685 }
6686
6687 return {line, pos, true, s.sliced(start, pos - start), destLine};
6688 } else {
6689 --pc;
6690 }
6691 }
6692
6693 if (!now) {
6694 backslash = false;
6695 }
6696
6697 ++pos;
6698 }
6699
6700 if (urlPos) {
6701 urlPos->setEndColumn(po.m_fr.m_data[line].first.virginPos(pos - 1));
6702 urlPos->setEndLine(po.m_fr.m_data[line].second.m_lineNumber);
6703 }
6704
6705 return {line, pos, true, s.sliced(start, pos - start), destLine};
6706 }
6707 } else {
6708 return {line, pos, false, {}, destLine};
6709 }
6710}
6711
6712//! Read link's title.
6713template<class Trait>
6714inline std::tuple<long long int, long long int, bool, typename Trait::String, long long int>
6715readLinkTitle(long long int line,
6716 long long int pos,
6717 const TextParsingOpts<Trait> &po)
6718{
6719 const auto space = (pos < po.m_fr.m_data.at(line).first.length() ?
6720 po.m_fr.m_data.at(line).first[pos].isSpace() : true);
6721
6722 const auto firstLine = line;
6723
6724 skipSpacesUpTo1Line<Trait>(line, pos, po.m_fr.m_data);
6725
6726 if (pos >= po.m_fr.m_data.at(line).first.length()) {
6727 return {line, pos, true, {}, firstLine};
6728 }
6729
6730 const auto sc = po.m_fr.m_data.at(line).first[pos];
6731
6732 if (sc != Trait::latin1ToChar('"') && sc != Trait::latin1ToChar('\'') &&
6733 sc != Trait::latin1ToChar('(') && sc != Trait::latin1ToChar(')')) {
6734 return {line, pos, (firstLine != line && line <= po.m_lastTextLine), {}, firstLine};
6735 } else if (!space && sc != Trait::latin1ToChar(')')) {
6736 return {line, pos, false, {}, firstLine};
6737 }
6738
6739 if (sc == Trait::latin1ToChar(')')) {
6740 return {line, pos, line <= po.m_lastTextLine, {}, firstLine};
6741 }
6742
6743 const auto startLine = line;
6744
6745 bool backslash = false;
6746
6747 ++pos;
6748
6749 skipSpacesUpTo1Line<Trait>(line, pos, po.m_fr.m_data);
6750
6751 typename Trait::String title;
6752
6753 while (line < (long long int)po.m_fr.m_data.size() && pos < po.m_fr.m_data.at(line).first.length()) {
6754 bool now = false;
6755
6756 if (po.m_fr.m_data.at(line).first[pos] == Trait::latin1ToChar('\\') && !backslash) {
6757 backslash = true;
6758 now = true;
6759 } else if (sc == Trait::latin1ToChar('(') &&
6760 po.m_fr.m_data.at(line).first[pos] == Trait::latin1ToChar(')') && !backslash) {
6761 ++pos;
6762 return {line, pos, line <= po.m_lastTextLine, title, startLine};
6763 } else if (sc == Trait::latin1ToChar('(') &&
6764 po.m_fr.m_data.at(line).first[pos] == Trait::latin1ToChar('(') && !backslash) {
6765 return {line, pos, false, {}, startLine};
6766 } else if (sc != Trait::latin1ToChar('(') && po.m_fr.m_data.at(line).first[pos] == sc && !backslash) {
6767 ++pos;
6768 return {line, pos, line <= po.m_lastTextLine, title, startLine};
6769 } else {
6770 title.push_back(po.m_fr.m_data.at(line).first[pos]);
6771 }
6772
6773 if (!now) {
6774 backslash = false;
6775 }
6776
6777 ++pos;
6778
6779 if (pos == po.m_fr.m_data.at(line).first.length()) {
6780 skipSpacesUpTo1Line<Trait>(line, pos, po.m_fr.m_data);
6781 }
6782 }
6783
6784 return {line, pos, false, {}, startLine};
6785}
6786
6787template<class Trait>
6788inline std::tuple<typename Trait::String, typename Trait::String, typename Parser<Trait>::Delims::iterator, bool>
6789Parser<Trait>::checkForInlineLink(typename Delims::iterator it,
6790 typename Delims::iterator last,
6791 TextParsingOpts<Trait> &po,
6792 WithPosition *urlPos)
6793{
6794 long long int p = it->m_pos + it->m_len;
6795 long long int l = it->m_line;
6796 bool ok = false;
6797 typename Trait::String dest, title;
6798 long long int destStartLine = 0;
6799
6800 std::tie(l, p, ok, dest, destStartLine) = readLinkDestination<Trait>(l, p, po, urlPos);
6801
6802 if (!ok) {
6803 return {{}, {}, it, false};
6804 }
6805
6806 long long int s = 0;
6807
6808 std::tie(l, p, ok, title, s) = readLinkTitle<Trait>(l, p, po);
6809
6810 skipSpacesUpTo1Line<Trait>(l, p, po.m_fr.m_data);
6811
6812 if (!ok || (l >= (long long int)po.m_fr.m_data.size() || p >= po.m_fr.m_data.at(l).first.length() ||
6813 po.m_fr.m_data.at(l).first[p] != Trait::latin1ToChar(')'))) {
6814 return {{}, {}, it, false};
6815 }
6816
6817 for (; it != last; ++it) {
6818 if (it->m_line == l && it->m_pos == p) {
6819 return {dest, title, it, true};
6820 }
6821 }
6822
6823 return {{}, {}, it, false};
6824}
6825
6826template<class Trait>
6827inline std::tuple<typename Trait::String, typename Trait::String, typename Parser<Trait>::Delims::iterator, bool>
6828Parser<Trait>::checkForRefLink(typename Delims::iterator it,
6829 typename Delims::iterator last,
6830 TextParsingOpts<Trait> &po,
6831 WithPosition *urlPos)
6832{
6833 long long int p = it->m_pos + it->m_len + 1;
6834 long long int l = it->m_line;
6835 bool ok = false;
6836 typename Trait::String dest, title;
6837 long long int destStartLine = 0;
6838
6839 std::tie(l, p, ok, dest, destStartLine) = readLinkDestination<Trait>(l, p, po, urlPos);
6840
6841 if (!ok) {
6842 return {{}, {}, it, false};
6843 }
6844
6845 long long int titleStartLine = 0;
6846
6847 std::tie(l, p, ok, title, titleStartLine) = readLinkTitle<Trait>(l, p, po);
6848
6849 if (!ok) {
6850 return {{}, {}, it, false};
6851 }
6852
6853 if (!title.isEmpty()) {
6854 p = skipSpaces<Trait>(p, po.m_fr.m_data.at(l).first.asString());
6855
6856 if (titleStartLine == destStartLine && p < po.m_fr.m_data.at(l).first.length()) {
6857 return {{}, {}, it, false};
6858 } else if (titleStartLine != destStartLine && p < po.m_fr.m_data.at(l).first.length()) {
6859 l = destStartLine;
6860 p = po.m_fr.m_data.at(l).first.length();
6861 title.clear();
6862 }
6863 }
6864
6865 for (; it != last; ++it) {
6866 if (it->m_line > l || (it->m_line == l && it->m_pos >= p)) {
6867 break;
6868 }
6869 }
6870
6871 po.m_line = l;
6872 po.m_pos = p;
6873
6874 return {dest, title, std::prev(it), true};
6875}
6876
6877template<class Trait>
6878inline typename Parser<Trait>::Delims::iterator
6879Parser<Trait>::checkForImage(typename Delims::iterator it,
6880 typename Delims::iterator last,
6881 TextParsingOpts<Trait> &po)
6882{
6883 const auto start = it;
6884
6885 typename MdBlock<Trait>::Data text;
6886
6887 po.m_wasRefLink = false;
6888 po.m_firstInParagraph = false;
6889
6890 WithPosition textPos;
6891 std::tie(text, it) = checkForLinkText(it, last, po, &textPos);
6892
6893 if (it != start) {
6894 if (it->m_pos + it->m_len < po.m_fr.m_data.at(it->m_line).first.length()) {
6895 // Inline -> (
6896 if (po.m_fr.m_data.at(it->m_line).first[it->m_pos + it->m_len] == Trait::latin1ToChar('(')) {
6897 typename Trait::String url, title;
6898 typename Delims::iterator iit;
6899 bool ok;
6900
6901 WithPosition urlPos;
6902 std::tie(url, title, iit, ok) = checkForInlineLink(std::next(it), last, po, &urlPos);
6903
6904 if (ok) {
6905 if (!po.m_collectRefLinks) {
6906 po.m_parent->appendItem(
6907 makeImage(url, text, po, false, start->m_line, start->m_pos,
6908 iit->m_line, iit->m_pos + iit->m_len, textPos, urlPos));
6909 }
6910
6911 po.m_line = iit->m_line;
6912 po.m_pos = iit->m_pos + iit->m_len;
6913
6914 return iit;
6915 } else if (createShortcutImage(text, po, start->m_line, start->m_pos, start->m_line,
6916 start->m_pos + start->m_len, it, {}, false, textPos, {})) {
6917 return it;
6918 }
6919 }
6920 // Reference -> [
6921 else if (po.m_fr.m_data.at(it->m_line).first[it->m_pos + it->m_len] == Trait::latin1ToChar('[')) {
6922 typename MdBlock<Trait>::Data label;
6923 typename Delims::iterator lit;
6924
6925 WithPosition labelPos;
6926 std::tie(label, lit) = checkForLinkLabel(std::next(it), last, po, &labelPos);
6927
6928 if (lit != std::next(it)) {
6929 const auto isLabelEmpty = toSingleLine(label).isEmpty();
6930
6931 if (!isLabelEmpty
6932 && createShortcutImage(label,
6933 po,
6934 start->m_line,
6935 start->m_pos,
6936 start->m_line,
6937 start->m_pos + start->m_len,
6938 lit,
6939 text,
6940 true,
6941 labelPos,
6942 textPos)) {
6943 return lit;
6944 } else if (isLabelEmpty
6945 && createShortcutImage(text,
6946 po,
6947 start->m_line,
6948 start->m_pos,
6949 start->m_line,
6950 start->m_pos + start->m_len,
6951 lit,
6952 {},
6953 false,
6954 textPos,
6955 {})) {
6956 return lit;
6957 }
6958 } else if (createShortcutImage(text, po, start->m_line, start->m_pos, start->m_line,
6959 start->m_pos + start->m_len, it, {}, false, textPos, {})) {
6960 return it;
6961 }
6962 } else {
6963 return checkShortcut(start, last, po, &Parser<Trait>::createShortcutImage);
6964 }
6965 } else {
6966 return checkShortcut(start, last, po, &Parser<Trait>::createShortcutImage);
6967 }
6968 }
6969
6970 return start;
6971}
6972
//! Try to parse a link-like construct that starts at the opening square bracket \p it.
//!
//! The link text is read with checkForLinkText(). After that the branch is selected:
//!  - text starting with "^" (single line, closed on the line it was opened) is handled
//!    as a footnote reference;
//!  - ':' right after the closing delimiter is handled as a reference definition;
//!  - '(' right after the closing delimiter is handled as an inline link;
//!  - '[' right after the closing delimiter is handled as a reference link with a label;
//!  - anything else is forwarded to checkShortcut() with createShortcutLink().
//!
//! \param it Delimiter the construct starts with.
//! \param last End of the delimiters.
//! \param po Text parsing options. m_wasRefLink and m_firstInParagraph are reset to false
//! here; m_line and m_pos are advanced when a footnote reference or an inline link is consumed.
//! \return Iterator produced by the branch that matched, or the starting iterator
//! when nothing was recognized.
6973template<class Trait>
6974inline typename Parser<Trait>::Delims::iterator
6975Parser<Trait>::checkForLink(typename Delims::iterator it,
6976 typename Delims::iterator last,
6977 TextParsingOpts<Trait> &po)
6978{
6979 const auto start = it;
6980
6981 typename MdBlock<Trait>::Data text;
6982
 // Keep the incoming flags: they are cleared right away, but the
 // reference-definition branch below still needs their previous values.
6983 const auto wasRefLink = po.m_wasRefLink;
6984 const auto firstInParagraph = po.m_firstInParagraph;
6985 po.m_wasRefLink = false;
6986 po.m_firstInParagraph = false;
6987
 // Index of the first non-space character in the current line.
6988 const auto ns = skipSpaces<Trait>(0, po.m_fr.m_data.at(po.m_line).first.asString());
6989
6990 WithPosition textPos;
6991 std::tie(text, it) = checkForLinkText(it, last, po, &textPos);
6992
 // checkForLinkText() returning the start iterator means that no link text was found.
6993 if (it != start) {
6994 // Footnote reference.
 // NOTE(review): text.front() is used without an emptiness check - presumably
 // checkForLinkText() never returns empty data together with it != start; confirm.
6995 if (text.front().first.asString().startsWith(Trait::latin1ToString("^")) &&
6996 text.front().first.asString().length() > 1 && text.size() == 1 &&
6997 start->m_line == it->m_line) {
6998 if (!po.m_collectRefLinks) {
 // Footnote identifier has the form "#<LABEL>/<working path>/<file name>".
6999 std::shared_ptr<FootnoteRef<Trait>> fnr(new FootnoteRef<Trait>(
7000 Trait::latin1ToString("#") + toSingleLine(text).toCaseFolded().toUpper() +
7001 Trait::latin1ToString("/") + (po.m_workingPath.isEmpty() ? typename Trait::String() :
7002 po.m_workingPath + Trait::latin1ToString("/")) + po.m_fileName));
7003 fnr->setStartColumn(po.m_fr.m_data.at(start->m_line).first.virginPos(start->m_pos));
7004 fnr->setStartLine(po.m_fr.m_data.at(start->m_line).second.m_lineNumber);
7005 fnr->setEndColumn(po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len - 1));
7006 fnr->setEndLine(po.m_fr.m_data.at(it->m_line).second.m_lineNumber);
7007 fnr->setIdPos(textPos);
7008
 // Rebuild the text of the reference as "[" + lines joined with "\n" + "]".
7009 typename Trait::String fnrText = Trait::latin1ToString("[");
7010 bool firstFnrText = true;
7011
7012 for (const auto &t : text) {
7013 if (!firstFnrText) {
7014 fnrText.push_back(Trait::latin1ToString("\n"));
7015 }
7016
7017 firstFnrText = false;
7018
7019 fnrText.push_back(t.first.asString());
7020 }
7021
7022 fnrText.push_back(Trait::latin1ToString("]"));
7023 fnr->setText(fnrText);
7024 po.m_parent->appendItem(fnr);
7025
7026 initLastItemWithOpts<Trait>(po, fnr);
7027 }
7028
 // The position is advanced past the closing delimiter even when only collecting reference links.
7029 po.m_line = it->m_line;
7030 po.m_pos = it->m_pos + it->m_len;
7031
7032 return it;
 // The remaining branches look at the character right after the closing delimiter,
 // so it must exist in the line.
7033 } else if (it->m_pos + it->m_len < po.m_fr.m_data.at(it->m_line).first.length()) {
7034 // Reference definition -> :
7035 if (po.m_fr.m_data.at(it->m_line).first[it->m_pos + it->m_len] == Trait::latin1ToChar(':')) {
7036 // Reference definitions allowed only at start of paragraph.
 // Additionally the opening bracket must be the first non-space character
 // of the line and be indented by less than 4 characters.
7037 if ((po.m_line == 0 || wasRefLink || firstInParagraph) && ns < 4 && start->m_pos == ns) {
7038 typename Trait::String url, title;
7039 typename Delims::iterator iit;
7040 bool ok;
7041
7042 WithPosition labelPos;
7043
 // Read the bracket content again, this time as a link label.
7044 std::tie(text, it) = checkForLinkLabel(start, last, po, &labelPos);
7045
7046 if (it != start && !toSingleLine(text).simplified().isEmpty()) {
7047 WithPosition urlPos;
7048 std::tie(url, title, iit, ok) = checkForRefLink(it, last, po, &urlPos);
7049
7050 if (ok) {
 // Key of the labeled link: "#<LABEL>/<working path>/<file name>".
7051 const auto label = Trait::latin1ToString("#") +
7052 toSingleLine(text).toCaseFolded().toUpper() +
7053 Trait::latin1ToString("/") +
7054 (po.m_workingPath.isEmpty() ? typename Trait::String() :
7055 po.m_workingPath + Trait::latin1ToString("/")) + po.m_fileName;
7056
7057 std::shared_ptr<Link<Trait>> link(new Link<Trait>);
7058 link->setStartColumn(po.m_fr.m_data.at(start->m_line).first.virginPos(
7059 start->m_pos));
7060 link->setStartLine(po.m_fr.m_data.at(start->m_line).second.m_lineNumber);
7061
 // End of the definition is the position right before the current po.m_line/po.m_pos.
 // NOTE(review): presumably checkForRefLink() moved po.m_line/po.m_pos past the definition - confirm.
7062 const auto endPos = prevPosition(po.m_fr,
7063 po.m_fr.m_data.at(po.m_line).first.virginPos(po.m_pos),
7064 po.m_fr.m_data.at(po.m_line).second.m_lineNumber);
7065
7066 link->setEndColumn(endPos.first);
7067 link->setEndLine(endPos.second);
7068
7069 link->setTextPos(labelPos);
7070 link->setUrlPos(urlPos);
7071
7072 url = removeBackslashes<typename Trait::String, Trait>(
7073 replaceEntity<Trait>(url));
7074
 // URLs that point to existing files are replaced with absolute file paths.
7075 if (!url.isEmpty()) {
7076 if (Trait::fileExists(url)) {
7077 url = Trait::absoluteFilePath(url);
7078 } else if (Trait::fileExists(url, po.m_workingPath)) {
7079 url = Trait::absoluteFilePath(
7080 (po.m_workingPath.isEmpty() ? typename Trait::String() :
7081 po.m_workingPath + Trait::latin1ToString("/")) + url);
7082 }
7083 }
7084
7085 link->setUrl(url);
7086
7087 po.m_wasRefLink = true;
7088
 // An already registered label is not overwritten.
7089 if (po.m_doc->labeledLinks().find(label) == po.m_doc->labeledLinks().cend()) {
7090 po.m_doc->insertLabeledLink(label, link);
7091 }
7092
7093 return iit;
7094 } else {
7095 return checkShortcut(start, last, po, &Parser<Trait>::createShortcutLink);
7096 }
7097 } else {
7098 return start;
7099 }
7100 } else {
7101 return checkShortcut(start, last, po, &Parser<Trait>::createShortcutLink);
7102 }
7103 }
7104 // Inline -> (
7105 else if (po.m_fr.m_data.at(it->m_line).first[it->m_pos + it->m_len] == Trait::latin1ToChar('(')) {
7106 typename Trait::String url, title;
7107 typename Delims::iterator iit;
7108 bool ok;
7109
7110 WithPosition urlPos;
7111 std::tie(url, title, iit, ok) = checkForInlineLink(std::next(it), last, po, &urlPos);
7112
7113 if (ok) {
7114 const auto link = makeLink(url,
7115 removeBackslashes<Trait>(text),
7116 po,
7117 false,
7118 start->m_line,
7119 start->m_pos,
7120 iit->m_line,
7121 iit->m_pos + iit->m_len,
7122 textPos,
7123 urlPos);
7124
 // makeLink() may return a null pointer; in that case nothing is consumed.
7125 if (link.get()) {
7126 if (!po.m_collectRefLinks) {
7127 po.m_parent->appendItem(link);
7128 }
7129
7130 po.m_line = iit->m_line;
7131 po.m_pos = iit->m_pos + iit->m_len;
7132
7133 return iit;
7134 } else {
7135 return start;
7136 }
 // Inline part is not valid: try the bracket content as a shortcut reference link.
7137 } else if (createShortcutLink(text, po, start->m_line, start->m_pos, start->m_line,
7138 start->m_pos + start->m_len, it, {}, false, textPos, {})) {
7139 return it;
7140 }
7141 }
7142 // Reference -> [
7143 else if (po.m_fr.m_data.at(it->m_line).first[it->m_pos + it->m_len] == Trait::latin1ToChar('[')) {
7144 typename MdBlock<Trait>::Data label;
7145 typename Delims::iterator lit;
7146
7147 WithPosition labelPos;
7148 std::tie(label, lit) = checkForLinkLabel(std::next(it), last, po, &labelPos);
7149
7150 const auto isLabelEmpty = toSingleLine(label).isEmpty();
7151
 // checkForLinkLabel() returning its input iterator means that no label was read.
7152 if (lit != std::next(it)) {
 // Non-empty label: the label is the reference and the text is the link text.
7153 if (!isLabelEmpty
7154 && createShortcutLink(label,
7155 po,
7156 start->m_line,
7157 start->m_pos,
7158 start->m_line,
7159 start->m_pos + start->m_len,
7160 lit,
7161 text,
7162 true,
7163 labelPos,
7164 textPos)) {
7165 return lit;
 // Empty label: the text itself is used as the reference.
7166 } else if (isLabelEmpty
7167 && createShortcutLink(text,
7168 po,
7169 start->m_line,
7170 start->m_pos,
7171 start->m_line,
7172 start->m_pos + start->m_len,
7173 lit,
7174 {},
7175 false,
7176 textPos,
7177 {})) {
7178 return lit;
7179 }
7180 } else if (createShortcutLink(text, po, start->m_line, start->m_pos, start->m_line,
7181 start->m_pos + start->m_len, it, {}, false, textPos, {})) {
7182 return it;
7183 }
7184 } else {
7185 return checkShortcut(start, last, po, &Parser<Trait>::createShortcutLink);
7186 }
7187 } else {
 // Closing delimiter is the last character of the line: only a shortcut link is possible.
7188 return checkShortcut(start, last, po, &Parser<Trait>::createShortcutLink);
7189 }
7190 }
7191
 // Nothing was recognized.
7192 return start;
7193}
7194
7195//! Close style.
7196template<class Trait>
7197inline void
7198closeStyle(std::vector<typename TextParsingOpts<Trait>::StyleInfo> &styles,
7199 Style s)
7200{
7201 const auto it = std::find_if(styles.crbegin(), styles.crend(), [&](const auto &p) {
7202 return (p.m_style == s);
7203 });
7204
7205 if (it != styles.crend()) {
7206 styles.erase(it.base() - 1);
7207 }
7208}
7209
7210//! Apply styles.
7211template<class Trait>
7212inline void
7213applyStyles(int &opts,
7214 std::vector<typename TextParsingOpts<Trait>::StyleInfo> &styles)
7215{
7216 opts = 0;
7217
7218 for (const auto &s : styles) {
7219 switch (s.m_style) {
7221 opts |= StrikethroughText;
7222 break;
7223
7224 case Style::Italic1:
7225 case Style::Italic2:
7226 opts |= ItalicText;
7227 break;
7228
7229 case Style::Bold1:
7230 case Style::Bold2:
7231 opts |= BoldText;
7232 break;
7233
7234 default:
7235 break;
7236 }
7237 }
7238}
7239
7240template<class Trait>
7241inline int
7242Parser<Trait>::emphasisToInt(typename Delimiter::DelimiterType t)
7243{
7244 switch (t) {
7245 case Delimiter::Strikethrough:
7246 return 0;
7247
7248 case Delimiter::Emphasis1:
7249 return 1;
7250
7251 case Delimiter::Emphasis2:
7252 return 2;
7253
7254 default:
7255 return -1;
7256 }
7257}
7258
7259template<class Trait>
7260inline void
7261Parser<Trait>::createStyles(std::vector<std::pair<Style, long long int>> & styles,
7262 typename Delimiter::DelimiterType t,
7263 long long int style)
7264{
7265 if (t != Delimiter::Strikethrough) {
7266 if (style % 2 == 1) {
7267 styles.push_back({t == Delimiter::Emphasis1 ? Style::Italic1 : Style::Italic2, 1});
7268 }
7269
7270 if (style >= 2) {
7271 for (long long int i = 0; i < style / 2; ++i) {
7272 styles.push_back({t == Delimiter::Emphasis1 ? Style::Bold1 : Style::Bold2, 2});
7273 }
7274 }
7275 } else {
7276 styles.push_back({Style::Strikethrough, style});
7277 }
7278}
7279
7280template<class Trait>
7281inline std::vector<std::pair<Style, long long int>>
7282Parser<Trait>::createStyles(typename Delimiter::DelimiterType t,
7283 const std::vector<long long int> &styles,
7284 long long int lastStyle)
7285{
7286 std::vector<std::pair<Style, long long int>> ret;
7287
7288 createStyles(ret, t, lastStyle);
7289
7290 for (auto it = styles.crbegin(), last = styles.crend(); it != last; ++it) {
7291 createStyles(ret, t, *it);
7292 }
7293
7294 return ret;
7295}
7296
7297template<class Trait>
7298inline bool
7299Parser<Trait>::isSequence(typename Delims::iterator it,
7300 long long int itLine,
7301 long long int itPos,
7302 typename Delimiter::DelimiterType t)
7303{
7304 return (itLine == it->m_line && itPos + it->m_len == it->m_pos && it->m_type == t);
7305}
7306
7307template<class Trait>
7308inline typename Parser<Trait>::Delims::iterator
7309Parser<Trait>::readSequence(typename Delims::iterator it,
7310 typename Delims::iterator last,
7311 long long int &line,
7312 long long int &pos,
7313 long long int &len,
7314 long long int &itCount)
7315{
7316 line = it->m_line;
7317 pos = it->m_pos;
7318 len = it->m_len;
7319 const auto t = it->m_type;
7320 itCount = 1;
7321
7322 it = std::next(it);
7323
7324 while (it != last && isSequence(it, line, pos, t)) {
7325 pos += it->m_len;
7326 len += it->m_len;
7327
7328 ++it;
7329 ++itCount;
7330 }
7331
7332 return std::prev(it);
7333}
7334
//! \return Is the sum of \p i1 and \p i2 a multiple of 3 while
//! \p i1 and \p i2 are not both multiples of 3?
inline bool
isMult3(long long int i1, long long int i2)
{
    const bool sumIsMultiple = ((i1 + i2) % 3 == 0);
    const bool bothAreMultiples = (i1 % 3 == 0) && (i2 % 3 == 0);

    return (sumIsMultiple && !bothAreMultiples);
}
7340
7341template<class Trait>
7342inline std::tuple<bool, std::vector<std::pair<Style, long long int>>, long long int, long long int>
7343Parser<Trait>::isStyleClosed(typename Delims::iterator first,
7344 typename Delims::iterator it,
7345 typename Delims::iterator last,
7346 typename Delims::iterator &stackBottom,
7347 TextParsingOpts<Trait> &po)
7348{
7349 const auto open = it;
7350 long long int openPos, openLength, itCount, lengthFromIt, tmp;
7351
7352 it = std::next(readSequence(first, open, last, openPos, openLength, tmp, lengthFromIt, itCount).second);
7353
7354 const auto length = lengthFromIt;
7355 long long int itLine, itPos, itLength;
7356
7357 struct RollbackValues {
7358 RollbackValues(TextParsingOpts<Trait> &po,
7359 long long int line,
7360 long long int pos,
7361 bool collectRefLinks,
7362 typename Delims::iterator &stackBottom,
7363 typename Delims::iterator last)
7364 : m_po(po)
7365 , m_line(line)
7366 , m_pos(pos)
7367 , m_collectRefLinks(collectRefLinks)
7368 , m_stackBottom(stackBottom)
7369 , m_last(last)
7370 , m_it(m_last)
7371 {
7372 }
7373
7374 void setIterator(typename Delims::iterator it)
7375 {
7376 m_it = it;
7377 }
7378
7379 ~RollbackValues()
7380 {
7381 m_po.m_line = m_line;
7382 m_po.m_pos = m_pos;
7383 m_po.m_collectRefLinks = m_collectRefLinks;
7384
7385 if (m_it != m_last && (m_it > m_stackBottom || m_stackBottom == m_last)) {
7386 m_stackBottom = m_it;
7387 }
7388 }
7389
7390 TextParsingOpts<Trait> &m_po;
7391 long long int m_line;
7392 long long int m_pos;
7393 bool m_collectRefLinks;
7394 typename Delims::iterator &m_stackBottom;
7395 typename Delims::iterator m_last;
7396 typename Delims::iterator m_it;
7397 };
7398
7399 RollbackValues rollback(po, po.m_line, po.m_pos, po.m_collectRefLinks, stackBottom, last);
7400
7401 po.m_collectRefLinks = true;
7402
7403 std::vector<long long int> styles;
7404
7405 struct Opener {
7406 std::vector<typename Delims::iterator> m_its;
7407 long long int m_length;
7408 };
7409
7410 std::vector<Opener> openers;
7411
7412 std::function<void(long long int, long long int)> dropOpeners;
7413
7414 dropOpeners = [&openers](long long int pos, long long int line) {
7415 while (!openers.empty()) {
7416 if (openers.back().m_its.front()->m_line > line || (openers.back().m_its.front()->m_line == line &&
7417 openers.back().m_its.front()->m_pos > pos)) {
7418 std::for_each( openers.back().m_its.begin(), openers.back().m_its.end(),
7419 [](auto &i) { i->m_skip = true; });
7420 openers.pop_back();
7421 } else {
7422 break;
7423 }
7424 }
7425 };
7426
7427 auto tryCloseEmphasis = [&dropOpeners, this, &openers, &open](typename Delims::iterator first,
7428 typename Delims::iterator it,
7429 typename Delims::iterator last) -> bool
7430 {
7431 const auto type = it->m_type;
7432 const auto both = it->m_leftFlanking && it->m_rightFlanking;
7433 long long int tmp1, tmp2, tmp3, tmp4;
7434 long long int closeLength;
7435
7436 it = this->readSequence(first, it, last, tmp1, closeLength, tmp2, tmp3, tmp4).first;
7437 it = std::prev(it);
7438
7439 long long int tmpLength = closeLength;
7440
7441 for (;; --it) {
7442 switch (it->m_type) {
7443 case Delimiter::Strikethrough: {
7444 if (it->m_leftFlanking && it->m_len == closeLength && type == it->m_type) {
7445 dropOpeners(it->m_pos, it->m_line);
7446 return true;
7447 }
7448 } break;
7449
7450 case Delimiter::Emphasis1:
7451 case Delimiter::Emphasis2:
7452 {
7453 if (it->m_leftFlanking && type == it->m_type) {
7454 long long int pos, len;
7455 this->readSequence(first, it, last, pos, len, tmp1, tmp2, tmp3);
7456
7457 if ((both || (it->m_leftFlanking && it->m_rightFlanking)) && isMult3(len, closeLength)) {
7458 continue;
7459 }
7460
7461 dropOpeners(pos - len, it->m_line);
7462
7463 if (tmpLength >= len) {
7464 tmpLength -= len;
7465
7466 if (open->m_type == it->m_type) {
7467 openers.pop_back();
7468 }
7469
7470 if (!tmpLength) {
7471 return true;
7472 }
7473 } else {
7474 if (open->m_type == it->m_type) {
7475 openers.back().m_length -= tmpLength;
7476 }
7477
7478 return true;
7479 }
7480 }
7481 } break;
7482
7483 default:
7484 break;
7485 }
7486
7487 if (it == first) {
7488 break;
7489 }
7490 }
7491
7492 return false;
7493 };
7494
7495 auto fillIterators = [](typename Delims::iterator first,
7496 typename Delims::iterator last) -> std::vector<typename Delims::iterator>
7497 {
7498 std::vector<typename Delims::iterator> res;
7499
7500 for (; first != last; ++first) {
7501 res.push_back(first);
7502 }
7503
7504 res.push_back(last);
7505
7506 return res;
7507 };
7508
7509 for (; it != last; ++it) {
7510 if (it > stackBottom) {
7511 return {false, {{Style::Unknown, 0}}, open->m_len, 1};
7512 }
7513
7514 if (it->m_line <= po.m_lastTextLine) {
7515 po.m_line = it->m_line;
7516
7517 switch (it->m_type) {
7518 case Delimiter::SquareBracketsOpen:
7519 it = checkForLink(it, last, po);
7520 break;
7521
7522 case Delimiter::ImageOpen:
7523 it = checkForImage(it, last, po);
7524 break;
7525
7526 case Delimiter::Less:
7527 it = checkForAutolinkHtml(it, last, po, false);
7528 break;
7529
7530 case Delimiter::Strikethrough: {
7531 if (open->m_type == it->m_type && open->m_len == it->m_len && it->m_rightFlanking) {
7532 rollback.setIterator(it);
7533 return {true, createStyles(open->m_type, styles, open->m_len), open->m_len, 1};
7534 } else if (it->m_rightFlanking && tryCloseEmphasis(open, it, last)) {
7535 } else if (it->m_leftFlanking && open->m_type == it->m_type) {
7536 openers.push_back({{it}, it->m_len});
7537 }
7538 } break;
7539
7540 case Delimiter::Emphasis1:
7541 case Delimiter::Emphasis2: {
7542 if (open->m_type == it->m_type) {
7543 const auto itBoth = (it->m_leftFlanking && it->m_rightFlanking);
7544
7545 if (it->m_rightFlanking) {
7546 bool notCheck = (open->m_leftFlanking && open->m_rightFlanking) || itBoth;
7547
7548 long long int count;
7549 auto firstIt = it;
7550 it = readSequence(it, last, itLine, itPos, itLength, count);
7551
7552 if (notCheck) {
7553 notCheck = isMult3(openLength, itLength);
7554 }
7555
7556 if (!openers.empty()) {
7557 long long int i = openers.size() - 1;
7558 auto &top = openers[i];
7559
7560 while (!openers.empty()) {
7561 if (i >= 0) {
7562 top = openers[i];
7563 } else {
7564 break;
7565 }
7566
7567 if ((itBoth || (top.m_its.front()->m_rightFlanking && top.m_its.front()->m_leftFlanking))
7568 && isMult3(itLength, top.m_length)) {
7569 --i;
7570 continue;
7571 }
7572
7573 if (top.m_length <= itLength) {
7574 itLength -= top.m_length;
7575 openers.erase(openers.begin() + i);
7576 } else {
7577 top.m_length -= itLength;
7578 itLength = 0;
7579 }
7580
7581 --i;
7582
7583 if (!itLength) {
7584 break;
7585 }
7586 }
7587 }
7588
7589 if (itLength) {
7590 if (!notCheck) {
7591 if (itLength >= lengthFromIt) {
7592 rollback.setIterator(it);
7593 return {true, createStyles(open->m_type, styles, lengthFromIt), length, itCount};
7594 } else {
7595 styles.push_back(itLength);
7596 lengthFromIt -= itLength;
7597 }
7598 } else if (firstIt->m_leftFlanking) {
7599 openers.push_back({fillIterators(firstIt, it), itLength});
7600 }
7601 }
7602 } else {
7603 long long int count;
7604 auto firstIt = it;
7605 it = readSequence(it, last, itLine, itPos, itLength, count);
7606 openers.push_back({fillIterators(firstIt, it), itLength});
7607 }
7608 } else if (it->m_rightFlanking) {
7609 tryCloseEmphasis(open, it, last);
7610 }
7611 } break;
7612
7613 case Delimiter::InlineCode:
7614 it = checkForInlineCode(it, last, po);
7615 break;
7616
7617 default:
7618 break;
7619 }
7620 } else {
7621 break;
7622 }
7623 }
7624
7625 return {false, {{Style::Unknown, 0}}, open->m_len, 1};
7626}
7627
7628template<class Trait>
7629inline typename Parser<Trait>::Delims::iterator
7630Parser<Trait>::incrementIterator(typename Delims::iterator it,
7631 typename Delims::iterator last,
7632 long long int count)
7633{
7634 const auto len = std::distance(it, last);
7635
7636 if (count < len) {
7637 return it + count;
7638 } else {
7639 return it + (len - 1);
7640 }
7641}
7642
7643//! Append close style.
7644template<class Trait>
7645inline void
7647 const StyleDelim &s)
7648{
7649 if (po.m_lastItemWithStyle) {
7650 po.m_lastItemWithStyle->closeStyles().push_back(s);
7651 }
7652}
7653
7654template<class Trait>
7655inline std::pair<typename Parser<Trait>::Delims::iterator, typename Parser<Trait>::Delims::iterator>
7656Parser<Trait>::readSequence(typename Delims::iterator first,
7657 typename Delims::iterator it,
7658 typename Delims::iterator last,
7659 long long int &pos,
7660 long long int &length,
7661 long long int &itCount,
7662 long long int &lengthFromIt,
7663 long long int &itCountFromIt)
7664{
7665 long long int line = it->m_line;
7666 pos = it->m_pos + it->m_len;
7667 long long int ppos = it->m_pos;
7668 const auto t = it->m_type;
7669 lengthFromIt = it->m_len;
7670 itCountFromIt = 1;
7671
7672 auto retItLast = std::next(it);
7673
7674 for (; retItLast != last; ++retItLast) {
7675 if (retItLast->m_line == line && pos == retItLast->m_pos && retItLast->m_type == t) {
7676 lengthFromIt += retItLast->m_len;
7677 pos = retItLast->m_pos + retItLast->m_len;
7678 ++itCountFromIt;
7679 } else {
7680 break;
7681 }
7682 }
7683
7684 length = lengthFromIt;
7685 itCount = itCountFromIt;
7686
7687 auto retItFirst = it;
7688 bool useNext = false;
7689
7690 if (retItFirst != first) {
7691 retItFirst = std::prev(retItFirst);
7692 useNext = true;
7693
7694 for (;; --retItFirst) {
7695 if (retItFirst->m_line == line && ppos - retItFirst->m_len == retItFirst->m_pos && retItFirst->m_type == t) {
7696 length += retItFirst->m_len;
7697 ppos = retItFirst->m_pos;
7698 ++itCount;
7699 useNext = false;
7700 } else {
7701 useNext = true;
7702 break;
7703 }
7704
7705 if (retItFirst == first) {
7706 break;
7707 }
7708 }
7709 }
7710
7711 return {useNext ? std::next(retItFirst) : retItFirst, std::prev(retItLast)};
7712}
7713
//! Process emphasis or strikethrough delimiter \p it.
//!
//! A right-flanking delimiter is tried first as a closer of the nearest opened style of
//! the same kind in po.m_styles. If it does not close anything, a left-flanking delimiter
//! is tried as an opener with isStyleClosed(). A left-flanking delimiter that does not open
//! a style is passed to makeText(), unless po.m_collectRefLinks is set.
//!
//! \param first First delimiter, forwarded to readSequence() and isStyleClosed().
//! \param it Delimiter to process.
//! \param last End of the delimiters.
//! \param stackBottom In/out bound forwarded to isStyleClosed(); it is reset to \p last
//! when \p it is behind it.
//! \param po Text parsing options; m_wasRefLink and m_firstInParagraph are reset to false.
//! \return Iterator to the last delimiter handled by this call, as calculated by incrementIterator().
7714template<class Trait>
7715inline typename Parser<Trait>::Delims::iterator
7716Parser<Trait>::checkForStyle(typename Delims::iterator first,
7717 typename Delims::iterator it,
7718 typename Delims::iterator last,
7719 typename Delims::iterator &stackBottom,
7720 TextParsingOpts<Trait> &po)
7721{
7722 long long int count = 1;
7723
7724 po.m_wasRefLink = false;
7725 po.m_firstInParagraph = false;
7726
 // Closing part.
7727 if (it->m_rightFlanking) {
7728 long long int pos, len, tmp1, tmp2;
 // len and count receive the length and the count of delimiters of the whole sequence around it.
7729 readSequence(first, it, last, pos, len, count, tmp1, tmp2);
7730 const auto t = it->m_type;
7731
7732 long long int opened = 0;
7733 bool bothFlanking = false;
7734
 // Look from the top of the stack of styles for the nearest style of the same kind as
 // the delimiter. Note that it and last are shadowed inside of this loop.
7735 for (auto it = po.m_styles.crbegin(), last = po.m_styles.crend(); it != last; ++it) {
7736 bool doBreak = false;
7737
7738 switch (t) {
7739 case Delimiter::Emphasis1: {
7740 if (it->m_style == Style::Italic1 || it->m_style == Style::Bold1) {
7741 opened = it->m_length;
7742 bothFlanking = it->m_bothFlanking;
7743 doBreak = true;
7744 }
7745 } break;
7746
7747 case Delimiter::Emphasis2: {
7748 if (it->m_style == Style::Italic2 || it->m_style == Style::Bold2) {
7749 opened = it->m_length;
7750 bothFlanking = it->m_bothFlanking;
7751 doBreak = true;
7752 }
7753 } break;
7754
7755 case Delimiter::Strikethrough: {
7756 if (it->m_style == Style::Strikethrough) {
7757 opened = it->m_length;
7758 bothFlanking = it->m_bothFlanking;
7759 doBreak = true;
7760 }
7761 } break;
7762
7763 default:
7764 break;
7765 }
7766
7767 if (doBreak)
7768 break;
7769 }
7770
 // isMult3() is consulted only when this delimiter is left-flanking too
 // or the opened style was created from a both-flanking delimiter.
7771 const bool sumMult3 = (it->m_leftFlanking || bothFlanking ? isMult3(opened, len) : false);
7772
7773 if (count && opened && !sumMult3) {
 // Do not close more than was opened.
7774 if (count > opened) {
7775 count = opened;
7776 }
7777
 // pos and line are shadowed here with the position in the original (virgin) text.
7778 auto pos = po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos);
7779 const auto line = po.m_fr.m_data.at(it->m_line).second.m_lineNumber;
7780
7781 if (it->m_type == Delimiter::Strikethrough) {
7782 const auto len = it->m_len;
7783
7784 for (auto i = 0; i < count; ++i) {
7785 closeStyle<Trait>(po.m_styles, Style::Strikethrough);
7786 appendCloseStyle(po, {StrikethroughText, pos, line, pos + len - 1, line});
7787 pos += len;
7788 }
7789 } else {
 // Odd count closes one italic style that takes one character.
7790 if (count % 2 == 1) {
7791 const auto st = (it->m_type == Delimiter::Emphasis1 ? Style::Italic1 : Style::Italic2);
7792
7793 closeStyle<Trait>(po.m_styles, st);
7794 appendCloseStyle(po, {ItalicText, pos, line, pos, line});
7795 ++pos;
7796 }
7797
 // Every pair closes one bold style that takes two characters.
7798 if (count >= 2) {
7799 const auto st = (it->m_type == Delimiter::Emphasis1 ? Style::Bold1 : Style::Bold2);
7800
7801 for (auto i = 0; i < count / 2; ++i) {
7802 closeStyle<Trait>(po.m_styles, st);
7803 appendCloseStyle(po, {BoldText, pos, line, pos + 1, line});
7804 pos += 2;
7805 }
7806 }
7807 }
7808
7809 applyStyles<Trait>(po.m_opts, po.m_styles);
7810
 // Move the current position right after the last consumed delimiter.
7811 const auto j = incrementIterator(it, last, count - 1);
7812
7813 po.m_pos = j->m_pos + j->m_len;
7814 po.m_line = j->m_line;
7815
7816 return j;
7817 }
7818 }
7819
 // Nothing was closed: count may have been changed by readSequence() above.
7820 count = 1;
7821
 // Opening part.
7822 if (it->m_leftFlanking) {
7823 switch (it->m_type) {
7824 case Delimiter::Strikethrough:
7825 case Delimiter::Emphasis1:
7826 case Delimiter::Emphasis2: {
7827 bool closed = false;
7828 std::vector<std::pair<Style, long long int>> styles;
7829 long long int len = 0;
7830
7831 if (it > stackBottom) {
7832 stackBottom = last;
7833 }
7834
 // Delimiter marked as skipped is not checked for a closer;
 // only the length of its sequence is read.
7835 if (it->m_skip) {
7836 closed = false;
7837 long long int tmp1, tmp2, tmp3;
7838 readSequence(it, last, tmp1, tmp2, len, tmp3);
7839 } else {
7840 std::tie(closed, styles, len, count) = isStyleClosed(first, it, last, stackBottom, po);
7841 }
7842
7843 if (closed) {
7844 auto pos = po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos);
7845 const auto line = po.m_fr.m_data.at(it->m_line).second.m_lineNumber;
7846
 // Push every opened style on the stack; p.second is the length of the style's delimiter.
7847 for (const auto &p : styles) {
7848 po.m_styles.push_back({p.first, p.second, it->m_leftFlanking && it->m_rightFlanking});
7849
7850 if (!po.m_collectRefLinks) {
7851 po.m_openStyles.push_back({styleToTextOption(p.first), pos, line,
7852 pos + p.second - 1, line});
7853 }
7854
7855 pos += p.second;
7856 }
7857
7858 po.m_pos = it->m_pos + len;
7859 po.m_line = it->m_line;
7860
7861 applyStyles<Trait>(po.m_opts, po.m_styles);
7862 } else if (!po.m_collectRefLinks) {
 // Not closed style: the delimiter is passed to makeText().
7863 makeText(it->m_line, it->m_pos + len, po);
7864 }
7865 } break;
7866
7867 default: {
7868 if (!po.m_collectRefLinks) {
7869 makeText(it->m_line, it->m_pos + it->m_len, po);
7870 }
7871 } break;
7872 }
7873 }
7874
 // Zero count would move the iterator backward below.
7875 if (!count) {
7876 count = 1;
7877 }
7878
7879 resetHtmlTag(po.m_html);
7880
7881 return incrementIterator(it, last, count - 1);
7882}
7883
7884//! Concatenate texts in block.
//!
//! Builds one Text item from the items in the range [it, last): text of every item is
//! appended to the resulting text, open and close styles are collected in order. Options and
//! start position are taken from the first item, end position is taken from the last item.
//! Every item of the range is cast to Text, the range is expected to be not empty.
//!
//! NOTE(review): the lines with the name of the function and its parameters (\p it, \p last
//! in the body) are absent in this listing - restore them from the original file.
7885template<class Trait>
7886inline std::shared_ptr<Text<Trait>>
7889{
7890 std::shared_ptr<Text<Trait>> t(new Text<Trait>);
7891 t->setOpts(std::static_pointer_cast<Text<Trait>>(*it)->opts());
7892 t->setStartColumn((*it)->startColumn());
7893 t->setStartLine((*it)->startLine());
7894
 // Close styles are accumulated separately and assigned at the end.
7895 typename ItemWithOpts<Trait>::Styles close;
7896
7897 typename Trait::String data;
7898
7899 for (; it != last; ++it) {
7900 const auto tt = std::static_pointer_cast<Text<Trait>>(*it);
7901
7902 data.push_back(tt->text());
7903
7904 if (!tt->openStyles().empty()) {
7905 std::copy(tt->openStyles().cbegin(), tt->openStyles().cend(),
7906 std::back_inserter(t->openStyles()));
7907 }
7908
7909 if (!tt->closeStyles().empty()) {
7910 std::copy(tt->closeStyles().cbegin(), tt->closeStyles().cend(),
7911 std::back_inserter(close));
7912 }
7913 }
7914
 // Step back to the last item of the range to read the end position.
7915 it = std::prev(it);
7916
7917 t->setText(data);
7918 t->setEndColumn((*it)->endColumn());
7919 t->setEndLine((*it)->endLine());
7920 t->closeStyles() = close;
7921
7922 return t;
7923}
7924
7925//! \return Is optimization type a semi one.
//!
//! NOTE(review): the line with the name and the parameter of the function and the lines
//! with the case labels that lead to "return true" are absent in this listing - restore
//! them from the original file. Any value without its own label gives false.
7926inline bool
7928{
7929 switch (t) {
7932 return true;
7933
7934 default:
7935 return false;
7936 }
7937}
7938
7939//! \return Is optimization type without raw data optimization?
//!
//! NOTE(review): the line with the name and the parameter of the function and the lines
//! with the case labels that lead to "return true" are absent in this listing - restore
//! them from the original file. Any value without its own label gives false.
7940inline bool
7942{
7943 switch (t) {
7946 return true;
7947
7948 default:
7949 return false;
7950 }
7951}
7952
7953//! Optimize Paragraph.
//!
//! Builds a new paragraph with the same position as \p p where runs of adjacent Text items
//! are merged with concatenateText(). A run is ended by an item that is not a Text, by a Text
//! with different options, by a Text that does not start on the line where the previous Text of
//! the run ended and, for semi optimization, by closing styles on the previous Text or opening
//! styles on the current one. Items that are not Text are appended as is.
//! Unless the optimization type is "without raw data", po.concatenateAuxText() is called for
//! the same runs.
//!
//! NOTE(review): the lines with the name and the parameters of the function (\p p, \p po,
//! \p type in the body) and one line of the body (8999 in the numbering of the listing)
//! are absent in this listing - restore them from the original file.
//!
//! \return New optimized paragraph; it is also assigned to \p p.
7954template<class Trait>
7955inline std::shared_ptr<Paragraph<Trait>>
7959{
7960 std::shared_ptr<Paragraph<Trait>> np(new Paragraph<Trait>);
7961 np->setStartColumn(p->startColumn());
7962 np->setStartLine(p->startLine());
7963 np->setEndColumn(p->endColumn());
7964 np->setEndLine(p->endLine());
7965
7966 int opts = TextWithoutFormat;
 // start == cend() means that there is no started run of texts.
7967 auto start = p->items().cend();
7968 long long int line = -1;
 // Bounds of the current run for po.concatenateAuxText().
7969 long long int auxStart = 0, auxIt = 0;
 // Set for semi optimization when the previous text has closing styles.
7970 bool finished = false;
7971
7972 for (auto it = p->items().cbegin(), last = p->items().cend(); it != last; ++it) {
7973 if ((*it)->type() == ItemType::Text) {
7974 const auto t = std::static_pointer_cast<Text<Trait>>(*it);
7975
7976 if (start == last) {
 // First text of a new run.
7977 start = it;
7978 opts = t->opts();
7979 line = t->endLine();
7980 finished = (isSemiOptimization(type) && !t->closeStyles().empty());
7981 } else {
 // The current text can not be merged into the run: flush the run and start a new one.
7982 if (opts != t->opts() || t->startLine() != line || finished ||
7983 (!t->openStyles().empty() && isSemiOptimization(type))) {
7984 if (!isWithoutRawDataOptimization(type)) {
7985 po.concatenateAuxText(auxStart, auxIt);
 // The expression below is equal to auxStart + 1.
7986 auxIt = auxIt - (auxIt - auxStart) + 1;
7987 auxStart = auxIt;
7988 }
7989
7990 np->appendItem(concatenateText<Trait>(start, it));
7991 start = it;
7992 opts = t->opts();
7993 line = t->endLine();
7994 }
7995
7996 finished = (isSemiOptimization(type) && !t->closeStyles().empty());
7997 }
7998
8000 ++auxIt;
8001 } else {
8002 finished = false;
8003
 // Not a text item ends the current run.
8004 if (start != last) {
8005 if (!isWithoutRawDataOptimization(type)) {
8006 po.concatenateAuxText(auxStart, auxIt);
8007 auxIt = auxIt - (auxIt - auxStart) + 1;
8008 auxStart = auxIt;
8009 }
8010
8011 np->appendItem(concatenateText<Trait>(start, it));
8012 start = last;
8013 opts = TextWithoutFormat;
8014 line = (*it)->endLine();
8015 }
8016
8017 np->appendItem((*it));
8018 }
8019 }
8020
 // Flush the run that lasts till the end of the paragraph.
8021 if (start != p->items().cend()) {
8022 np->appendItem(concatenateText<Trait>(start, p->items().cend()));
8023
8024 if (!isWithoutRawDataOptimization(type)) {
8025 po.concatenateAuxText(auxStart, po.m_rawTextData.size());
8026 }
8027 }
8028
8029 p = np;
8030
8031 return p;
8032}
8033
8034template<class Trait>
8035inline void
8036Parser<Trait>::parseTableInParagraph(TextParsingOpts<Trait> &po,
8037 std::shared_ptr<Paragraph<Trait>> parent,
8038 std::shared_ptr<Document<Trait>> doc,
8039 typename Trait::StringList &linksToParse,
8040 const typename Trait::String &workingPath,
8041 const typename Trait::String &fileName,
8042 bool collectRefLinks)
8043{
8044 MdBlock<Trait> fr;
8045 std::copy(po.m_fr.m_data.cbegin() + po.m_startTableLine, po.m_fr.m_data.cend(),
8046 std::back_inserter(fr.m_data));
8047 fr.m_emptyLineAfter = po.m_fr.m_emptyLineAfter;
8048
8049 parseTable(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks,
8050 po.m_columnsCount);
8051
8052 po.m_line = po.m_fr.m_data.size() - fr.m_data.size();
8053 po.m_pos = 0;
8054
8055 if (!fr.m_data.empty()) {
8056 po.m_detected = TextParsingOpts<Trait>::Detected::Code;
8057 }
8058}
8059
//! Normalize position: a not zero position \p pos that is equal to \p length on a
//! line \p line that is less than \p linesCount is turned into position 0 on the next line.
//! In any other case \p pos and \p line are left untouched.
inline void
normalizePos(long long int &pos,
             long long int &line,
             long long int length,
             long long int linesCount)
{
    const bool atEndOfLine = (pos != 0 && pos == length);

    if (!atEndOfLine || line >= linesCount) {
        return;
    }

    ++line;
    pos = 0;
}
8072
//! Check the line at the current position when HTML was detected and the last item of the
//! parent is RawHtml, or po.m_tmpHtml is set.
//!
//! The type of the line is examined only when the normalized position is the start of
//! an existing line. When the function does not return true inside of the switch,
//! po.m_detected is reset to Nothing unless dontClearDetection was set, po.m_tmpHtml is
//! reset, and false is returned.
//!
//! NOTE(review): the case labels of the switch are absent in this listing, so it is not
//! visible here which types of lines lead to which branch - restore them from the original
//! file. Judging by the name of the function the "return true" branches presumably
//! correspond to list and blockquote lines - confirm.
8073template<class Trait>
8074inline bool
8075Parser<Trait>::isListOrQuoteAfterHtml(TextParsingOpts<Trait> &po)
8076{
8077 if (po.m_detected == TextParsingOpts<Trait>::Detected::HTML &&
8078 ((!po.m_parent->items().empty() &&
8079 po.m_parent->items().back()->type() == ItemType::RawHtml) || po.m_tmpHtml.get())) {
 // po.m_tmpHtml is preferred to the last item of the parent.
8080 auto html = (po.m_tmpHtml.get() ? po.m_tmpHtml :
8081 std::static_pointer_cast<RawHtml<Trait>>(po.m_parent->items().back()));
8082
8083 bool dontClearDetection = false;
8084
8085 long long int line = po.m_line;
8086 long long int pos = po.m_pos;
8087
 // Position at the end of a line is turned into the start of the next line.
8088 normalizePos(pos, line, line < static_cast<long long int>(po.m_fr.m_data.size()) ?
8089 po.m_fr.m_data[line].first.length() : 0, po.m_fr.m_data.size());
8090
8091 if (pos == 0) {
8092 if (line < static_cast<long long int>(po.m_fr.m_data.size())) {
8093 const auto type = whatIsTheLine(po.m_fr.m_data[line].first);
8094
8095 switch (type) {
8097 int num = 0;
8098
 // Ordered list is accepted only when it starts with 1;
 // a line that is not an ordered list is accepted as is.
8099 if (isOrderedList<Trait>(po.m_fr.m_data[line].first.asString(), &num)) {
8100 if (num == 1)
8101 return true;
8102 } else {
8103 return true;
8104 }
8105 } break;
8106
8108 return true;
8109
 // This branch is accepted only when the HTML is a free tag.
8111 if (UnprotectedDocsMethods<Trait>::isFreeTag(html)) {
8112 return true;
8113 }
8114 } break;
8115
8117 dontClearDetection = true;
8118 break;
8119
8120 default:
8121 break;
8122 }
8123 }
8124 }
8125
8126 if (!dontClearDetection) {
8127 po.m_detected = TextParsingOpts<Trait>::Detected::Nothing;
8128 }
8129 }
8130
8131 po.m_tmpHtml.reset();
8132
8133 return false;
8134}
8135
8136//! Make Paragraph.
//!
//! Creates a new paragraph with the items from the range [first, last). Start position is
//! taken from the item \p first points to, end position is taken from the last appended item.
//! \p first is dereferenced before the range is checked, so it must point to a valid
//! item even when the range is empty; the end position is not set for an empty range.
//!
//! NOTE(review): the lines with the name and the parameters of the function (\p first,
//! \p last in the body) are absent in this listing - restore them from the original file.
8137template<class Trait>
8138inline std::shared_ptr<Paragraph<Trait>>
8141{
8142 auto p = std::make_shared<Paragraph<Trait>>();
8143
8144 p->setStartColumn((*first)->startColumn());
8145 p->setStartLine((*first)->startLine());
8146
 // End position is overwritten by every item, so the last one wins.
8147 for (; first != last; ++first) {
8148 p->appendItem(*first);
8149 p->setEndColumn((*first)->endColumn());
8150 p->setEndLine((*first)->endLine());
8151 }
8152
8153 return p;
8154}
8155
8156//! Split Paragraph and free HTML.
//!
//! Walks the items of \p p. Every RawHtml item that is a free tag splits the paragraph: the
//! items collected before it are made a paragraph that, if not empty, is optimized and
//! appended to the parent, then the HTML item is appended to the parent. Nothing is
//! appended when \p collectRefLinks is set.
//!
//! NOTE(review): the lines with the name of the function, a part of its parameters (parent
//! and \p po in the body) and a part of the call of optimizeParagraph() are absent in
//! this listing - restore them from the original file.
//!
//! \return Paragraph with the items left after the last free HTML tag: \p p itself when
//! there was no split, a new paragraph with the rest of the items, or an empty paragraph
//! when nothing is left.
8157template<class Trait>
8158inline std::shared_ptr<Paragraph<Trait>>
8160 std::shared_ptr<Paragraph<Trait>> p,
8162 bool collectRefLinks,
8163 bool fullyOptimizeParagraphs = true)
8164{
8165 auto first = p->items().cbegin();
8166 auto it = first;
8167 auto last = p->items().cend();
8168
8169 for (; it != last; ++it) {
 // first == last means that a new part starts at the current item.
8170 if (first == last) {
8171 first = it;
8172 }
8173
8174 if ((*it)->type() == ItemType::RawHtml &&
8175 UnprotectedDocsMethods<Trait>::isFreeTag(std::static_pointer_cast<RawHtml<Trait>>(*it))) {
 // p is shadowed here by the paragraph made of the items before the HTML.
8176 auto p = makeParagraph<Trait>(first, it);
8177
8178 if (!collectRefLinks) {
8179 if (!p->isEmpty()) {
8180 parent->appendItem(optimizeParagraph<Trait>(p, po,
8181 fullyOptimizeParagraphs ?
8184 }
8185
8186 parent->appendItem(*it);
8187 }
8188
8189 first = last;
8190 }
8191 }
8192
8193 if (first != last) {
8194 if (first != p->items().cbegin()) {
 // Keep as many last entries of the raw text data as there are texts in the rest.
8195 const auto c = std::count_if(first, last, [](const auto &i) {
8196 return (i->type() == MD::ItemType::Text);
8197 });
8198 po.m_rawTextData.erase(po.m_rawTextData.cbegin(), po.m_rawTextData.cbegin() +
8199 (po.m_rawTextData.size() - c));
8200
8201 return makeParagraph<Trait>(first, last);
8202 } else {
8203 return p;
8204 }
8205 } else {
 // Everything was moved to the parent.
8206 po.m_rawTextData.clear();
8207
8208 return std::make_shared<Paragraph<Trait>>();
8209 }
8210}
8211
8212//! \return Last virgin position of the item.
//! For the item types handled below this is the end column of the last closing
//! style delimiter when the item has any; otherwise the end column of the item
//! (or of its end delimiter for Code/Math). Any other item type yields -1.
//! NOTE(review): listing lines 8215 (function name and the `item` parameter) and
//! 8221 (one more `case` label) are absent here - confirm against the real header.
8213template<class Trait>
8214inline long long int
8216{
8217 switch (item->type()) {
8218 case ItemType::Text:
8219 case ItemType::Link:
8220 case ItemType::Image:
8222 case ItemType::RawHtml:
8223 {
8224 auto i = static_cast<ItemWithOpts<Trait> *>(item);
8225
8226 if (!i->closeStyles().empty()) {
8227 return i->closeStyles().back().endColumn();
8228 } else {
8229 return i->endColumn();
8230 }
8231 }
// Not reachable: both branches above return.
8232 break;
8233
8234 case ItemType::Code:
8235 case ItemType::Math:
8236 {
8237 auto c = static_cast<Code<Trait> *>(item);
8238
8239 if (!c->closeStyles().empty()) {
8240 return c->closeStyles().back().endColumn();
8241 } else {
8242 return c->endDelim().endColumn();
8243 }
8244 }
// Not reachable: both branches above return.
8245 break;
8246
8247 default:
8248 return -1;
8249 }
8250}
8251
8252//! Make heading.
//! Does nothing when \p collectRefLinks is true. Otherwise builds a Heading of the
//! given \p level from paragraph \p p, ending at \p lastColumn / \p lastLine, with
//! \p delim as its only delimiter, registers it in \p doc under its label and
//! appends it to \p parent.
//! NOTE(review): listing lines 8265 (last parameter, used below as `po`), 8290,
//! 8325, 8353 and 8388 are absent here; comments next to those gaps are hedged.
8253template<class Trait>
8254inline void
8255makeHeading(std::shared_ptr<Block<Trait>> parent,
8256 std::shared_ptr<Document<Trait>> doc,
8257 std::shared_ptr<Paragraph<Trait>> p,
8258 long long int lastColumn,
8259 long long int lastLine,
8260 int level,
8261 const typename Trait::String &workingPath,
8262 const typename Trait::String &fileName,
8263 bool collectRefLinks,
8264 const WithPosition &delim,
8266{
8267 if (!collectRefLinks) {
// A trailing LineBreak is removed from the paragraph. When the break was not made
// of whitespace only, its text is put back as ordinary text below.
8268 if (p->items().back()->type() == ItemType::LineBreak) {
8269 auto lb = std::static_pointer_cast<LineBreak<Trait>>(p->items().back());
8270 const auto lineBreakBySpaces = lb->text().simplified().isEmpty();
8271
8272 p = makeParagraph<Trait>(p->items().cbegin(), std::prev(p->items().cend()));
8273 const auto lineBreakPos = localPosFromVirgin(po.m_fr, lb->startColumn(), lb->startLine());
8274
8275 if (!p->isEmpty()) {
8276 if (p->items().back()->type() == ItemType::Text) {
8277 auto lt = std::static_pointer_cast<Text<Trait>>(p->items().back());
8278
8279 if (!lineBreakBySpaces) {
8280 auto text = po.m_fr.m_data.at(lineBreakPos.second).first.fullVirginString().sliced(
8281 lt->startColumn());
8282 po.m_rawTextData.back().m_str = text;
8283
8284 if (!lt->text()[0].isSpace()) {
8285 const auto notSpacePos = skipSpaces<Trait>(0, text);
8286
8287 text.remove(0, notSpacePos);
8288 }
8289
// NOTE(review): listing line 8290 is missing here; presumably it stores `text`
// into `lt` - confirm.
8291 }
8292
8293 lt->setEndColumn(lt->endColumn() + lb->text().length());
8294 } else {
8295 if (!lineBreakBySpaces) {
// The last item is not a Text: a new Text item is appended that covers the rest
// of the line after that item, up to the end of the removed line break.
8296 const auto lastItemVirginPos = lastVirginPositionInParagraph<Trait>(p->items().back().get());
8297 const auto lastItemPos = localPosFromVirgin(po.m_fr, lastItemVirginPos, lineBreakPos.second);
8298 const auto endOfLine = po.m_fr.m_data.at(lineBreakPos.second).first.virginSubString(
8299 lastItemPos.first + 1);
8300 auto t = std::make_shared<Text<Trait>>();
8301 t->setText(endOfLine);
8302 t->setStartColumn(lastItemVirginPos + 1);
8303 t->setStartLine(lb->startLine());
8304 t->setEndColumn(lb->endColumn());
8305 t->setEndLine(lb->endLine());
8306
8307 p->appendItem(t);
8308
8309 const auto pos = localPosFromVirgin(po.m_fr, lb->startColumn(), lb->startLine());
8310
8311 po.m_rawTextData.push_back({lb->text(), pos.first, pos.second});
8312 }
8313 }
8314 }
8315 }
8316
// Label text and its position in the source.
8317 std::pair<typename Trait::String, WithPosition> label;
8318
8319 if (p->items().back()->type() == ItemType::Text) {
8320 auto t = std::static_pointer_cast<Text<Trait>>(p->items().back());
8321
8322 if (t->opts() == TextWithoutFormat) {
8323 auto text = po.m_rawTextData.back();
8324 typename Trait::InternalString tmp(text.m_str);
// NOTE(review): listing line 8325 is missing here; presumably it fills `label`
// from `tmp` (and may cut the label out of `tmp`) - confirm.
8326
8327 if (!label.first.isEmpty()) {
// Drop the first and the last character of the found label.
8328 label.first = label.first.sliced(1, label.first.length() - 2);
8329
8330 if (tmp.asString().simplified().isEmpty()) {
// Nothing but whitespace is left: the text item is removed from the paragraph.
8331 p->removeItemAt(p->items().size() - 1);
8332 po.m_rawTextData.pop_back();
8333
8334 if (!p->items().empty()) {
8335 const auto last = std::static_pointer_cast<WithPosition>(p->items().back());
8336 p->setEndColumn(last->endColumn());
8337 p->setEndLine(last->endLine());
8338 }
8339 } else {
8340 const auto notSpacePos = tmp.virginPos(skipSpaces<Trait>(0, tmp.asString()));
8341 const auto virginLine = t->endLine();
8342
// The label starts after some non-space text: that text stays in `t`, which is
// shortened to end right before the label.
8343 if (label.second.startColumn() > notSpacePos) {
8344 auto text = tmp.fullVirginString().sliced(0, label.second.startColumn());
8345 po.m_rawTextData.back().m_str = text;
8346
8347 if (!t->text()[0].isSpace()) {
8348 const auto notSpacePos = skipSpaces<Trait>(0, text);
8349
8350 text.remove(0, notSpacePos);
8351 }
8352
// NOTE(review): listing line 8353 is missing here; presumably it stores `text`
// into `t` - confirm.
8354 t->setEndColumn(label.second.startColumn() - 1);
8355
8356 const auto lastPos = t->endColumn();
8357 const auto pos = localPosFromVirgin(po.m_fr, label.second.endColumn() + 1, virginLine);
8358
8359 if (pos.first != -1) {
// There is text after the label as well: it gets its own Text item.
8360 t = std::make_shared<Text<Trait>>();
8361 t->setStartColumn(label.second.endColumn() + 1);
8362 t->setStartLine(virginLine);
8363 t->setEndColumn(lastPos);
8364 t->setEndLine(virginLine);
8365 p->appendItem(t);
8366
8367 po.m_rawTextData.push_back({po.m_fr.m_data[pos.second].first.asString().sliced(pos.first),
8368 pos.first, pos.second});
8369 }
8370 }
8371
8372 const auto pos = localPosFromVirgin(po.m_fr, label.second.endColumn() + 1, virginLine);
8373
8374 if (pos.first != -1) {
8375 po.m_rawTextData.back() = {po.m_fr.m_data[pos.second].first.asString().sliced(pos.first),
8376 pos.first, pos.second};
8377
8378 auto text = po.m_rawTextData.back().m_str;
8379
8380 if (!text.simplified().isEmpty()) {
// Leading spaces are stripped only when this is the single item of the paragraph.
8381 if (p->items().size() == 1) {
8382 const auto ns = skipSpaces<Trait>(0, text);
8383
8384 text.remove(0, ns);
8385 }
8386
8387 t->setStartColumn(label.second.endColumn() + 1);
// NOTE(review): listing line 8388 is missing here; presumably it stores `text`
// into `t` - confirm.
8389 } else {
8390 po.m_rawTextData.pop_back();
8391 p->removeItemAt(p->items().size() - 1);
8392 }
8393 }
8394
8395 p->setEndColumn(t->endColumn());
8396 }
8397 } else {
8398 label.first.clear();
8399 }
8400
8401 label.second.setStartLine(t->startLine());
8402 label.second.setEndLine(t->endLine());
8403 }
8404 }
8405
8406 std::shared_ptr<Heading<Trait>> h(new Heading<Trait>);
8407 h->setStartColumn(p->startColumn());
8408 h->setStartLine(p->startLine());
8409 h->setEndColumn(lastColumn);
8410 h->setEndLine(lastLine);
8411 h->setLevel(level);
8412
8413 if (!p->items().empty()) {
8414 h->setText(p);
8415 }
8416
8417 h->setDelims({delim});
8418
// Without an explicit label one is generated from the paragraph content;
// otherwise the position of the explicit label is remembered in the heading.
8419 if (label.first.isEmpty() && !p->items().empty()) {
8420 label.first = Trait::latin1ToString("#") + paragraphToLabel(p.get());
8421 } else {
8422 h->setLabelPos(label.second);
8423 }
8424
8425 if (!label.first.isEmpty()) {
// Final label form: <label>/<workingPath>/<fileName>, where the working path part
// is left out when workingPath is empty.
8426 label.first += Trait::latin1ToString("/") + (!workingPath.isEmpty() ?
8427 workingPath + Trait::latin1ToString("/") : typename Trait::String()) + fileName;
8428
8429 h->setLabel(label.first);
8430
8431 doc->insertLabeledHeading(label.first, h);
8432 }
8433
8434 parent->appendItem(h);
8435 }
8436}
8437
8438//! \return Index of text item for the given index in raw text data.
8439template<class Trait>
8440inline long long int
8441textAtIdx(std::shared_ptr<Paragraph<Trait>> p,
8442 size_t idx)
8443{
8444 size_t i = 0;
8445
8446 for (auto it = p->items().cbegin(), last = p->items().cend(); it != last; ++it) {
8447 if ((*it)->type() == ItemType::Text) {
8448 if (i == idx) {
8449 return std::distance(p->items().cbegin(), it);
8450 }
8451
8452 ++i;
8453 }
8454 }
8455
8456 return -1;
8457}
8458
8459//! Process text plugins.
//! Calls the function of every entry of \p textPlugins with `p`, `po` and the
//! string list stored in that entry. When \p inLink is true, entries whose bool
//! flag is false are skipped.
//! NOTE(review): listing lines 8462-8463 (function name and the `p` / `po`
//! parameters) are absent here; callers in this file invoke it as
//! checkForTextPlugins<Trait>(p, po, m_textPlugins, inLink).
8460template<class Trait>
8461inline void
8464 const TextPluginsMap<Trait> &textPlugins,
8465 bool inLink)
8466{
8467 for (const auto &plugin : textPlugins) {
// The bool element of the entry decides whether the plugin runs for text inside
// of a link.
8468 if (inLink && !std::get<bool>(plugin.second)) {
8469 continue;
8470 }
8471
8472 std::get<TextPluginFunc<Trait>>(plugin.second)(p, po,
8473 std::get<typename Trait::StringList>(plugin.second));
8474 }
8475}
8476
8477//! Make horizontal line.
//! Creates a HorizontalLine item and appends it to \p parent. The item starts at
//! the virgin position of the first non-space character of `line`, ends at the
//! virgin position of the last character of `line`, and both its start and end
//! line are the line number stored with `line`.
//! NOTE(review): listing line 8480 (function name and the `line` parameter) is
//! absent here; the body reads `line.first` and `line.second.m_lineNumber`.
8478template<class Trait>
8479inline void
8481 std::shared_ptr<Block<Trait>> parent)
8482{
8483 std::shared_ptr<Item<Trait>> hr(new HorizontalLine<Trait>);
8484 hr->setStartColumn(line.first.virginPos(skipSpaces<Trait>(0, line.first.asString())));
8485 hr->setStartLine(line.second.m_lineNumber);
8486 hr->setEndColumn(line.first.virginPos(line.first.length() - 1));
8487 hr->setEndLine(line.second.m_lineNumber);
8488 parent->appendItem(hr);
8489}
8490
//! Parse lines of text with inline formatting and append the result to `parent`.
//! The delimiters of the block `fr` are collected and processed one by one: links,
//! images, autolinks/HTML, styles, math and inline code are delegated to the
//! checkFor*() helpers; horizontal line and H1/H2 delimiters finish the current
//! paragraph (possibly turning it into a heading) and start a new one. Whatever
//! is left is flushed as a paragraph and, if a table was detected, as a table.
//! When parsing stopped early because another kind of block was detected, the
//! remaining lines are handed over to Parser<Trait>::parse().
//! With collectRefLinks set no items are appended to `parent` by this function.
//! NOTE(review): listing line 8493 (function name and first parameter) is absent
//! here; the body uses `fr`, `m_textPlugins` and `m_fullyOptimizeParagraphs`, so
//! this is presumably a Parser<Trait> member taking the block `fr` - confirm.
8491template<class Trait>
8492inline void
8494 std::shared_ptr<Block<Trait>> parent,
8495 std::shared_ptr<Document<Trait>> doc,
8496 typename Trait::StringList &linksToParse,
8497 const typename Trait::String &workingPath,
8498 const typename Trait::String &fileName,
8499 bool collectRefLinks,
8500 bool ignoreLineBreak,
8501 RawHtmlBlock<Trait> &html,
8502 bool inLink)
8503
8504{
8505 if (fr.m_data.empty()) {
8506 return;
8507 }
8508
// `p` is the paragraph being filled, `pt` receives a table found in the paragraph.
8509 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
8510 p->setStartColumn(fr.m_data.at(0).first.virginPos(0));
8511 p->setStartLine(fr.m_data.at(0).second.m_lineNumber);
8512 std::shared_ptr<Paragraph<Trait>> pt(new Paragraph<Trait>);
8513
8514 auto delims = collectDelimiters(fr.m_data);
8515
8516 TextParsingOpts<Trait> po = {fr, p, nullptr, doc, linksToParse, workingPath, fileName,
8517 collectRefLinks, ignoreLineBreak, html, m_textPlugins};
8518 typename Delims::iterator styleStackBottom = delims.end();
8519
8520 if (!delims.empty()) {
8521 for (auto it = delims.begin(), last = delims.end(); it != last; ++it) {
// An HTML tag left open by previous processing is finished first.
8522 if (html.m_html.get() && html.m_continueHtml) {
8523 it = finishRawHtmlTag(it, last, po, false);
8524 } else {
8525 if (isListOrQuoteAfterHtml(po)) {
8526 break;
8527 }
8528
8529 if (po.m_line > po.m_lastTextLine) {
8530 checkForTableInParagraph(po, fr.m_data.size() - 1);
8531 }
8532
// Text in front of the current delimiter (but not past the last text position)
// is turned into a Text item; when only collecting reference links just the
// current position is advanced.
8533 if (po.shouldStopParsing() && po.m_lastTextLine < it->m_line) {
8534 break;
8535 } else if (!collectRefLinks) {
8536 makeText(po.m_lastTextLine < it->m_line ? po.m_lastTextLine : it->m_line,
8537 po.m_lastTextLine < it->m_line ? po.m_lastTextPos : it->m_pos, po);
8538 } else {
8539 const auto prevLine = po.m_line;
8540
8541 po.m_line = (po.m_lastTextLine < it->m_line ? po.m_lastTextLine : it->m_line);
8542 po.m_pos = (po.m_lastTextLine < it->m_line ? po.m_lastTextPos : it->m_pos);
8543
8544 if (po.m_line > prevLine) {
8545 po.m_firstInParagraph = false;
8546 } else if (po.m_pos > skipSpaces<Trait>(0, po.m_fr.m_data[po.m_line].first.asString())) {
8547 po.m_firstInParagraph = false;
8548 }
8549 }
8550
// In the inline cases below the helper returns the delimiter processing stopped
// at; the paragraph end is moved to the end of that delimiter.
8551 switch (it->m_type) {
8552 case Delimiter::SquareBracketsOpen: {
8553 it = checkForLink(it, last, po);
8554 p->setEndColumn(fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len - 1));
8555 p->setEndLine(fr.m_data.at(it->m_line).second.m_lineNumber);
8556 } break;
8557
8558 case Delimiter::ImageOpen: {
8559 it = checkForImage(it, last, po);
8560 p->setEndColumn(fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len - 1));
8561 p->setEndLine(fr.m_data.at(it->m_line).second.m_lineNumber);
8562 } break;
8563
8564 case Delimiter::Less: {
8565 it = checkForAutolinkHtml(it, last, po, true);
8566
8567 if (!html.m_html.get()) {
8568 p->setEndColumn(fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len - 1));
8569 p->setEndLine(fr.m_data.at(it->m_line).second.m_lineNumber);
8570 }
8571 } break;
8572
8573 case Delimiter::Strikethrough:
8574 case Delimiter::Emphasis1:
8575 case Delimiter::Emphasis2: {
8576 if (!collectRefLinks) {
8577 it = checkForStyle(delims.begin(), it, last, styleStackBottom, po);
8578 p->setEndColumn(fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len - 1));
8579 p->setEndLine(fr.m_data.at(it->m_line).second.m_lineNumber);
8580 }
8581 } break;
8582
8583 case Delimiter::Math: {
8584 it = checkForMath(it, last, po);
8585 p->setEndColumn(fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len - 1));
8586 p->setEndLine(fr.m_data.at(it->m_line).second.m_lineNumber);
8587 } break;
8588
8589 case Delimiter::InlineCode: {
8590 if (!it->m_backslashed) {
8591 it = checkForInlineCode(it, last, po);
8592 p->setEndColumn(fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len - 1));
8593 p->setEndLine(fr.m_data.at(it->m_line).second.m_lineNumber);
8594 }
8595 } break;
8596
// A horizontal line delimiter ends the current paragraph. If the line also
// passes isH2() and the paragraph in front of it is not empty (and not a lone
// line break), that paragraph becomes a level 2 heading; otherwise a
// HorizontalLine item is produced.
8597 case Delimiter::HorizontalLine: {
8598 po.m_wasRefLink = false;
8599 po.m_firstInParagraph = false;
8600
8601 const auto pos = skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString());
8602 const auto withoutSpaces = po.m_fr.m_data[it->m_line].first.asString().sliced(pos);
8603
8604 auto h2 = isH2<Trait>(withoutSpaces);
8605
8606 if (!p->isEmpty()) {
8607 optimizeParagraph<Trait>(p, po, OptimizeParagraphType::Semi);
8608
8609 checkForTextPlugins<Trait>(p, po, m_textPlugins, inLink);
8610
8611 if (it->m_line - 1 >= 0) {
8612 p->setEndColumn(fr.m_data.at(it->m_line - 1).first.virginPos(
8613 fr.m_data.at(it->m_line - 1).first.length() - 1));
8614 p->setEndLine(fr.m_data.at(it->m_line - 1).second.m_lineNumber);
8615 }
8616
8617 p = splitParagraphsAndFreeHtml(parent, p, po, collectRefLinks, m_fullyOptimizeParagraphs);
8618
8619 if (!p->isEmpty()) {
8620 if (!collectRefLinks) {
8621 if (!h2 || (p->items().size() == 1 &&
8622 p->items().front()->type() == ItemType::LineBreak)) {
8623 parent->appendItem(p);
8624
8625 h2 = false;
8626 } else {
8627 makeHeading(parent,
8628 doc,
8629 optimizeParagraph<Trait>(p, po, defaultParagraphOptimization()),
8630 fr.m_data[it->m_line].first.virginPos(it->m_pos + it->m_len - 1),
8631 fr.m_data[it->m_line].second.m_lineNumber,
8632 2,
8633 workingPath,
8634 fileName,
8635 collectRefLinks,
8636 {po.m_fr.m_data[it->m_line].first.virginPos(pos),
8637 fr.m_data[it->m_line].second.m_lineNumber,
8638 po.m_fr.m_data[it->m_line].first.virginPos(
8639 lastNonSpacePos(po.m_fr.m_data[it->m_line].first.asString())),
8640 fr.m_data[it->m_line].second.m_lineNumber},
8641 po);
8642
8643 po.m_checkLineOnNewType = true;
8644 }
8645 }
8646 } else {
8647 h2 = false;
8648 }
8649 } else {
8650 h2 = false;
8651 }
8652
// Start a fresh paragraph on the line after the delimiter.
8653 p.reset(new Paragraph<Trait>);
8654 po.m_rawTextData.clear();
8655
8656 if (it->m_line + 1 < static_cast<long long int>(fr.m_data.size())) {
8657 p->setStartColumn(fr.m_data.at(it->m_line + 1).first.virginPos(0));
8658 p->setStartLine(fr.m_data.at(it->m_line + 1).second.m_lineNumber);
8659 }
8660
8661 po.m_parent = p;
8662 po.m_line = it->m_line;
8663 po.m_pos = it->m_pos + it->m_len;
8664
8665 if (!h2 && !collectRefLinks) {
8666 makeHorLine<Trait>(fr.m_data[it->m_line], parent);
8667 }
8668 } break;
8669
// H1/H2 delimiters: the paragraph collected so far becomes a heading of level 1
// or 2, unless it is empty or consists of a single line break.
8670 case Delimiter::H1:
8671 case Delimiter::H2: {
8672 po.m_wasRefLink = false;
8673 po.m_firstInParagraph = false;
8674
8675 optimizeParagraph<Trait>(p, po, OptimizeParagraphType::Semi);
8676
8677 checkForTextPlugins<Trait>(p, po, m_textPlugins, inLink);
8678
8679 if (it->m_line - 1 >= 0) {
8680 p->setEndColumn(fr.m_data.at(it->m_line - 1).first.virginPos(
8681 fr.m_data.at(it->m_line - 1).first.length() - 1));
8682 p->setEndLine(fr.m_data.at(it->m_line - 1).second.m_lineNumber);
8683 }
8684
8685 p = splitParagraphsAndFreeHtml(parent, p, po, collectRefLinks,
8686 m_fullyOptimizeParagraphs);
8687
8688 if (!p->isEmpty() && !((p->items().size() == 1 &&
8689 p->items().front()->type() == ItemType::LineBreak))) {
8690 makeHeading(parent,
8691 doc,
8692 optimizeParagraph<Trait>(p, po, defaultParagraphOptimization()),
8693 fr.m_data[it->m_line].first.virginPos(it->m_pos + it->m_len - 1),
8694 fr.m_data[it->m_line].second.m_lineNumber,
8695 it->m_type == Delimiter::H1 ? 1 : 2,
8696 workingPath,
8697 fileName,
8698 collectRefLinks,
8699 {po.m_fr.m_data[it->m_line].first.virginPos(skipSpaces<Trait>(
8700 0, po.m_fr.m_data[it->m_line].first.asString())),
8701 fr.m_data[it->m_line].second.m_lineNumber,
8702 po.m_fr.m_data[it->m_line].first.virginPos(lastNonSpacePos(
8703 po.m_fr.m_data[it->m_line].first.asString())),
8704 fr.m_data[it->m_line].second.m_lineNumber},
8705 po);
8706
8707 po.m_checkLineOnNewType = true;
8708
8709 p.reset(new Paragraph<Trait>);
8710 po.m_rawTextData.clear();
8711
8712 if (it->m_line + 1 < static_cast<long long int>(fr.m_data.size())) {
8713 p->setStartColumn(fr.m_data.at(it->m_line + 1).first.virginPos(0));
8714 p->setStartLine(fr.m_data.at(it->m_line + 1).second.m_lineNumber);
8715 }
8716
8717 po.m_line = it->m_line;
8718 po.m_pos = it->m_pos + it->m_len;
8719 } else if (p->startColumn() == -1) {
8720 p->setStartColumn(fr.m_data.at(it->m_line).first.virginPos(it->m_pos));
8721 p->setStartLine(fr.m_data.at(it->m_line).second.m_lineNumber);
8722 }
8723
8724 po.m_parent = p;
8725 } break;
8726
// Any other delimiter is treated as plain text up to and including itself.
8727 default: {
8728 if (!po.shouldStopParsing()) {
8729 po.m_wasRefLink = false;
8730 po.m_firstInParagraph = false;
8731
8732 if (!collectRefLinks) {
8733 makeText(it->m_line, it->m_pos + it->m_len, po);
8734 } else {
8735 po.m_line = it->m_line;
8736 po.m_pos = it->m_pos + it->m_len;
8737 }
8738 }
8739 } break;
8740 }
8741
8742 if (po.shouldStopParsing()) {
8743 break;
8744 }
8745
// After a heading was made, the type of the next line is checked.
// NOTE(review): listing line 8750 with the condition on `type` is missing here;
// the visible branch marks the rest as detected code and stops the loop.
8746 if (po.m_checkLineOnNewType) {
8747 if (po.m_line + 1 < static_cast<long long int>(po.m_fr.m_data.size())) {
8748 const auto type = Parser<Trait>::whatIsTheLine(po.m_fr.m_data[po.m_line + 1].first);
8749
8751 po.m_detected = TextParsingOpts<Trait>::Detected::Code;
8752
8753 break;
8754 }
8755 }
8756
8757 po.m_checkLineOnNewType = false;
8758 }
8759 }
8760 }
8761 } else {
8762 if (html.m_html.get() && html.m_continueHtml) {
8763 finishRawHtmlTag(delims.end(), delims.end(), po, false);
8764 }
8765 }
8766
8767 if (po.m_lastTextLine == -1) {
8768 checkForTableInParagraph(po, po.m_fr.m_data.size() - 1);
8769 }
8770
8771 if (po.m_detected == TextParsingOpts<Trait>::Detected::Table) {
8772 if (!collectRefLinks) {
8773 makeText(po.m_lastTextLine, po.m_lastTextPos, po);
8774 }
8775
8776 parseTableInParagraph(po, pt, doc, linksToParse, workingPath, fileName, collectRefLinks);
8777 }
8778
// While HTML is detected, whole lines are consumed as text until a list or a
// quote follows the HTML or the lines run out.
8779 while (po.m_detected == TextParsingOpts<Trait>::Detected::HTML &&
8780 po.m_line < static_cast<long long int>(po.m_fr.m_data.size())) {
8781 if (!isListOrQuoteAfterHtml(po)) {
8782 if (!collectRefLinks) {
8783 makeText(po.m_line, po.m_fr.m_data[po.m_line].first.length(), po);
8784 }
8785
8786 po.m_pos = 0;
8787 ++po.m_line;
8788 } else {
8789 break;
8790 }
8791 }
8792
// Nothing special was detected: the rest of the block is plain text.
8793 if (po.m_detected == TextParsingOpts<Trait>::Detected::Nothing &&
8794 po.m_line <= static_cast<long long int>(po.m_fr.m_data.size() - 1)) {
8795 if (!collectRefLinks) {
8796 makeText(po.m_fr.m_data.size() - 1, po.m_fr.m_data.back().first.length(), po);
8797 }
8798 }
8799
// Flush the paragraph that is still open.
8800 if (!p->isEmpty()) {
8801 optimizeParagraph<Trait>(p, po, OptimizeParagraphType::Semi);
8802
8803 checkForTextPlugins<Trait>(p, po, m_textPlugins, inLink);
8804
8805 p = splitParagraphsAndFreeHtml(parent, p, po, collectRefLinks, m_fullyOptimizeParagraphs);
8806
8807 if (!p->isEmpty() && !collectRefLinks) {
8808 parent->appendItem(optimizeParagraph<Trait>(p, po, defaultParagraphOptimization()));
8809 }
8810
8811 po.m_rawTextData.clear();
8812 }
8813
// Only the first item of `pt` is appended to the parent.
8814 if (!pt->isEmpty() && !collectRefLinks) {
8815 parent->appendItem(pt->items().front());
8816 }
8817
8818 normalizePos(po.m_pos, po.m_line, po.m_line < static_cast<long long int>(po.m_fr.m_data.size()) ?
8819 po.m_fr.m_data[po.m_line].first.length() : 0, po.m_fr.m_data.size());
8820
// Something else was detected and lines are left: they are parsed again as
// ordinary blocks, starting at the current line.
8821 if (po.m_detected != TextParsingOpts<Trait>::Detected::Nothing &&
8822 po.m_line < static_cast<long long int>(po.m_fr.m_data.size())) {
8823 typename MdBlock<Trait>::Data tmp;
8824 std::copy(fr.m_data.cbegin() + po.m_line, fr.m_data.cend(), std::back_inserter(tmp));
8825
8826 StringListStream<Trait> stream(tmp);
8827
8828 Parser<Trait>::parse(stream, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
8829 }
8830}
8831
8832template<class Trait>
8833inline void
8834Parser<Trait>::parseFootnote(MdBlock<Trait> &fr,
8835 std::shared_ptr<Block<Trait>>,
8836 std::shared_ptr<Document<Trait>> doc,
8837 typename Trait::StringList &linksToParse,
8838 const typename Trait::String &workingPath,
8839 const typename Trait::String &fileName,
8840 bool collectRefLinks)
8841{
8842 {
8843 const auto it = (std::find_if(fr.m_data.rbegin(), fr.m_data.rend(), [](const auto &s) {
8844 return !s.first.isEmpty();
8845 })).base();
8846
8847 if (it != fr.m_data.end()) {
8848 fr.m_data.erase(it, fr.m_data.end());
8849 }
8850 }
8851
8852 if (!fr.m_data.empty()) {
8853 std::shared_ptr<Footnote<Trait>> f(new Footnote<Trait>);
8854 f->setStartColumn(fr.m_data.front().first.virginPos(0));
8855 f->setStartLine(fr.m_data.front().second.m_lineNumber);
8856 f->setEndColumn(fr.m_data.back().first.virginPos(fr.m_data.back().first.length() - 1));
8857 f->setEndLine(fr.m_data.back().second.m_lineNumber);
8858
8859 auto delims = collectDelimiters(fr.m_data);
8860
8861 RawHtmlBlock<Trait> html;
8862
8863 TextParsingOpts<Trait> po = {fr, f, nullptr, doc, linksToParse, workingPath, fileName,
8864 collectRefLinks, false, html, m_textPlugins};
8865 po.m_lastTextLine = fr.m_data.size();
8866 po.m_lastTextPos = fr.m_data.back().first.length();
8867
8868 if (!delims.empty() && delims.cbegin()->m_type == Delimiter::SquareBracketsOpen &&
8869 !delims.cbegin()->m_isWordBefore) {
8870 typename MdBlock<Trait>::Data id;
8871 typename Delims::iterator it = delims.end();
8872
8873 po.m_line = delims.cbegin()->m_line;
8874 po.m_pos = delims.cbegin()->m_pos;
8875
8876 std::tie(id, it) = checkForLinkText(delims.begin(), delims.end(), po);
8877
8878 if (!toSingleLine(id).isEmpty() &&
8879 id.front().first.asString().startsWith(Trait::latin1ToString("^")) &&
8880 it != delims.cend() &&
8881 fr.m_data.at(it->m_line).first.length() > it->m_pos + 2 &&
8882 fr.m_data.at(it->m_line).first[it->m_pos + 1] == Trait::latin1ToChar(':') &&
8883 fr.m_data.at(it->m_line).first[it->m_pos + 2].isSpace()) {
8884 f->setIdPos({fr.m_data[delims.cbegin()->m_line].first.virginPos(delims.cbegin()->m_pos),
8885 fr.m_data[delims.cbegin()->m_line].second.m_lineNumber,
8886 fr.m_data.at(it->m_line).first.virginPos(it->m_pos + 1),
8887 fr.m_data.at(it->m_line).second.m_lineNumber});
8888
8889 {
8890 typename MdBlock<Trait>::Data tmp;
8891 std::copy(fr.m_data.cbegin() + it->m_line, fr.m_data.cend(),
8892 std::back_inserter(tmp));
8893 fr.m_data = tmp;
8894 }
8895
8896 fr.m_data.front().first = fr.m_data.front().first.sliced(it->m_pos + 3);
8897
8898 for (auto it = fr.m_data.begin(), last = fr.m_data.end(); it != last; ++it) {
8899 if (it->first.asString().startsWith(Trait::latin1ToString(" "))) {
8900 it->first = it->first.sliced(4);
8901 }
8902 }
8903
8904 StringListStream<Trait> stream(fr.m_data);
8905
8906 parse(stream, f, doc, linksToParse, workingPath, fileName, collectRefLinks);
8907
8908 if (!f->isEmpty()) {
8909 doc->insertFootnote(Trait::latin1ToString("#") + toSingleLine(id) +
8910 Trait::latin1ToString("/") + (!workingPath.isEmpty() ?
8911 workingPath + Trait::latin1ToString("/") : typename Trait::String()) + fileName,
8912 f);
8913 }
8914 }
8915 }
8916 }
8917}
8918
8919template<class Trait>
8920inline long long int
8921Parser<Trait>::parseBlockquote(MdBlock<Trait> &fr,
8922 std::shared_ptr<Block<Trait>> parent,
8923 std::shared_ptr<Document<Trait>> doc,
8924 typename Trait::StringList &linksToParse,
8925 const typename Trait::String &workingPath,
8926 const typename Trait::String &fileName,
8927 bool collectRefLinks,
8928 RawHtmlBlock<Trait> &)
8929{
8930 const long long int pos = fr.m_data.front().first.asString().indexOf(Trait::latin1ToChar('>'));
8931 long long int extra = 0;
8932
8933 long long int line = -1;
8934
8935 if (pos > -1) {
8936 typename Blockquote<Trait>::Delims delims;
8937
8938 long long int i = 0, j = 0;
8939
8940 BlockType bt = BlockType::EmptyLine;
8941
8942 for (auto it = fr.m_data.begin(), last = fr.m_data.end(); it != last; ++it, ++i) {
8943 const auto ns = skipSpaces<Trait>(0, it->first.asString());
8944 const auto gt = (ns < it->first.length() ? (it->first[ns] == Trait::latin1ToChar('>') ? ns : -1) : -1);
8945
8946 if (gt > -1) {
8947 const auto dp = it->first.virginPos(gt);
8948 delims.push_back({dp, it->second.m_lineNumber, dp, it->second.m_lineNumber});
8949
8950 if (it == fr.m_data.begin()) {
8951 extra = gt + (it->first.length() > gt + 1 ?
8952 (it->first[gt + 1] == Trait::latin1ToChar(' ') ? 1 : 0) : 0) + 1;
8953 }
8954
8955 it->first = it->first.sliced(gt + (it->first.length() > gt + 1 ?
8956 (it->first[gt + 1] == Trait::latin1ToChar(' ') ? 1 : 0) : 0) + 1);
8957
8958 bt = whatIsTheLine(it->first);
8959 }
8960 // Process lazyness...
8961 else {
8962 if (ns < 4 && isHorizontalLine<Trait>(it->first.asString().sliced(ns))) {
8963 line = it->second.m_lineNumber;
8964 break;
8965 }
8966
8967 const auto tmpBt = whatIsTheLine(it->first);
8968
8969 if (isListType(tmpBt)) {
8970 line = it->second.m_lineNumber;
8971 break;
8972 }
8973
8974 if (bt == BlockType::Text) {
8975 if (isH1<Trait>(it->first.asString())) {
8976 const auto p = it->first.asString().indexOf(Trait::latin1ToChar('='));
8977
8978 it->first.insert(p, Trait::latin1ToChar('\\'));
8979
8980 continue;
8981 } else if (isH2<Trait>(it->first.asString())) {
8982 const auto p = it->first.asString().indexOf(Trait::latin1ToChar('-'));
8983
8984 it->first.insert(p, Trait::latin1ToChar('\\'));
8985
8986 continue;
8987 }
8988 } else if ((bt == BlockType::Code || bt == BlockType::CodeIndentedBySpaces) &&
8989 it->second.m_mayBreakList) {
8990 line = it->second.m_lineNumber;
8991 break;
8992 }
8993
8994 if ((bt == BlockType::Text || bt == BlockType::Blockquote || bt == BlockType::List)
8995 && (tmpBt == BlockType::Text || tmpBt == BlockType::CodeIndentedBySpaces)) {
8996 continue;
8997 } else {
8998 line = it->second.m_lineNumber;
8999 break;
9000 }
9001 }
9002 }
9003
9004 typename MdBlock<Trait>::Data tmp;
9005
9006 for (; j < i; ++j) {
9007 tmp.push_back(fr.m_data.at(j));
9008 }
9009
9010 StringListStream<Trait> stream(tmp);
9011
9012 std::shared_ptr<Blockquote<Trait>> bq(new Blockquote<Trait>);
9013 bq->setStartColumn(fr.m_data.at(0).first.virginPos(0) - extra);
9014 bq->setStartLine(fr.m_data.at(0).second.m_lineNumber);
9015 bq->setEndColumn(fr.m_data.at(j - 1).first.virginPos(fr.m_data.at(j - 1).first.length() - 1));
9016 bq->setEndLine(fr.m_data.at(j - 1).second.m_lineNumber);
9017 bq->delims() = delims;
9018
9019 parse(stream, bq, doc, linksToParse, workingPath, fileName, collectRefLinks);
9020
9021 if (!collectRefLinks) {
9022 parent->appendItem(bq);
9023 }
9024 }
9025
9026 return line;
9027}
9028
9029//! \return Is the given string a new list item.
9030template<class Trait>
9031inline bool
9032isListItemAndNotNested(const typename Trait::String &s,
9033 long long int indent)
9034{
9035 long long int p = skipSpaces<Trait>(0, s);
9036
9037 if (p >= indent || p == s.size()) {
9038 return false;
9039 }
9040
9041 bool space = false;
9042
9043 if (p + 1 >= s.size()) {
9044 space = true;
9045 } else {
9046 space = s[p + 1].isSpace();
9047 }
9048
9049 if (p < 4) {
9050 if (s[p] == Trait::latin1ToChar('*') && space) {
9051 return true;
9052 } else if (s[p] == Trait::latin1ToChar('-') && space) {
9053 return true;
9054 } else if (s[p] == Trait::latin1ToChar('+') && space) {
9055 return true;
9056 } else {
9057 return isOrderedList<Trait>(s);
9058 }
9059 } else
9060 return false;
9061}
9062
9063//! \return Indent.
9064template<class Trait>
9065inline std::pair<long long int, long long int>
9066calculateIndent(const typename Trait::String &s,
9067 long long int p)
9068{
9069 return {0, skipSpaces<Trait>(p, s)};
9070}
9071
9072//! \return List item data.
9073template<class Trait>
9074inline std::tuple<bool, long long int, typename Trait::Char, bool>
9075listItemData(const typename Trait::String &s,
9076 bool wasText)
9077{
9078 long long int p = skipSpaces<Trait>(0, s);
9079
9080 if (p == s.size()) {
9081 return {false, 0, typename Trait::Char(), false};
9082 }
9083
9084 bool space = false;
9085
9086 if (p + 1 >= s.size()) {
9087 space = true;
9088 } else {
9089 space = s[p + 1].isSpace();
9090 }
9091
9092 if (p < 4) {
9093 if (s[p] == Trait::latin1ToChar('*') && space) {
9094 return {true, p + 2, Trait::latin1ToChar('*'),
9095 p + 2 < s.size() ? !s.sliced(p + 2).isEmpty() : false};
9096 } else if (s[p] == Trait::latin1ToChar('-')) {
9097 if (isH2<Trait>(s) && wasText) {
9098 return {false, p + 2, Trait::latin1ToChar('-'), false};
9099 } else if (space) {
9100 return {true, p + 2, Trait::latin1ToChar('-'),
9101 p + 2 < s.size() ? !s.sliced(p + 2).isEmpty() : false};
9102 }
9103 } else if (s[p] == Trait::latin1ToChar('+') && space) {
9104 return {true, p + 2, Trait::latin1ToChar('+'),
9105 p + 2 < s.size() ? !s.sliced(p + 2).isEmpty() : false};
9106 } else {
9107 int d = 0, l = 0;
9108 typename Trait::Char c;
9109
9110 if (isOrderedList<Trait>(s, &d, &l, &c)) {
9111 return {true, p + l + 2, c,
9112 p + l + 2 < s.size() ? !s.sliced(p + l + 2).isEmpty() : false};
9113 } else {
9114 return {false, 0, typename Trait::Char(), false};
9115 }
9116 }
9117 }
9118
9119 return {false, 0, typename Trait::Char(), false};
9120}
9121
9122//! Set last position of the item.
9123template<class Trait>
9124inline void
9125setLastPos(std::shared_ptr<Item<Trait>> item,
9126 long long int pos,
9127 long long int line)
9128{
9129 item->setEndColumn(pos);
9130 item->setEndLine(line);
9131}
9132
9133//! Update last position of all parent.
//! When the parent of the HTML differs from the top parent, every block recorded
//! in `m_toAdjustLastPos` for that parent gets the end column and end line of
//! `html.m_html`. Nothing happens when the parent has no record.
//! NOTE(review): listing line 9136 (function name and the `html` parameter) is
//! absent here; the caller in this file invokes it as updateLastPosInList<Trait>(html).
9134template<class Trait>
9135inline void
9137{
9138 if (html.m_parent != html.m_topParent) {
9139 const auto it = html.m_toAdjustLastPos.find(html.m_parent);
9140
9141 if (it != html.m_toAdjustLastPos.end()) {
// Each entry is a pair; its first element is the block to adjust.
9142 for (auto &i : it->second) {
9143 i.first->setEndColumn(html.m_html->endColumn());
9144 i.first->setEndLine(html.m_html->endLine());
9145 }
9146 }
9147 }
9148}
9149
//! Parse a block of lines as a list and append the resulting List item(s) to \p parent.
//!
//! Lines of \p fr are grouped into list items and every group is handed to
//! parseListItem(). A fresh List is started when a horizontal line interrupts
//! the list, when the list marker changes, when parseListItem() reports a line
//! number, or when pending raw HTML has to be attached outside of the item that
//! was just parsed.
//!
//! \param fr Lines of the list. A line accepted by isH1() inside an item is modified in place.
//! \param parent Block that receives the list(s) and horizontal lines.
//! \param doc Forwarded to parseListItem().
//! \param linksToParse Forwarded to parseListItem().
//! \param workingPath Forwarded to parseListItem().
//! \param fileName Forwarded to parseListItem().
//! \param collectRefLinks When true nothing is appended to \p parent.
//! \param html Raw HTML state shared between the nested parsing calls.
//!
//! \return Result of the last parseListItem() call, or -1 when the first line
//! of \p fr contains only spaces (presumably the line number at which the
//! list was interrupted - TODO confirm against parse()).
9150 template<class Trait>
9151 inline long long int
9152 Parser<Trait>::parseList(MdBlock<Trait> &fr,
9153 std::shared_ptr<Block<Trait>> parent,
9154 std::shared_ptr<Document<Trait>> doc,
9155 typename Trait::StringList &linksToParse,
9156 const typename Trait::String &workingPath,
9157 const typename Trait::String &fileName,
9158 bool collectRefLinks,
9159 RawHtmlBlock<Trait> &html)
9160 {
9161 bool resetTopParent = false;
9162 long long int line = -1;
9163
// The outermost call installs parent as the top parent and clears it again on exit.
9164 if (!html.m_topParent) {
9165 html.m_topParent = parent;
9166 resetTopParent = true;
9167 }
9168
9169 const auto p = skipSpaces<Trait>(0, fr.m_data.front().first.asString());
9170
// Do nothing if the first line consists of spaces only.
9171 if (p != fr.m_data.front().first.length()) {
9172 std::shared_ptr<List<Trait>> list(new List<Trait>);
9173
// listItem accumulates the lines of the item that is currently being collected.
9174 typename MdBlock<Trait>::Data listItem;
9175 auto it = fr.m_data.begin();
9176 listItem.push_back(*it);
9177 list->setStartColumn(it->first.virginPos(p));
9178 list->setStartLine(it->second.m_lineNumber);
9179 ++it;
9180
9181 long long int indent = 0;
9182 typename Trait::Char marker;
9183
// Indent and marker character of the first item.
9184 std::tie(std::ignore, indent, marker, std::ignore) =
9185 listItemData<Trait>(listItem.front().first.asString(), false);
9186
// Register the list together with the column of its content in the stack of open blocks.
9187 html.m_blocks.push_back({list, list->startColumn() + indent});
9188
9189 if (!collectRefLinks) {
9190 html.m_toAdjustLastPos.insert({list, html.m_blocks});
9191 }
9192
// Set after a horizontal line: indent and marker are re-read from the next line.
9193 bool updateIndent = false;
9194
// Flush the current list into parent (if it has items) and continue with a new empty list.
// The new list has no start position yet; it is filled in by the main loop below.
9195 auto addListMakeNew = [&]() {
9196 if (!list->isEmpty() && !collectRefLinks) {
9197 parent->appendItem(list);
9198 }
9199
9200 html.m_blocks.pop_back();
9201
9202 list.reset(new List<Trait>);
9203
9204 html.m_blocks.push_back({list, indent});
9205
9206 if (!collectRefLinks) {
9207 html.m_toAdjustLastPos.insert({list, html.m_blocks});
9208 }
9209 };
9210
// Attach raw HTML that is still pending after an item was parsed.
9211 auto processLastHtml = [&](std::shared_ptr<ListItem<Trait>> resItem) {
9212 if (html.m_html && resItem) {
// HTML belongs to the item when it starts on the line of the item or at/after
// the column of the item content; otherwise the owner is looked up by column.
9213 html.m_parent = (resItem->startLine() == html.m_html->startLine() ||
9214 html.m_html->startColumn() >= resItem->startColumn() + indent ?
9215 resItem : html.findParent(html.m_html->startColumn()));
9216
9217 if (!html.m_parent) {
9218 html.m_parent = html.m_topParent;
9219 }
9220
// HTML owned by somebody else ends the current list.
9221 if (html.m_parent != resItem) {
9222 addListMakeNew();
9223 }
9224
// In this case HTML is neither appended nor reset here.
// NOTE(review): presumably the caller continues the HTML block - confirm.
9225 const auto continueHtml = html.m_onLine && html.m_continueHtml && html.m_parent == html.m_topParent;
9226
9227 if (!collectRefLinks) {
9228 if (!continueHtml) {
9229 html.m_parent->appendItem(html.m_html);
9230 }
9231
9232 updateLastPosInList<Trait>(html);
9233 }
9234
9235 if (!continueHtml) {
9236 resetHtmlTag<Trait>(html);
9237 }
9238 }
9239 };
9240
// Parse the collected lines as one list item, then handle pending HTML or a reported line.
9241 auto processListItem = [&]() {
9242 MdBlock<Trait> block = {listItem, 0};
9243
9244 std::shared_ptr<ListItem<Trait>> resItem;
9245
9246 line = parseListItem(block, list, doc, linksToParse, workingPath, fileName,
9247 collectRefLinks, html, &resItem);
9248 listItem.clear();
9249
9250 if (html.m_html) {
9251 processLastHtml(resItem);
9252 } else if (line >= 0) {
9253 addListMakeNew();
9254 }
9255 };
9256
// Walk over the remaining lines and cut them into items.
9257 for (auto last = fr.m_data.end(); it != last; ++it) {
9258 if (updateIndent) {
9259 std::tie(std::ignore, indent, marker, std::ignore) =
9260 listItemData<Trait>(it->first.asString(), false);
9261
9262 if (!collectRefLinks) {
9263 html.m_blocks.back().second = indent;
9264 }
9265
9266 updateIndent = false;
9267 }
9268
9269 const auto ns = skipSpaces<Trait>(0, it->first.asString());
9270
// A line accepted by isH1() that is indented less than the item content gets
// a backslash inserted before its first "=" character.
9271 if (isH1<Trait>(it->first.asString().sliced(ns)) && ns < indent && !listItem.empty()) {
9272 const auto p = it->first.asString().indexOf(Trait::latin1ToChar('='));
9273
9274 it->first.insert(p, Trait::latin1ToChar('\\'));
// A horizontal line indented less than the item content ends the item and the list;
// the horizontal line itself is added to parent.
9275 } else if (isHorizontalLine<Trait>(it->first.asString().sliced(ns)) &&
9276 ns < indent && !listItem.empty()) {
9277 updateIndent = true;
9278
9279 processListItem();
9280
9281 if (!list->isEmpty()) {
9282 addListMakeNew();
9283 }
9284
9285 if (!collectRefLinks) {
9286 makeHorLine<Trait>(*it, parent);
9287 }
9288
9289 continue;
// A sibling item starts: finish the collected item; a different marker starts a new list.
9290 } else if (isListItemAndNotNested<Trait>(it->first.asString(), indent) &&
9291 !listItem.empty() && !it->second.m_mayBreakList) {
9292 typename Trait::Char tmpMarker;
9293 std::tie(std::ignore, indent, tmpMarker, std::ignore) =
9294 listItemData<Trait>(it->first.asString(), false);
9295
9296 processListItem();
9297
9298 if (tmpMarker != marker) {
9299 if (!list->isEmpty()) {
9300 addListMakeNew();
9301 }
9302
9303 marker = tmpMarker;
9304 }
9305 }
9306
// Stop as soon as parsing of an item reported a positive line number.
9307 if (line > 0) {
9308 break;
9309 }
9310
9311 listItem.push_back(*it);
9312
// A list created by addListMakeNew() takes its start position from its first line.
// NOTE(review): relies on a new List reporting startColumn() == -1 - confirm in doc.h.
9313 if (list->startColumn() == -1) {
9314 list->setStartColumn(
9315 it->first.virginPos(std::min(it->first.length() ?
9316 it->first.length() - 1 : 0, skipSpaces<Trait>(0, it->first.asString()))));
9317 list->setStartLine(it->second.m_lineNumber);
9318
9319 if (!collectRefLinks) {
9320 html.m_blocks.back().second += list->startColumn();
9321 }
9322 }
9323 }
9324
// Parse the lines of the last item that are still collected.
9325 if (!listItem.empty()) {
9326 MdBlock<Trait> block = {listItem, 0};
9327 line = parseListItem(block, list, doc, linksToParse, workingPath, fileName,
9328 collectRefLinks, html);
9329 }
9330
9331 if (!list->isEmpty() && !collectRefLinks) {
9332 parent->appendItem(list);
9333 }
9334
9335 html.m_blocks.pop_back();
9336 }
9337
9338 if (resetTopParent) {
9339 html.m_topParent.reset();
9340 }
9341
9342 return line;
9343 }
9344
//! Parse the lines of a single list item and append the resulting ListItem to \p parent.
//!
//! The first line of \p fr carries the list marker. The content of the item
//! (the first line without marker plus the following lines with the item indent
//! removed) is parsed with parse(); a nested list found in the content is parsed
//! with parseList(). Raw HTML left pending by the nested parsing is either
//! queued for insertion or continued with parseText().
//!
//! \param fr Lines of the item; trailing empty lines are removed and lines may be modified.
//! \param parent Block that receives the item (only when \p collectRefLinks is false).
//! \param doc Forwarded to the nested parsing calls.
//! \param linksToParse Forwarded to the nested parsing calls.
//! \param workingPath Forwarded to the nested parsing calls.
//! \param fileName Forwarded to the nested parsing calls.
//! \param collectRefLinks When true no items are appended and no positions are set.
//! \param html Raw HTML state shared between the nested parsing calls.
//! \param resItem Optional output, receives the created item (null if the item was dropped).
//!
//! \return -1, or a line number reported by the nested parsing
//! (presumably the line at which the list has to be interrupted - TODO confirm).
9345 template<class Trait>
9346 inline long long int
9347 Parser<Trait>::parseListItem(MdBlock<Trait> &fr,
9348 std::shared_ptr<Block<Trait>> parent,
9349 std::shared_ptr<Document<Trait>> doc,
9350 typename Trait::StringList &linksToParse,
9351 const typename Trait::String &workingPath,
9352 const typename Trait::String &fileName,
9353 bool collectRefLinks,
9354 RawHtmlBlock<Trait> &html,
9355 std::shared_ptr<ListItem<Trait>> *resItem)
9356 {
// Drop trailing empty lines: base() of the reverse iterator points just past
// the last non-empty line.
9357 {
9358 const auto it = (std::find_if(fr.m_data.rbegin(), fr.m_data.rend(), [](const auto &s) {
9359 return !s.first.isEmpty();
9360 })).base();
9361
9362 if (it != fr.m_data.end()) {
9363 fr.m_data.erase(it, fr.m_data.end());
9364 }
9365 }
9366
9367 const auto p = skipSpaces<Trait>(0, fr.m_data.front().first.asString());
9368
9369 std::shared_ptr<ListItem<Trait>> item(new ListItem<Trait>);
9370
9371 item->setStartColumn(fr.m_data.front().first.virginPos(p));
9372 item->setStartLine(fr.m_data.front().second.m_lineNumber);
9373
// Filled by isOrderedList() through its num and len output parameters.
9374 int i = 0, len = 0;
9375
// The marker delimiter spans len columns for an ordered item and one column otherwise.
9376 if (isOrderedList<Trait>(fr.m_data.front().first.asString(), &i, &len)) {
9377 item->setListType(ListItem<Trait>::Ordered);
9378 item->setStartNumber(i);
9379 item->setDelim({item->startColumn(), item->startLine(), item->startColumn() + len, item->startLine()});
9380 } else {
9381 item->setListType(ListItem<Trait>::Unordered);
9382 item->setDelim({item->startColumn(), item->startLine(), item->startColumn(), item->startLine()});
9383 }
9384
// Number 1 marks the start of an ordered list, any other number a continuation.
9385 if (item->listType() == ListItem<Trait>::Ordered) {
9386 item->setOrderedListPreState(i == 1 ? ListItem<Trait>::Start : ListItem<Trait>::Continue);
9387 }
9388
// Lines of the item content that are handed to parse().
9389 typename MdBlock<Trait>::Data data;
9390
// it walks the lines after the marker line.
9391 auto it = fr.m_data.begin();
9392 ++it;
9393
// Advanced together with it in the main loop; not read anywhere in this function.
9394 int pos = 1;
9395
9396 long long int indent = 0;
9397 bool wasText = false;
9398
9399 std::tie(std::ignore, indent, std::ignore, wasText) =
9400 listItemData<Trait>(fr.m_data.front().first.asString(), wasText);
9401
// Register the item together with the column of its content in the stack of open blocks.
9402 html.m_blocks.push_back({item, item->startColumn() + indent});
9403
9404 if (!collectRefLinks) {
9405 html.m_toAdjustLastPos.insert({item, html.m_blocks});
9406 }
9407
9408 const auto firstNonSpacePos = calculateIndent<Trait>(
9409 fr.m_data.front().first.asString(), indent).second;
9410
// Content that starts less than 4 columns after indent defines the item indent.
9411 if (firstNonSpacePos - indent < 4) {
9412 indent = firstNonSpacePos;
9413 }
9414
// The rest of the marker line is the first line of the content.
9415 if (indent < fr.m_data.front().first.length()) {
9416 data.push_back({fr.m_data.front().first.right(fr.m_data.front().first.length() - indent),
9417 fr.m_data.front().second});
9418 }
9419
9420 bool taskList = false;
9421 bool checked = false;
9422
// Task list detection: "[", then a space or x/X, then "]" at the start of the content.
// On a match the checkbox is cut off from the first content line.
9423 if (!data.empty()) {
9424 auto p = skipSpaces<Trait>(0, data.front().first.asString());
9425
9426 if (p < data.front().first.length()) {
9427 if (data.front().first[p] == Trait::latin1ToChar('[')) {
9428 const auto startTaskDelimPos = data.front().first.virginPos(p);
9429
9430 ++p;
9431
9432 if (p < data.front().first.length()) {
9433 if (data.front().first[p] == Trait::latin1ToChar(' ') ||
9434 data.front().first[p].toLower() == Trait::latin1ToChar('x')) {
9435 if (data.front().first[p].toLower() == Trait::latin1ToChar('x')) {
9436 checked = true;
9437 }
9438
9439 ++p;
9440
9441 if (p < data.front().first.length()) {
9442 if (data.front().first[p] == Trait::latin1ToChar(']')) {
9443 item->setTaskDelim({startTaskDelimPos, item->startLine(), data.front().first.virginPos(p), item->startLine()});
9444
9445 taskList = true;
9446
9447 data[0].first = data[0].first.sliced(p + 1);
9448 }
9449 }
9450 }
9451 }
9452 }
9453 }
9454 }
9455
9456 if (taskList) {
9457 item->setTaskList();
9458 item->setChecked(checked);
9459 }
9460
// State of the line scan: inside fenced code, opening sequence of that code,
// and whether the previous line was empty.
9461 bool fensedCode = false;
9462 typename Trait::String startOfCode;
9463 bool wasEmptyLine = false;
9464
// Raw HTML blocks to insert later, each with the index at which it goes into its parent.
9465 std::vector<std::pair<RawHtmlBlock<Trait>, long long int>> htmlToAdd;
9466 long long int line = -1;
9467
// Parse the stream as content of the item. parse() returns a new HTML state,
// so the bookkeeping members of the previous state are copied back into it.
9468 auto parseStream = [&](StringListStream<Trait> &stream) -> long long int
9469 {
9470 const auto tmpHtml = html;
9471 long long int line = -1;
9472 std::tie(html, line) = parse(stream, item, doc, linksToParse, workingPath, fileName,
9473 collectRefLinks, false, true, true);
9474 html.m_topParent = tmpHtml.m_topParent;
9475 html.m_blocks = tmpHtml.m_blocks;
9476 html.m_toAdjustLastPos = tmpHtml.m_toAdjustLastPos;
9477
9478 return line;
9479 };
9480
// Handle pending raw HTML. Returns true when the HTML block continues: the lines
// from it to the end are then parsed as text and the caller has to stop.
// Otherwise the HTML is queued in htmlToAdd and false is returned.
9481 auto processHtml = [&](auto it) -> bool
9482 {
9483 if (html.m_html.get()) {
9484 html.m_parent = html.findParent(html.m_html->startColumn());
9485
9486 if (!html.m_parent) {
9487 html.m_parent = html.m_topParent;
9488 }
9489
9490 data.clear();
9491
9492 if (html.m_continueHtml) {
9493 MdBlock<Trait> tmp;
9494 tmp.m_emptyLineAfter = fr.m_emptyLineAfter;
9495 std::copy(it, fr.m_data.end(), std::back_inserter(tmp.m_data));
9496
9497 parseText(tmp, html.m_parent, doc, linksToParse, workingPath, fileName,
9498 collectRefLinks, html);
9499
9500 return true;
9501 }
9502
9503 htmlToAdd.push_back({html, html.m_parent->items().size()});
9504 updateLastPosInList<Trait>(html);
9505 resetHtmlTag<Trait>(html);
9506 }
9507
9508 return false;
9509 };
9510
// HTML may already be pending on entry; std::prev(it) is the marker line.
// If that HTML continues, the item is dropped (see the else branch below).
9511 if (!processHtml(std::prev(it))) {
9512 for (auto last = fr.m_data.end(); it != last; ++it, ++pos) {
// Track fenced code; the item indent is removed before the line is checked.
9513 if (!fensedCode) {
9514 fensedCode = isCodeFences<Trait>(it->first.asString().startsWith(
9515 typename Trait::String(indent, Trait::latin1ToChar(' '))) ?
9516 it->first.asString().sliced(indent) : it->first.asString());
9517
9518 if (fensedCode) {
9519 startOfCode = startSequence<Trait>(it->first.asString());
9520 }
// The code is closed by a closing fence whose start sequence contains the opening one.
9521 } else if (fensedCode &&
9522 isCodeFences<Trait>(it->first.asString().startsWith(
9523 typename Trait::String(indent, Trait::latin1ToChar(' '))) ?
9524 it->first.asString().sliced(indent) : it->first.asString(),
9525 true) && startSequence<Trait>(it->first.asString()).contains(startOfCode)) {
9526 fensedCode = false;
9527 }
9528
9529 if (!fensedCode) {
9530 long long int newIndent = 0;
9531 bool ok = false;
9532
// ok is true when the line (without the item indent) is a list item itself.
9533 std::tie(ok, newIndent, std::ignore, wasText) = listItemData<Trait>(
9534 it->first.asString().startsWith(typename Trait::String(indent, Trait::latin1ToChar(' '))) ?
9535 it->first.asString().sliced(indent) : it->first.asString(),
9536 wasText);
9537
// A nested list starts here: first parse the content collected so far.
9538 if (ok && !it->second.m_mayBreakList) {
9539 StringListStream<Trait> stream(data);
9540
9541 line = parseStream(stream);
9542
9543 data.clear();
9544
9545 if (processHtml(it)) {
9546 break;
9547 }
9548
9549 if (line != -1) {
9550 break;
9551 }
9552
// HTML that belongs to the top parent interrupts the item at the current line.
9553 if (!htmlToAdd.empty() && htmlToAdd.back().first.m_parent == html.m_topParent) {
9554 line = it->second.m_lineNumber;
9555
9556 break;
9557 } else {
// Collect the lines of the nested list.
9558 typename MdBlock<Trait>::Data nestedList;
9559 nestedList.push_back(*it);
9560 ++it;
9561
9562 wasEmptyLine = false;
9563
9564 for (; it != last; ++it) {
9565 const auto ns = skipSpaces<Trait>(0, it->first.asString());
9566 std::tie(ok, std::ignore, std::ignore, wasText) =
9567 listItemData<Trait>((ns >= indent ? it->first.asString().sliced(indent) :
9568 it->first.asString()), wasText);
9569
9570 if (ok) {
9571 wasEmptyLine = false;
9572 }
9573
// The line stays in the nested list if it is a list item, is indented deep enough,
// is empty, or directly follows a non-empty line.
9574 if (ok || ns >= indent + newIndent || ns == it->first.length() || !wasEmptyLine) {
9575 nestedList.push_back(*it);
9576 } else {
9577 break;
9578 }
9579
9580 wasEmptyLine = (ns == it->first.length());
9581
9582 wasText = (wasEmptyLine ? false : wasText);
9583 }
9584
// Remove the item indent from the nested lines. Non-empty lines indented less
// than the item are left untouched and flagged with m_mayBreakList.
9585 for (auto it = nestedList.begin(), last = nestedList.end(); it != last; ++it) {
9586 const auto ns = skipSpaces<Trait>(0, it->first.asString());
9587
9588 if (ns < indent && ns != it->first.length()) {
9589 it->second.m_mayBreakList = true;
9590 } else {
9591 it->first = it->first.sliced(std::min(ns, indent));
9592 }
9593 }
9594
9595 while (!nestedList.empty() &&
9596 nestedList.back().first.asString().isEmpty()) {
9597 nestedList.pop_back();
9598 }
9599
9600 MdBlock<Trait> block = {nestedList, 0};
9601
9602 line = parseList(block, item, doc, linksToParse, workingPath, fileName,
9603 collectRefLinks, html);
9604
9605 if (line >= 0) {
9606 break;
9607 }
9608
// Lines left after the nested list become content again; it is parsed after the loop.
9609 for (; it != last; ++it) {
9610 if (it->first.asString().startsWith(typename Trait::String(
9611 indent, Trait::latin1ToChar(' ')))) {
9612 it->first = it->first.sliced(indent);
9613 }
9614
9615 data.push_back(*it);
9616 }
9617
9618 break;
9619 }
9620 } else {
// Ordinary content line: remove the item indent and remember it.
9621 if (!it->second.m_mayBreakList &&
9622 it->first.asString().startsWith(typename Trait::String(
9623 indent, Trait::latin1ToChar(' ')))) {
9624 it->first = it->first.sliced(indent);
9625 }
9626
9627 data.push_back(*it);
9628
9629 wasEmptyLine = (skipSpaces<Trait>(0, it->first.asString()) == it->first.length());
9630
9631 wasText = !wasEmptyLine;
9632 }
9633 } else {
// Line inside fenced code: only the item indent is removed.
9634 if (!it->second.m_mayBreakList &&
9635 it->first.asString().startsWith(typename Trait::String(
9636 indent, Trait::latin1ToChar(' ')))) {
9637 it->first = it->first.sliced(indent);
9638 }
9639
9640 data.push_back(*it);
9641 }
9642 }
9643
// Parse the content that is still collected and resolve the parent of HTML left pending by it.
9644 if (!data.empty()) {
9645 StringListStream<Trait> stream(data);
9646
9647 line = parseStream(stream);
9648
9649 if (html.m_html) {
9650 html.m_parent = html.findParent(html.m_html->startColumn());
9651
9652 if (!html.m_parent) {
9653 html.m_parent = html.m_topParent;
9654 }
9655 }
9656 }
9657 } else {
9658 item.reset();
9659 }
9660
9661 if (!collectRefLinks) {
9662 if (item) {
9663 parent->appendItem(item);
9664 }
9665
// Number of HTML blocks inserted so far; shifts the stored insertion indexes.
9666 long long int i = 0;
9667
// Insert queued HTML into its parent. The first block that belongs to the top
// parent is handed back through html instead and stops the loop.
9668 for (auto &h : htmlToAdd) {
9669 if (h.first.m_parent != h.first.m_topParent) {
9670 h.first.m_parent->insertItem(h.second + i, h.first.m_html);
9671
9672 ++i;
9673
9674 updateLastPosInList(h.first);
9675 } else {
9676 html = h.first;
9677
9678 break;
9679 }
9680 }
9681
// End position of the item: the last line of fr, or the line of the pending HTML
// (the line before it when nothing but spaces precedes the HTML on its line).
9682 if (item) {
9683 long long int htmlStartColumn = -1;
9684 long long int htmlStartLine = -1;
9685
9686 if (html.m_html) {
9687 std::tie(htmlStartColumn, htmlStartLine) =
9688 localPosFromVirgin<Trait>(fr, html.m_html->startColumn(), html.m_html->startLine());
9689 }
9690
9691 long long int localLine = (html.m_html ? htmlStartLine : fr.m_data.size() - 1);
9692
// NOTE(review): localLine is not checked against 0 after the decrement - confirm
// that HTML can not start at the beginning of the first line of fr.
9693 if (html.m_html) {
9694 if (skipSpaces<Trait>(0, fr.m_data[localLine].first.asString()) >= htmlStartColumn) {
9695 --localLine;
9696 }
9697 }
9698
9699 const auto lastLine = fr.m_data[localLine].second.m_lineNumber;
9700
9701 const auto lastColumn = fr.m_data[localLine].first.virginPos(
9702 fr.m_data[localLine].first.length() ? fr.m_data[localLine].first.length() - 1 : 0);
9703
9704 item->setEndColumn(lastColumn);
9705 item->setEndLine(lastLine);
9706 parent->setEndColumn(lastColumn);
9707 parent->setEndLine(lastLine);
9708 }
9709 }
9710
9711 if (resItem) {
9712 *resItem = item;
9713 }
9714
// Balance the push_back() done right after the item was created.
9715 html.m_blocks.pop_back();
9716
9717 return line;
9718 }
9719
9720template<class Trait>
9721inline long long int
9722Parser<Trait>::parseCode(MdBlock<Trait> &fr,
9723 std::shared_ptr<Block<Trait>> parent,
9724 bool collectRefLinks)
9725{
9726 const auto indent = skipSpaces<Trait>(0, fr.m_data.front().first.asString());
9727
9728 if (indent != fr.m_data.front().first.length()) {
9729 WithPosition startDelim, endDelim, syntaxPos;
9730 typename Trait::String syntax;
9731 isStartOfCode<Trait>(fr.m_data.front().first.asString(), &syntax, &startDelim, &syntaxPos);
9732 syntax = replaceEntity<Trait>(syntax);
9733 startDelim.setStartLine(fr.m_data.front().second.m_lineNumber);
9734 startDelim.setEndLine(startDelim.startLine());
9735 startDelim.setStartColumn(fr.m_data.front().first.virginPos(startDelim.startColumn()));
9736 startDelim.setEndColumn(fr.m_data.front().first.virginPos(startDelim.endColumn()));
9737
9738 if (syntaxPos.startColumn() != -1) {
9739 syntaxPos.setStartLine(startDelim.startLine());
9740 syntaxPos.setEndLine(startDelim.startLine());
9741 syntaxPos.setStartColumn(fr.m_data.front().first.virginPos(syntaxPos.startColumn()));
9742 syntaxPos.setEndColumn(fr.m_data.front().first.virginPos(syntaxPos.endColumn()));
9743 }
9744
9745 const long long int startPos = fr.m_data.front().first.virginPos(indent);
9746 const long long int emptyColumn = fr.m_data.front().first.virginPos(fr.m_data.front().first.length());
9747 const long long int startLine = fr.m_data.front().second.m_lineNumber;
9748 const long long int endPos = fr.m_data.back().first.virginPos(fr.m_data.back().first.length() - 1);
9749 const long long int endLine = fr.m_data.back().second.m_lineNumber;
9750
9751 fr.m_data.erase(fr.m_data.cbegin());
9752
9753 {
9754 const auto it = std::prev(fr.m_data.cend());
9755
9756 if (it->second.m_lineNumber > -1) {
9757 endDelim.setStartColumn(it->first.virginPos(skipSpaces<Trait>(0, it->first.asString())));
9758 endDelim.setStartLine(it->second.m_lineNumber);
9759 endDelim.setEndLine(endDelim.startLine());
9760 endDelim.setEndColumn(it->first.virginPos(it->first.length() - 1));
9761 }
9762
9763 fr.m_data.erase(it);
9764 }
9765
9766 if (syntax.toLower() == Trait::latin1ToString("math")) {
9767 typename Trait::String math;
9768 bool first = true;
9769
9770 for (const auto &l : std::as_const(fr.m_data)) {
9771 if (!first) {
9772 math.push_back(Trait::latin1ToChar('\n'));
9773 }
9774
9775 math.push_back(l.first.virginSubString());
9776
9777 first = false;
9778 }
9779
9780 if (!collectRefLinks) {
9781 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
9782 p->setStartColumn(startPos);
9783 p->setStartLine(startLine);
9784 p->setEndColumn(endPos);
9785 p->setEndLine(endLine);
9786
9787 std::shared_ptr<Math<Trait>> m(new Math<Trait>);
9788
9789 if (!fr.m_data.empty()) {
9790 m->setStartColumn(fr.m_data.front().first.virginPos(0));
9791 m->setStartLine(fr.m_data.front().second.m_lineNumber);
9792 m->setEndColumn(fr.m_data.back().first.virginPos(fr.m_data.back().first.length() - 1));
9793 m->setEndLine(fr.m_data.back().second.m_lineNumber);
9794 } else {
9795 m->setStartColumn(emptyColumn);
9796 m->setStartLine(startLine);
9797 m->setEndColumn(emptyColumn);
9798 m->setEndLine(startLine);
9799 }
9800
9801 m->setInline(false);
9802 m->setExpr(math);
9803 m->setStartDelim(startDelim);
9804 m->setEndDelim(endDelim);
9805 m->setSyntaxPos(syntaxPos);
9806 m->setFensedCode(true);
9807 p->appendItem(m);
9808
9809 parent->appendItem(p);
9810 }
9811 } else {
9812 return parseCodeIndentedBySpaces(fr, parent, collectRefLinks, indent, syntax, emptyColumn,
9813 startLine, true, startDelim, endDelim, syntaxPos);
9814 }
9815 }
9816
9817 return -1;
9818}
9819
9820template<class Trait>
9821inline long long int
9823 std::shared_ptr<Block<Trait>> parent,
9824 bool collectRefLinks,
9825 int indent,
9826 const typename Trait::String &syntax,
9827 long long int emptyColumn,
9828 long long int startLine,
9829 bool fensedCode,
9830 const WithPosition &startDelim,
9831 const WithPosition &endDelim,
9832 const WithPosition &syntaxPos)
9833{
9834 typename Trait::String code;
9835 long long int startPos = 0;
9836 bool first = true;
9837
9838 auto it = fr.m_data.begin(), lastIt = fr.m_data.end();
9839
9840 for (; it != lastIt; ++it) {
9841 if (it->second.m_mayBreakList) {
9842 lastIt = it;
9843 break;
9844 }
9845
9846 if (!collectRefLinks) {
9847 const auto ns = skipSpaces<Trait>(0, it->first.asString());
9848 if (first) {
9849 startPos = ns;
9850 }
9851 first = false;
9852
9853 code.push_back((indent > 0 ? it->first.virginSubString(ns < indent ? ns : indent) +
9854 typename Trait::String(Trait::latin1ToChar('\n')) :
9855 typename Trait::String(it->first.virginSubString()) +
9856 typename Trait::String(Trait::latin1ToChar('\n'))));
9857 }
9858 }
9859
9860 if (!collectRefLinks) {
9861 if (!code.isEmpty()) {
9862 code.remove(code.length() - 1, 1);
9863 }
9864
9865 std::shared_ptr<Code<Trait>> codeItem(new Code<Trait>(code, fensedCode, false));
9866 codeItem->setSyntax(syntax);
9867 codeItem->setStartDelim(startDelim);
9868 codeItem->setEndDelim(endDelim);
9869 codeItem->setSyntaxPos(syntaxPos);
9870
9871 if (lastIt != fr.m_data.end() || (it == fr.m_data.end() && !fr.m_data.empty())) {
9872 codeItem->setStartColumn(fr.m_data.front().first.virginPos(startPos));
9873 codeItem->setStartLine(fr.m_data.front().second.m_lineNumber);
9874 auto tmp = std::prev(lastIt);
9875 codeItem->setEndColumn(tmp->first.virginPos(tmp->first.length() - 1));
9876 codeItem->setEndLine(tmp->second.m_lineNumber);
9877 } else {
9878 codeItem->setStartColumn(emptyColumn);
9879 codeItem->setStartLine(startLine);
9880 codeItem->setEndColumn(emptyColumn);
9881 codeItem->setEndLine(startLine);
9882 }
9883
9884 if (fensedCode) {
9885 parent->appendItem(codeItem);
9886 } else if (!parent->items().empty() && parent->items().back()->type() == ItemType::Code) {
9887 auto c = std::static_pointer_cast<Code<Trait>>(parent->items().back());
9888
9889 if (!c->isFensedCode()) {
9890 auto line = c->endLine();
9891 auto text = c->text();
9892
9893 for (; line < codeItem->startLine(); ++line) {
9894 text.push_back(Trait::latin1ToString("\n"));
9895 }
9896
9897 text.push_back(codeItem->text());
9898 c->setText(text);
9899 c->setEndColumn(codeItem->endColumn());
9900 c->setEndLine(codeItem->endLine());
9901 } else {
9902 parent->appendItem(codeItem);
9903 }
9904 } else {
9905 parent->appendItem(codeItem);
9906 }
9907 }
9908
9909 if (lastIt != fr.m_data.end()) {
9910 return lastIt->second.m_lineNumber;
9911 }
9912
9913 return -1;
9914}
9915
9916} /* namespace MD */
9917
9918#endif // MD4QT_MD_PARSER_HPP_INCLUDED
Abstract block (storage of child items).
Definition doc.h:603
const Items & items() const
Definition doc.h:629
Blockquote.
Definition doc.h:836
Code.
Definition doc.h:1269
Document.
Definition doc.h:1774
Footnote.
Definition doc.h:1727
Heading.
Definition doc.h:710
typename Trait::template Vector< WithPosition > Delims
Type of list of service characters.
Definition doc.h:720
Horizontal line.
Definition doc.h:364
Image.
Definition doc.h:1183
Base class for items that can have style options.
Definition doc.h:259
void setOpts(int o)
Set style options.
Definition doc.h:287
const Styles & closeStyles() const
Definition doc.h:305
const Styles & openStyles() const
Definition doc.h:293
int opts() const
Definition doc.h:281
typename Trait::template Vector< StyleDelim > Styles
Type of list of emphasis.
Definition doc.h:278
Base class for item in Markdown document.
Definition doc.h:177
virtual ItemType type() const =0
Line break.
Definition doc.h:570
List.
Definition doc.h:1039
Page break.
Definition doc.h:334
Paragraph.
Definition doc.h:679
void removeTextPlugin(int id)
Remove text plugin.
Definition parser.h:1458
friend struct PrivateAccess
Used in tests.
Definition parser.h:2075
~Parser()=default
void addTextPlugin(int id, TextPluginFunc< Trait > plugin, bool processInLinks, const typename Trait::StringList &userData)
Add text plugin.
Definition parser.h:1443
std::shared_ptr< Document< Trait > > parse(const typename Trait::String &fileName, bool recursive=true, const typename Trait::StringList &ext={Trait::latin1ToString("md"), Trait::latin1ToString("markdown")}, bool fullyOptimizeParagraphs=true)
Definition parser.h:2091
Raw HTML.
Definition doc.h:440
Wrapper for typename Trait::StringList to be behaved like a stream.
Definition parser.h:267
Trait::InternalString lineAt(long long int pos)
Definition parser.h:295
std::pair< typename Trait::InternalString, bool > readLine()
Definition parser.h:280
bool atEnd() const
Definition parser.h:275
void setLineNumber(long long int lineNumber)
Definition parser.h:305
long long int size() const
Definition parser.h:300
StringListStream(typename MdBlock< Trait >::Data &stream)
Definition parser.h:269
long long int currentLineNumber() const
Definition parser.h:290
Emphasis in the Markdown document.
Definition doc.h:216
Table cell.
Definition doc.h:1472
Table row.
Definition doc.h:1503
Alignment
Alignment.
Definition doc.h:1610
@ AlignCenter
Center.
Definition doc.h:1616
@ AlignLeft
Left.
Definition doc.h:1612
@ AlignRight
Right.
Definition doc.h:1614
TextStream(QTextStream &stream)
Definition parser.h:2135
TextStream(std::istream &stream)
Definition parser.h:2222
Text item in Paragraph.
Definition doc.h:513
Wrapper for UChar32 to be used with MD::Parser.
Definition traits.h:465
Wrapper for icu::UnicodeString to be used with MD::Parser.
Definition traits.h:600
void push_back(const UnicodeChar &ch)
Definition traits.h:648
std::vector< UnicodeString > split(const UnicodeChar &ch) const
Definition traits.h:729
bool isRelative() const
Definition traits.h:866
UnicodeString scheme() const
Definition traits.h:871
UnicodeString host() const
Definition traits.h:876
bool isValid() const
Definition traits.h:861
Base for any thing with start and end position.
Definition doc.h:76
void setEndColumn(long long int c)
Set end column.
Definition doc.h:137
long long int startColumn() const
Definition doc.h:101
void setStartColumn(long long int c)
Set start column.
Definition doc.h:125
long long int startLine() const
Definition doc.h:107
long long int endColumn() const
Definition doc.h:113
long long int endLine() const
Definition doc.h:119
Q_SCRIPTABLE QString start(QString train="")
Q_SCRIPTABLE Q_NOREPLY void start()
KIOCORE_EXPORT CopyJob * link(const QList< QUrl > &src, const QUrl &destDir, JobFlags flags=DefaultFlags)
QList< QVariant > parse(const QString &message, const QDateTime &externalIssueDateTime=QDateTime())
QString path(const QString &relativePath)
VehicleSection::Type type(QStringView coachNumber, QStringView coachClassification)
KIOCORE_EXPORT QStringList list(const QString &fileClass)
KGuiItem open()
KGuiItem quit()
KGuiItem back(BidiMode useBidi=IgnoreRTL)
QString label(StandardShortcut id)
const QList< QKeySequence > & end()
Definition algo.h:17
TextOption
Text option.
Definition doc.h:199
@ ItalicText
Italic text.
Definition doc.h:205
@ StrikethroughText
Strikethrough.
Definition doc.h:207
@ TextWithoutFormat
No format.
Definition doc.h:201
@ BoldText
Bold text.
Definition doc.h:203
bool isOrderedList(const typename Trait::String &s, int *num=nullptr, int *len=nullptr, typename Trait::Char *delim=nullptr, bool *isFirstLineEmpty=nullptr)
Definition parser.h:134
Trait::String paragraphToLabel(Paragraph< Trait > *p)
Convert Paragraph to label.
Definition parser.h:3655
std::pair< long long int, long long int > prevPosition(const MdBlock< Trait > &fr, long long int pos, long long int line)
Definition parser.h:4120
bool isValidUrl< UnicodeStringTrait >(const UnicodeString &url)
Definition parser.h:1219
bool isMult3(long long int i1, long long int i2)
Definition parser.h:7336
std::pair< long long int, long long int > nextPosition(const MdBlock< Trait > &fr, long long int pos, long long int line)
Definition parser.h:4143
bool checkForEndHtmlComments(const typename Trait::String &line, long long int pos)
Definition parser.h:2318
bool isH1(const typename Trait::String &s)
Definition parser.h:4104
bool isEmail(const typename Trait::String &url)
Definition parser.h:1094
bool isLineBreak(const typename Trait::String &s)
Definition parser.h:4410
TextOption styleToTextOption(Style s)
Definition parser.h:861
std::shared_ptr< Text< Trait > > concatenateText(typename Block< Trait >::Items::const_iterator it, typename Block< Trait >::Items::const_iterator last)
Concatenate texts in block.
Definition parser.h:7887
bool isH(const typename Trait::String &s, const typename Trait::Char &c)
Definition parser.h:4071
std::tuple< bool, long long int, typename Trait::Char, bool > listItemData(const typename Trait::String &s, bool wasText)
Definition parser.h:9075
bool isGitHubAutolink< QStringTrait >(const QString &url)
Definition parser.h:1203
bool isSemiOptimization(OptimizeParagraphType t)
Definition parser.h:7927
long long int skipSpaces(long long int i, const typename Trait::String &line)
Skip spaces in line from position i.
Definition parser.h:71
Trait::InternalString prepareTableData(typename Trait::InternalString s)
Prepare data in table cell for parsing.
Definition parser.h:3863
void makeTextObject(const typename Trait::String &text, TextParsingOpts< Trait > &po, long long int startPos, long long int startLine, long long int endPos, long long int endLine, bool doRemoveSpacesAtEnd=false)
Make text item.
Definition parser.h:4457
static const Trait::String s_canBeEscaped
Characters that can be escaped.
Definition parser.h:452
int isTableHeader(const typename Trait::String &s)
Definition parser.h:3539
void initLastItemWithOpts(TextParsingOpts< Trait > &po, std::shared_ptr< ItemWithOpts< Trait > > item)
Initialize item with style information and set it as last item.
Definition parser.h:4446
std::tuple< long long int, long long int, bool, typename Trait::String, long long int > readLinkDestination(long long int line, long long int pos, const TextParsingOpts< Trait > &po, WithPosition *urlPos=nullptr)
Read link's destination.
Definition parser.h:6598
Trait::StringList splitString(const typename Trait::String &str, const typename Trait::Char &ch)
Split string.
void removeSpacesAtEnd(String &s)
Remove spaces at the end of string s.
Definition parser.h:99
bool isListItemAndNotNested(const typename Trait::String &s, long long int indent)
Definition parser.h:9032
std::pair< bool, bool > readUnquotedHtmlAttrValue(long long int &l, long long int &p, const typename MdBlock< Trait >::Data &fr)
Read HTML attribute value.
Definition parser.h:4706
void resetHtmlTag(RawHtmlBlock< Trait > &html)
Reset pre-stored HTML.
Definition parser.h:221
static const char * s_startComment
Starting HTML comment string.
Definition parser.h:47
long long int processGitHubAutolinkExtension(std::shared_ptr< Paragraph< Trait > > p, TextParsingOpts< Trait > &po, long long int idx)
Process GitHub autolinks for the text with index idx.
Definition parser.h:1243
void setLastPos(std::shared_ptr< Item< Trait > > item, long long int pos, long long int line)
Set last position of the item.
Definition parser.h:9125
long long int lastNonSpacePos(const String &line)
Definition parser.h:85
std::shared_ptr< Paragraph< Trait > > optimizeParagraph(std::shared_ptr< Paragraph< Trait > > &p, TextParsingOpts< Trait > &po, OptimizeParagraphType type=OptimizeParagraphType::Full)
Optimize Paragraph.
Definition parser.h:7956
WithPosition findAndRemoveClosingSequence(typename Trait::InternalString &s)
Find and remove closing sequence of "#" in heading.
Definition parser.h:3710
std::shared_ptr< Paragraph< Trait > > splitParagraphsAndFreeHtml(std::shared_ptr< Block< Trait > > parent, std::shared_ptr< Paragraph< Trait > > p, TextParsingOpts< Trait > &po, bool collectRefLinks, bool fullyOptimizeParagraphs=true)
Split Paragraph and free HTML.
Definition parser.h:8159
bool isFootnote(const typename Trait::String &s)
Definition parser.h:320
void githubAutolinkPlugin(std::shared_ptr< Paragraph< Trait > > p, TextParsingOpts< Trait > &po, const typename Trait::StringList &)
GitHub autolinks plugin.
Definition parser.h:1375
void replaceTabs(typename Trait::InternalString &s)
Replace tabs with spaces (just for internal simpler use).
Definition parser.h:2465
bool isCodeFences(const typename Trait::String &s, bool closing=false)
Definition parser.h:360
long long int lineBreakLength(const typename Trait::String &s)
Definition parser.h:4426
bool indentInList(const std::vector< long long int > *indents, long long int indent, bool codeIndentedBySpaces)
Definition parser.h:51
std::pair< long long int, long long int > localPosFromVirgin(const MdBlock< Trait > &fr, long long int virginColumn, long long int virginLine)
Definition parser.h:1057
OptimizeParagraphType
Type of the paragraph's optimization.
Definition parser.h:813
@ Semi
Semi optimization, optimization won't concatenate text items if style delimiters will be in the middl...
Definition parser.h:818
@ SemiWithoutRawData
Semi optimization, but raw text data won't be concatenated (will be untouched).
Definition parser.h:822
@ Full
Full optimization.
Definition parser.h:815
@ FullWithoutRawData
Full optimization, but raw text data won't be concatenated (will be untouched).
Definition parser.h:820
std::shared_ptr< Paragraph< Trait > > makeParagraph(typename Block< Trait >::Items::const_iterator first, typename Block< Trait >::Items::const_iterator last)
Make Paragraph.
Definition parser.h:8139
bool isH2(const typename Trait::String &s)
Definition parser.h:4112
Trait::String readEscapedSequence(long long int i, const typename Trait::String &str, long long int *endPos=nullptr)
Skip escaped sequence of characters till first space.
Definition parser.h:415
std::function< void(std::shared_ptr< Paragraph< Trait > >, TextParsingOpts< Trait > &, const typename Trait::StringList &)> TextPluginFunc
Functor type for text plugin.
Definition parser.h:889
bool isGitHubAutolink< UnicodeStringTrait >(const UnicodeString &url)
Definition parser.h:1228
void normalizePos(long long int &pos, long long int &line, long long int length, long long int linesCount)
Normalize position.
Definition parser.h:8062
std::pair< bool, bool > readHtmlAttrValue(long long int &l, long long int &p, const typename MdBlock< Trait >::Data &fr)
Read HTML attribute value.
Definition parser.h:4730
std::pair< long long int, long long int > calculateIndent(const typename Trait::String &s, long long int p)
Definition parser.h:9066
void makeHorLine(const typename MdBlock< Trait >::Line &line, std::shared_ptr< Block< Trait > > parent)
Make horizontal line.
Definition parser.h:8480
void makeText(long long int lastLine, long long int lastPos, TextParsingOpts< Trait > &po)
Make text item.
Definition parser.h:4574
std::tuple< bool, long long int, long long int, bool, typename Trait::String > isHtmlTag(long long int line, long long int pos, TextParsingOpts< Trait > &po, int rule)
Definition parser.h:4959
bool isStartOfCode(const typename Trait::String &str, typename Trait::String *syntax=nullptr, WithPosition *delim=nullptr, WithPosition *syntaxPos=nullptr)
Definition parser.h:486
Trait::String stringToLabel(const typename Trait::String &s)
Convert string to label.
Definition parser.h:3634
UnicodeStringTrait::StringList splitString< UnicodeStringTrait >(const UnicodeString &str, const UnicodeChar &ch)
Definition parser.h:648
int isTableAlignment(const typename Trait::String &s)
Definition parser.h:669
bool isColumnAlignment(const typename Trait::String &s)
Definition parser.h:596
void makeHeading(std::shared_ptr< Block< Trait > > parent, std::shared_ptr< Document< Trait > > doc, std::shared_ptr< Paragraph< Trait > > p, long long int lastColumn, long long int lastLine, int level, const typename Trait::String &workingPath, const typename Trait::String &fileName, bool collectRefLinks, const WithPosition &delim, TextParsingOpts< Trait > &po)
Make heading.
Definition parser.h:8255
void checkForTableInParagraph(TextParsingOpts< Trait > &po, long long int lastLine)
Check for table in paragraph.
Definition parser.h:4544
std::tuple< long long int, long long int, bool, typename Trait::String, long long int > readLinkTitle(long long int line, long long int pos, const TextParsingOpts< Trait > &po)
Read link's title.
Definition parser.h:6715
bool isOnlyHtmlTagsAfterOrClosedRule1(long long int line, long long int pos, TextParsingOpts< Trait > &po, int rule)
Definition parser.h:4887
void checkForTextPlugins(std::shared_ptr< Paragraph< Trait > > p, TextParsingOpts< Trait > &po, const TextPluginsMap< Trait > &textPlugins, bool inLink)
Process text plugins.
Definition parser.h:8462
Style
Emphasis type.
Definition parser.h:844
@ Bold2
"__"
Definition parser.h:852
@ Unknown
Unknown.
Definition parser.h:856
@ Strikethrough
"~"
Definition parser.h:854
@ Bold1
"**"
Definition parser.h:850
@ Italic1
"*"
Definition parser.h:846
@ Italic2
"_"
Definition parser.h:848
Trait::String virginSubstr(const MdBlock< Trait > &fr, const WithPosition &virginPos)
Definition parser.h:990
std::map< int, std::tuple< TextPluginFunc< Trait >, bool, typename Trait::StringList > > TextPluginsMap
Type of the map of text plugins.
Definition parser.h:899
void eatRawHtml(long long int line, long long int pos, long long int toLine, long long int toPos, TextParsingOpts< Trait > &po, bool finish, int htmlRule, bool onLine, bool continueEating=false)
Read HTML data.
Definition parser.h:5165
void skipSpacesInHtml(long long int &l, long long int &p, const typename MdBlock< Trait >::Data &fr)
Skip spaces.
Definition parser.h:4687
@ Document
Document.
Definition doc.h:55
@ FootnoteRef
Footnote ref.
Definition doc.h:51
@ Table
Table.
Definition doc.h:49
@ PageBreak
Page break.
Definition doc.h:57
@ Link
Link.
Definition doc.h:39
@ Text
Text.
Definition doc.h:27
@ Anchor
Anchor.
Definition doc.h:59
@ Math
Math expression.
Definition doc.h:65
@ ListItem
List item.
Definition doc.h:35
@ Image
Image.
Definition doc.h:41
@ Code
Code.
Definition doc.h:43
@ RawHtml
Raw HTML.
Definition doc.h:63
@ LineBreak
Line break.
Definition doc.h:31
@ Paragraph
Paragraph.
Definition doc.h:29
void appendCloseStyle(TextParsingOpts< Trait > &po, const StyleDelim &s)
Append close style.
Definition parser.h:7646
void makeTextObjectWithLineBreak(const typename Trait::String &text, TextParsingOpts< Trait > &po, long long int startPos, long long int startLine, long long int endPos, long long int endLine)
Make text item with line break.
Definition parser.h:4519
std::pair< typename Trait::InternalStringList, std::vector< long long int > > splitTableRow(const typename Trait::InternalString &s)
Split table's row on cells.
Definition parser.h:3873
bool isSetextHeadingBetween(const TextParsingOpts< Trait > &po, long long int startLine, long long int endLine)
Definition parser.h:4940
long long int textAtIdx(std::shared_ptr< Paragraph< Trait > > p, size_t idx)
Definition parser.h:8441
long long int listLevel(const std::vector< long long int > &indents, long long int pos)
Definition parser.h:3313
long long int posOfListItem(const typename Trait::String &s, bool ordered)
Definition parser.h:3271
bool isGitHubAutolink(const typename Trait::String &url)
bool isHorizontalLine(const typename Trait::String &s)
Definition parser.h:553
bool isValidUrl(const typename Trait::String &url)
Trait::String startSequence(const typename Trait::String &line)
Definition parser.h:111
bool isValidUrl< QStringTrait >(const QString &url)
Definition parser.h:1194
TextPlugin
ID of text plugin.
Definition parser.h:830
@ UnknownPluginID
Unknown plugin.
Definition parser.h:832
@ UserDefinedPluginID
First user defined plugin ID.
Definition parser.h:836
@ GitHubAutoLinkPluginID
GitHub's autolinks plugin.
Definition parser.h:834
void applyStyles(int &opts, std::vector< typename TextParsingOpts< Trait >::StyleInfo > &styles)
Apply styles.
Definition parser.h:7213
long long int lastVirginPositionInParagraph(Item< Trait > *item)
Definition parser.h:8215
void skipSpacesUpTo1Line(long long int &line, long long int &pos, const typename MdBlock< Trait >::Data &fr)
Skip spaces in the block up to 1 new line.
Definition parser.h:6583
bool isHtmlComment(const typename Trait::String &s)
Definition parser.h:685
void resolveLinks(typename Trait::StringList &linksToParse, std::shared_ptr< Document< Trait > > doc)
Resolve links in the document.
Definition parser.h:3171
QStringTrait::StringList splitString< QStringTrait >(const QString &str, const QChar &ch)
Definition parser.h:659
bool isWithoutRawDataOptimization(OptimizeParagraphType t)
Definition parser.h:7941
Trait::String replaceEntity(const typename Trait::String &s)
Replace entities in the string with corresponding character.
Definition parser.h:718
static const std::map< typename Trait::String, const char16_t * > s_entityMap
String removeBackslashes(const String &s)
Remove backslashes from the string.
Definition parser.h:458
Trait::String removeLineBreak(const typename Trait::String &s)
Remove line break from the end of string.
Definition parser.h:4434
void updateLastPosInList(const RawHtmlBlock< Trait > &html)
Update last position of all parents.
Definition parser.h:9136
void closeStyle(std::vector< typename TextParsingOpts< Trait >::StyleInfo > &styles, Style s)
Close style.
Definition parser.h:7198
std::pair< typename Trait::String, WithPosition > findAndRemoveHeaderLabel(typename Trait::InternalString &s)
Find and remove heading label.
Definition parser.h:3604
std::pair< bool, bool > readHtmlAttr(long long int &l, long long int &p, const typename MdBlock< Trait >::Data &fr, bool checkForSpace)
Read HTML attribute.
Definition parser.h:4787
void checkForHtmlComments(const typename Trait::InternalString &line, StringListStream< Trait > &stream, MdLineData::CommentDataMap &res)
Collect information about HTML comments.
Definition parser.h:2333
bool isEmpty() const
void push_back(parameter_type value)
void clear()
QString first(qsizetype n) const
qsizetype indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
bool isEmpty() const
qsizetype length() const
void push_back(QChar ch)
QString & remove(QChar ch, Qt::CaseSensitivity cs)
QString sliced(qsizetype pos) const
QStringList split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const
QString toCaseFolded() const
QString toUpper() const
const QChar * unicode() const
bool contains(QLatin1StringView str, Qt::CaseSensitivity cs) const
qsizetype size() const
SkipEmptyParts
QString host(ComponentFormattingOptions options) const
bool isRelative() const
bool isValid() const
QString scheme() const
Internal structure for block of text in Markdown.
Definition parser.h:251
Data m_data
Definition parser.h:255
typename Trait::template Vector< Line > Data
Definition parser.h:253
std::pair< typename Trait::InternalString, MdLineData > Line
Definition parser.h:252
long long int m_emptyLinesBefore
Definition parser.h:256
bool m_emptyLineAfter
Definition parser.h:257
Internal structure for auxiliary information about a line in Markdown.
Definition parser.h:235
long long int m_lineNumber
Definition parser.h:236
std::pair< char, bool > CommentData
Definition parser.h:237
bool m_mayBreakList
Definition parser.h:242
std::map< long long int, CommentData > CommentDataMap
Definition parser.h:238
CommentDataMap m_htmlCommentData
Definition parser.h:240
Trait to use this library with QString.
Definition traits.h:1017
QStringList StringList
Definition traits.h:1034
Internal structure for pre-storing HTML.
Definition parser.h:195
int m_htmlBlockType
Definition parser.h:202
std::unordered_map< std::shared_ptr< Block< Trait > >, SequenceOfBlock > m_toAdjustLastPos
Definition parser.h:201
SequenceOfBlock m_blocks
Definition parser.h:200
std::vector< std::pair< std::shared_ptr< Block< Trait > >, long long int > > SequenceOfBlock
Definition parser.h:199
std::shared_ptr< RawHtml< Trait > > m_html
Definition parser.h:196
std::shared_ptr< Block< Trait > > findParent(long long int indent) const
Definition parser.h:207
bool m_continueHtml
Definition parser.h:203
std::shared_ptr< Block< Trait > > m_topParent
Definition parser.h:198
std::shared_ptr< Block< Trait > > m_parent
Definition parser.h:197
Internal structure for auxiliary options for parser.
Definition parser.h:909
bool shouldStopParsing() const
Definition parser.h:951
RawHtmlBlock< Trait > & m_html
Definition parser.h:919
std::shared_ptr< Document< Trait > > m_doc
Definition parser.h:913
long long int m_pos
Definition parser.h:965
bool m_checkLineOnNewType
Definition parser.h:923
ItemWithOpts< Trait >::Styles m_openStyles
Definition parser.h:979
void concatenateAuxText(long long int start, long long int end)
Definition parser.h:935
Trait::StringList & m_linksToParse
Definition parser.h:914
bool m_firstInParagraph
Definition parser.h:924
long long int m_lastTextPos
Definition parser.h:968
long long int m_line
Definition parser.h:964
Trait::String m_fileName
Definition parser.h:916
long long int m_startTableLine
Definition parser.h:966
std::shared_ptr< ItemWithOpts< Trait > > m_lastItemWithStyle
Definition parser.h:980
std::shared_ptr< Block< Trait > > m_parent
Definition parser.h:911
std::shared_ptr< RawHtml< Trait > > m_tmpHtml
Definition parser.h:912
std::shared_ptr< Text< Trait > > m_lastText
Definition parser.h:921
const TextPluginsMap< Trait > & m_textPlugins
Definition parser.h:920
MdBlock< Trait > & m_fr
Definition parser.h:910
Trait::String m_workingPath
Definition parser.h:915
Detected m_detected
Definition parser.h:948
std::vector< StyleInfo > m_styles
Definition parser.h:978
std::vector< TextData > m_rawTextData
Definition parser.h:932
long long int m_lastTextLine
Definition parser.h:967
Trait to use this library with std::string.
Definition traits.h:893
std::vector< String > StringList
Definition traits.h:908
static bool fileExists(const String &fileName, const String &workingPath)
Definition traits.h:953
static bool isFreeTag(std::shared_ptr< RawHtml< Trait > > html)
Definition parser.h:4180
static void setFreeTag(std::shared_ptr< RawHtml< Trait > > html, bool on)
Definition parser.h:4186
#define MD_DISABLE_COPY(Class)
Macro for disabling copy.
Definition utils.h:17
#define MD_UNUSED(x)
Avoid "unused parameter" warnings.
Definition utils.h:26
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 24 2025 11:46:26 by doxygen 1.13.2 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.