Md4qt

parser.h
Go to the documentation of this file.
1/*
2 SPDX-FileCopyrightText: 2022-2024 Igor Mironchik <igor.mironchik@gmail.com>
3 SPDX-License-Identifier: MIT
4*/
5
6#ifndef MD4QT_MD_PARSER_HPP_INCLUDED
7#define MD4QT_MD_PARSER_HPP_INCLUDED
8
9// md4qt include.
10#include "doc.h"
11#include "entities_map.h"
12#include "traits.h"
13#include "utils.h"
14
15#ifdef MD4QT_QT_SUPPORT
16
17// Qt include.
18#include <QDir>
19#include <QFile>
20#include <QTextStream>
21
22#endif // MD4QT_QT_SUPPORT
23
24#ifdef MD4QT_ICU_STL_SUPPORT
25
26// C++ include.
27#include <exception>
28
29#endif // MD4QT_ICU_STL_SUPPORT
30
31// C++ include.
32#include <algorithm>
33#include <cassert>
34#include <cmath>
35#include <fstream>
36#include <functional>
37#include <memory>
38#include <set>
39#include <tuple>
40#include <unordered_map>
41#include <vector>
42
43namespace MD
44{
45
//! Starting HTML comment string.
static const char *s_startComment = "<!--";
48
//! \return Whether \p indent belongs to a list with previous \p indents indents.
//!
//! An indent matches a stored indent \c v when <tt>indent >= v</tt> and, unless
//! \p codeIndentedBySpaces is set, also <tt>indent <= v + 3</tt>.
//! A null or empty \p indents never matches.
inline bool
indentInList(const std::vector<long long int> *indents,
             long long int indent,
             bool codeIndentedBySpaces)
{
    if (!indents || indents->empty()) {
        return false;
    }

    return std::any_of(indents->cbegin(),
                       indents->cend(),
                       [indent, codeIndentedBySpaces](const auto &v) {
                           return indent >= v && (codeIndentedBySpaces || indent <= v + 3);
                       });
}
67
//! Skip spaces in line from position \p i.
//! \return Position of the first non-space character at or after \p i, or the
//! length of \p line when only spaces remain. If \p i is already at or past
//! the end it is returned unchanged.
template<class Trait>
inline long long int
skipSpaces(long long int i, const typename Trait::String &line)
{
    const auto length = line.length();

    while (i < length && line[i].isSpace()) {
        ++i;
    }

    return i;
}
81
82
83//! Remove spaces at the end of string \p s.
// NOTE(review): listing line 86 (the function name and its parameter list) is
// missing from this extract. The body below mutates a string named `s` in place.
84template<class String>
85inline void
87{
88 long long int i = s.length() - 1;
89
// Walk backwards to the last non-space character; i ends at -1 if there is none.
90 for (; i >= 0; --i) {
91 if (!s[i].isSpace()) {
92 break;
93 }
94 }
95
// Only touch the string when at least one trailing space was found.
96 if (i != s.length() - 1) {
97 s.remove(i + 1, s.length() - i - 1);
98 }
99}
100
//! \return Last non-space character position.
//!
//! The backward scan stops at index 0, so a line made only of spaces yields 0.
//! An empty line yields -1.
template<class Trait>
inline long long int
lastNonSpacePos(const typename Trait::String &line)
{
    long long int i = line.length() - 1;

    while (i > 0 && line[i].isSpace()) {
        --i;
    }

    return i;
}
114
115//! \return Starting sequence of the same characters.
116template<class Trait>
117inline typename Trait::String
118startSequence(const typename Trait::String &line)
119{
120 auto pos = skipSpaces<Trait>(0, line);
121
122 if (pos >= line.length()) {
123 return {};
124 }
125
126 const auto sch = line[pos];
127 const auto start = pos;
128
129 ++pos;
130
131 while (pos < line.length() && line[pos] == sch) {
132 ++pos;
133 }
134
135 return line.sliced(start, pos - start);
136}
137
138//! \return Is string an ordered list.
139template<class Trait>
140inline bool
141isOrderedList(const typename Trait::String &s,
142 int *num = nullptr,
143 int *len = nullptr,
144 typename Trait::Char *delim = nullptr,
145 bool *isFirstLineEmpty = nullptr)
146{
147 long long int p = skipSpaces<Trait>(0, s);
148
149 long long int dp = p;
150
151 for (; p < s.size(); ++p) {
152 if (!s[p].isDigit()) {
153 break;
154 }
155 }
156
157 if (dp != p && p < s.size()) {
158 const auto digits = s.sliced(dp, p - dp);
159
160 if (digits.size() > 9) {
161 return false;
162 }
163
164 const auto i = digits.toInt();
165
166 if (num) {
167 *num = i;
168 }
169
170 if (len) {
171 *len = p - dp;
172 }
173
174 if (s[p] == Trait::latin1ToChar('.') || s[p] == Trait::latin1ToChar(')')) {
175 if (delim) {
176 *delim = s[p];
177 }
178
179 ++p;
180
181 long long int tmp = skipSpaces<Trait>(p, s);
182
183 if (isFirstLineEmpty) {
184 *isFirstLineEmpty = (tmp == s.size());
185 }
186
187 if ((p < s.size() && s[p] == Trait::latin1ToChar(' ')) || p == s.size()) {
188 return true;
189 }
190 }
191 }
192
193 return false;
194}
195
196//
197// RawHtmlBlock
198//
199
200//! Internal structure for pre-storing HTML.
// NOTE(review): listing lines 202, 207 and 209 are missing from this extract.
// Presumably they hold the struct header and the declarations of m_blocks and
// m_htmlBlockType, which are used in this file but not declared in the visible
// lines. Confirm against the original header.
201template<class Trait>
203 std::shared_ptr<RawHtml<Trait>> m_html = {};
204 std::shared_ptr<Block<Trait>> m_parent = {};
205 std::shared_ptr<Block<Trait>> m_topParent = {};
// Sequence of (block, indent) pairs; findParent() compares the second member with an indent.
206 using SequenceOfBlock = std::vector<std::pair<std::shared_ptr<Block<Trait>>, long long int>>;
208 std::unordered_map<std::shared_ptr<Block<Trait>>, SequenceOfBlock> m_toAdjustLastPos = {};
210 bool m_continueHtml = false;
211 bool m_onLine = false;
212
// Searches m_blocks from the last entry to the first and returns the first block
// whose stored indent is not greater than \p indent, or nullptr if there is none.
213 std::shared_ptr<Block<Trait>>
214 findParent(long long int indent) const
215 {
216 for (auto it = m_blocks.crbegin(), last = m_blocks.crend(); it != last; ++it) {
217 if (indent >= it->second) {
218 return it->first;
219 }
220 }
221
222 return nullptr;
223 }
224}; // struct RawHtmlBlock
225
226//! Reset pre-stored HTML.
// NOTE(review): listing line 228 (the function name and parameter list) is missing
// from this extract. The body resets fields of an object named `html`.
227template<class Trait>
229{
// Drops the stored HTML item and parent, and restores the flags to their initial values.
// m_topParent and m_toAdjustLastPos are not touched here.
230 html.m_html.reset();
231 html.m_parent.reset();
232 html.m_htmlBlockType = -1;
233 html.m_continueHtml = false;
234 html.m_onLine = false;
235}
236
237//
238// MdLineData
239//
240
241//! Internal structure for auxiliary information about a line in Markdown.
// NOTE(review): listing lines 242 (presumably the struct header) and 247
// (presumably the member that the "std::pair< closed, valid >" comment describes)
// are missing from this extract.
243 long long int m_lineNumber = -1;
244 using CommentData = std::pair<char, bool>;
// Keyed by a long long int; presumably a position within the line (TODO confirm).
245 using CommentDataMap = std::map<long long int, CommentData>;
246 // std::pair< closed, valid >
248}; // struct MdLineData
249
250//
251// MdBlock
252//
253
254//! Internal structure for block of text in Markdown.
// NOTE(review): listing line 260 is missing from this extract. Other code in this
// file accesses a member named m_data on MdBlock, so it presumably declares
// `Data m_data` (confirm).
255template<class Trait>
256struct MdBlock {
// One line of text paired with its auxiliary per-line data.
257 using Line = std::pair<typename Trait::InternalString, MdLineData>;
258 using Data = typename Trait::template Vector<Line>;
259
261 long long int m_emptyLinesBefore = 0;
262 bool m_emptyLineAfter = true;
263}; // struct MdBlock
264
265//
266// StringListStream
267//
268
269//! Wrapper for typename Trait::StringList to be behaved like a stream.
// NOTE(review): listing lines 271 (class header) and 274 (constructor signature)
// are missing from this extract. The constructor takes something named `stream`
// that is bound to the reference member m_stream.
270template<class Trait>
272{
273public:
275 : m_stream(stream)
276 , m_pos(0)
277 {
278 }
279
// True once every line has been read.
280 bool atEnd() const
281 {
282 return (m_pos >= (long long int)m_stream.size());
283 }
// Returns the current line and advances. There is no bounds check here, so
// callers are expected to test atEnd() first.
284 typename Trait::InternalString readLine()
285 {
286 return m_stream.at(m_pos++).first;
287 }
// Line number stored with the current line, or size() when at the end.
288 long long int currentLineNumber() const
289 {
290 return (m_pos < size() ? m_stream.at(m_pos).second.m_lineNumber : size());
291 }
// Text of the line at index \p pos; the read position is not changed.
292 typename Trait::InternalString lineAt(long long int pos)
293 {
294 return m_stream.at(pos).first;
295 }
// Number of lines in the wrapped data.
296 long long int size() const
297 {
298 return m_stream.size();
299 }
300
301private:
302 typename MdBlock<Trait>::Data &m_stream;
303 long long int m_pos;
304}; // class StringListStream
305
//! Match the delimiter run \p v against the stack \p s, walking from top to bottom.
//!
//! Each entry is <tt>{{length, flag}, type}</tt>. The amount to consume is
//! <tt>-v.first.first</tt>. Only entries with the same \c type as \p v and a
//! positive length are candidates, and a candidate is skipped when the
//! "multiple of 3" rule applies (see the comment in the body).
//!
//! A candidate not longer than the remaining amount is removed together with
//! everything above it, and the amount is reduced by its length. A longer
//! candidate is shortened, everything above it is removed, and the walk stops.
//!
//! \return \c true if the entry at index \p idx is a candidate that would be
//! completely consumed (in this case that entry is left in place), \c false otherwise.
inline bool
checkStack(std::vector<std::pair<std::pair<long long int, bool>, int>> &s,
           const std::pair<std::pair<long long int, bool>, int> &v,
           size_t idx)
{
    int value = -v.first.first;

    for (long long int i = static_cast<long long int>(s.size()) - 1; i >= 0; --i) {
        if (s[i].second == v.second && s[i].first.first > 0) {
            // Check for rule of multiplies of 3. Look at CommonMark 0.30 example 411.
            if (!((s[i].first.second || v.first.second) &&
                  (s[i].first.first + value) % 3 == 0 &&
                  !(s[i].first.first % 3 == 0 && value % 3 == 0))) {
                if (s[i].first.first - value <= 0) {
                    if (i == (long long int)idx) {
                        return true;
                    }

                    value -= s[i].first.first;

                    s.erase(s.cbegin() + i, s.cend());

                    if (value == 0) {
                        break;
                    }
                } else {
                    s[i].first.first -= value;

                    s.erase(s.cbegin() + i + 1, s.cend());

                    break;
                }
            }
        }

        if (i == 0) {
            break;
        }
    }

    return false;
}
348
349//! \return Is string a footnote?
350template<class Trait>
351inline bool
352isFootnote(const typename Trait::String &s)
353{
354 long long int p = skipSpaces<Trait>(0, s);
355
356 if (s.size() - p < 5) {
357 return false;
358 }
359
360 if (s[p++] != Trait::latin1ToChar('[')) {
361 return false;
362 }
363
364 if (s[p++] != Trait::latin1ToChar('^')) {
365 return false;
366 }
367
368 if (s[p] == Trait::latin1ToChar(']') || s[p].isSpace()) {
369 return false;
370 }
371
372 for (; p < s.size(); ++p) {
373 if (s[p] == Trait::latin1ToChar(']')) {
374 break;
375 } else if (s[p].isSpace()) {
376 return false;
377 }
378 }
379
380 ++p;
381
382 if (p < s.size() && s[p] == Trait::latin1ToChar(':')) {
383 return true;
384 } else {
385 return false;
386 }
387}
388
389//! \return Is string a code fences?
390template<class Trait>
391inline bool
392isCodeFences(const typename Trait::String &s, bool closing = false)
393{
394 auto p = skipSpaces<Trait>(0, s);
395
396 if (p > 3 || p == s.length()) {
397 return false;
398 }
399
400 const auto ch = s[p];
401
402 if (ch != Trait::latin1ToChar('~') && ch != Trait::latin1ToChar('`')) {
403 return false;
404 }
405
406 bool space = false;
407
408 long long int c = 1;
409 ++p;
410
411 for (; p < s.length(); ++p) {
412 if (s[p].isSpace()) {
413 space = true;
414 } else if (s[p] == ch) {
415 if (space && (closing ? true : ch == Trait::latin1ToChar('`'))) {
416 return false;
417 }
418
419 if (!space) {
420 ++c;
421 }
422 } else if (closing) {
423 return false;
424 } else {
425 break;
426 }
427 }
428
429 if (c < 3) {
430 return false;
431 }
432
433 if (ch == Trait::latin1ToChar('`')) {
434 for (; p < s.length(); ++p) {
435 if (s[p] == Trait::latin1ToChar('`')) {
436 return false;
437 }
438 }
439 }
440
441 return true;
442}
443
//! Skip escaped sequence of characters till first space.
//!
//! A space preceded by an unescaped backslash does not end the sequence.
//! \param i Start position.
//! \param str String to read from.
//! \param endPos If not null, receives the position of the last character of
//! the sequence. It is not written when \p i is at or past the end of \p str.
//! \return The characters read, backslashes included, or an empty string when
//! \p i is at or past the end of \p str.
template<class Trait>
inline typename Trait::String
readEscapedSequence(long long int i,
                    const typename Trait::String &str,
                    long long int *endPos = nullptr)
{
    bool backslash = false;
    const auto start = i;

    if (start >= str.length()) {
        return {};
    }

    while (i < str.length()) {
        bool now = false;

        if (str[i] == Trait::latin1ToChar('\\') && !backslash) {
            backslash = true;
            now = true;
        } else if (str[i].isSpace() && !backslash) {
            break;
        }

        // The escape state covers only the character right after the backslash.
        if (!now) {
            backslash = false;
        }

        ++i;
    }

    if (endPos) {
        *endPos = i - 1;
    }

    return str.sliced(start, i - start);
}
481
//! Characters that can be escaped.
template<class Trait>
static const typename Trait::String s_canBeEscaped =
    Trait::latin1ToString("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~");
486
487//! Remove backslashes from the string.
488template<class String, class Trait>
489inline String
490removeBackslashes(const String &s)
491{
492 String r = s;
493 bool backslash = false;
494 long long int extra = 0;
495
496 for (long long int i = 0; i < s.length(); ++i) {
497 bool now = false;
498
499 if (s[i] == Trait::latin1ToChar('\\') && !backslash && i != s.length() - 1) {
500 backslash = true;
501 now = true;
502 } else if (s_canBeEscaped<Trait>.contains(s[i]) && backslash) {
503 r.remove(i - extra - 1, 1);
504 ++extra;
505 }
506
507 if (!now) {
508 backslash = false;
509 }
510 }
511
512 return r;
513}
514
515//! \return Is string a start of code?
// NOTE(review): listing line 566 is missing from this extract. It is the first half
// of the statement that ends on listing line 567, presumably the assignment to
// *syntax (confirm against the original header).
//
// The fence must start after at most 3 leading spaces and consist of at least
// 3 identical '`' or '~' characters.
// \p delim, if given, receives the start and end columns of the fence. Its start
// column is set even when the function returns false.
// \p syntaxPos, if given, receives the columns of the info string. It is only
// filled when \p syntax is given as well.
516template<class Trait>
517inline bool
518isStartOfCode(const typename Trait::String &str,
519 typename Trait::String *syntax = nullptr,
520 WithPosition *delim = nullptr,
521 WithPosition *syntaxPos = nullptr)
522{
523 long long int p = skipSpaces<Trait>(0, str);
524
525 if (delim) {
526 delim->setStartColumn(p);
527 }
528
529 if (p > 3) {
530 return false;
531 }
532
533 if (str.size() - p < 3) {
534 return false;
535 }
536
537 const bool c96 = str[p] == Trait::latin1ToChar('`');
538 const bool c126 = str[p] == Trait::latin1ToChar('~');
539
540 if (c96 || c126) {
541 ++p;
542 long long int c = 1;
543
// Count the run of fence characters.
544 while (p < str.length()) {
545 if (str[p] != (c96 ? Trait::latin1ToChar('`') : Trait::latin1ToChar('~'))) {
546 break;
547 }
548
549 ++c;
550 ++p;
551 }
552
553 if (delim) {
554 delim->setEndColumn(p - 1);
555 }
556
557 if (c < 3) {
558 return false;
559 }
560
561 if (syntax) {
562 p = skipSpaces<Trait>(p, str);
563 long long int endSyntaxPos = p;
564
565 if (p < str.size()) {
567 readEscapedSequence<Trait>(p, str, &endSyntaxPos));
568
569 if (syntaxPos) {
570 syntaxPos->setStartColumn(p);
571 syntaxPos->setEndColumn(endSyntaxPos);
572 }
573 }
574 }
575
576 return true;
577 }
578
579 return false;
580}
581
//! \return Is string a horizontal line?
//!
//! The string must start with '*', '-' or '_' and contain at least 3 of that
//! character, with nothing else except spaces. Leading spaces are not skipped
//! here; the first character itself must be the marker.
template<class Trait>
inline bool
isHorizontalLine(const typename Trait::String &s)
{
    if (s.size() < 3) {
        return false;
    }

    typename Trait::Char c;

    if (s[0] == Trait::latin1ToChar('*')) {
        c = Trait::latin1ToChar('*');
    } else if (s[0] == Trait::latin1ToChar('-')) {
        c = Trait::latin1ToChar('-');
    } else if (s[0] == Trait::latin1ToChar('_')) {
        c = Trait::latin1ToChar('_');
    } else {
        return false;
    }

    long long int p = 1;
    long long int count = 1;

    for (; p < s.size(); ++p) {
        if (s[p] != c && !s[p].isSpace()) {
            break;
        } else if (s[p] == c) {
            ++count;
        }
    }

    if (count < 3) {
        return false;
    }

    // The whole string must have been consumed by markers and spaces.
    if (p == s.size()) {
        return true;
    }

    return false;
}
624
625//! \return Is string a column alignment?
626template<class Trait>
627inline bool
628isColumnAlignment(const typename Trait::String &s)
629{
630 long long int p = skipSpaces<Trait>(0, s);
631
632 static const typename Trait::String s_legitime = Trait::latin1ToString(":-");
633
634 if (p >= s.length()) {
635 return false;
636 }
637
638 if (!s_legitime.contains(s[p])) {
639 return false;
640 }
641
642 if (s[p] == Trait::latin1ToChar(':')) {
643 ++p;
644 }
645
646 for (; p < s.size(); ++p) {
647 if (s[p] != Trait::latin1ToChar('-')) {
648 break;
649 }
650 }
651
652 if (p == s.size()) {
653 return true;
654 }
655
656 if (s[p] != Trait::latin1ToChar(':') && !s[p].isSpace()) {
657 return false;
658 }
659
660 ++p;
661
662 for (; p < s.size(); ++p) {
663 if (!s[p].isSpace()) {
664 return false;
665 }
666 }
667
668 return true;
669}
670
//! Split string \p str by the separator \p ch.
//! Only declared here; the specializations below provide the implementation
//! for each supported trait.
template<class Trait>
typename Trait::StringList
splitString(const typename Trait::String &str, const typename Trait::Char &ch);

#ifdef MD4QT_ICU_STL_SUPPORT

//! Specialization for UnicodeStringTrait, forwards to UnicodeString::split().
template<>
inline typename UnicodeStringTrait::StringList
splitString<UnicodeStringTrait>(const UnicodeString &str, const UnicodeChar &ch)
{
    return str.split(ch);
}

#endif

#ifdef MD4QT_QT_SUPPORT

//! Specialization for QStringTrait, empty parts are skipped.
template<>
inline typename QStringTrait::StringList
splitString<QStringTrait>(const QString &str, const QChar &ch)
{
    return str.split(ch, Qt::SkipEmptyParts);
}

#endif
697
698//! \return Number of columns?
699template<class Trait>
700inline int
701isTableAlignment(const typename Trait::String &s)
702{
703 const auto columns = splitString<Trait>(s.simplified(), Trait::latin1ToChar('|'));
704
705 for (const auto &c : columns) {
706 if (!isColumnAlignment<Trait>(c)) {
707 return 0;
708 }
709 }
710
711 return columns.size();
712}
713
714//! \return Is given string a HTML comment.
715template<class Trait>
716inline bool
717isHtmlComment(const typename Trait::String &s)
718{
719 auto c = s;
720
721 if (s.startsWith(Trait::latin1ToString(s_startComment))) {
722 c.remove(0, 4);
723 } else {
724 return false;
725 }
726
727 long long int p = -1;
728 bool endFound = false;
729
730 while ((p = c.indexOf(Trait::latin1ToString("--"), p + 1)) > -1) {
731 if (c.size() > p + 2 && c[p + 2] == Trait::latin1ToChar('>')) {
732 if (!endFound) {
733 endFound = true;
734 } else {
735 return false;
736 }
737 } else if (p - 2 >= 0 && c.sliced(p - 2, 4) == Trait::latin1ToString("<!--")) {
738 return false;
739 } else if (c.size() > p + 3 && c.sliced(p, 4) == Trait::latin1ToString("--!>")) {
740 return false;
741 }
742 }
743
744 return endFound;
745}
746
747//! Replace entities in the string with corresponding character.
748template<class Trait>
749inline typename Trait::String
750replaceEntity(const typename Trait::String &s)
751{
752 long long int p1 = 0;
753
754 typename Trait::String res;
755 long long int i = 0;
756
757 while ((p1 = s.indexOf(Trait::latin1ToChar('&'), p1)) != -1) {
758 if (p1 > 0 && s[p1 - 1] == Trait::latin1ToChar('\\')) {
759 ++p1;
760
761 continue;
762 }
763
764 const auto p2 = s.indexOf(Trait::latin1ToChar(';'), p1);
765
766 if (p2 != -1) {
767 const auto en = s.sliced(p1, p2 - p1 + 1);
768
769 if (en.size() > 2 && en[1] == Trait::latin1ToChar('#')) {
770 if (en.size() > 3 && en[2].toLower() == Trait::latin1ToChar('x')) {
771 const auto hex = en.sliced(3, en.size() - 4);
772
773 if (hex.size() <= 6 && hex.size() > 0) {
774 bool ok = false;
775
776 const char32_t c = hex.toInt(&ok, 16);
777
778 if (ok) {
779 res.push_back(s.sliced(i, p1 - i));
780 i = p2 + 1;
781
782 if (c) {
783 Trait::appendUcs4(res, c);
784 } else {
785 res.push_back(typename Trait::Char(0xFFFD));
786 }
787 }
788 }
789 } else {
790 const auto dec = en.sliced(2, en.size() - 3);
791
792 if (dec.size() <= 7 && dec.size() > 0) {
793 bool ok = false;
794
795 const char32_t c = dec.toInt(&ok, 10);
796
797 if (ok) {
798 res.push_back(s.sliced(i, p1 - i));
799 i = p2 + 1;
800
801 if (c) {
802 Trait::appendUcs4(res, c);
803 } else {
804 res.push_back(typename Trait::Char(0xFFFD));
805 }
806 }
807 }
808 }
809 } else {
810 const auto it = s_entityMap<Trait>.find(en);
811
812 if (it != s_entityMap<Trait>.cend()) {
813 res.push_back(s.sliced(i, p1 - i));
814 i = p2 + 1;
815 res.push_back(Trait::utf16ToString(it->second));
816 }
817 }
818 } else {
819 break;
820 }
821
822 p1 = p2 + 1;
823 }
824
825 res.push_back(s.sliced(i, s.size() - i));
826
827 return res;
828}
829
830//! Remove backslashes in block.
// NOTE(review): listing lines 833 (the function name and parameter list) and 838
// (the loop body) are missing from this extract. The visible code copies an input
// named `d`, iterates over every line of the copy and returns the copy.
831template<class Trait>
832inline typename MdBlock<Trait>::Data
834{
835 auto tmp = d;
836
837 for (auto &line : tmp) {
839 }
840
841 return tmp;
842}
843
844//! Type of the paragraph's optimization.
// NOTE(review): listing lines 845 (the enum header), 852 and 854 (the enumerators
// that the last two comments below describe) are missing from this extract.
846 //! Full optimization.
847 Full,
848 //! Semi optimization, optimization won't concatenate text
849 //! items if style delimiters will be in the middle.
850 Semi,
851 //! Full optimization, but raw text data won't be concatenated (will be untouched).
853 //! Semi optimization, but raw text data won't be concatenated (will be untouched).
855};
856
857//
858// TextPlugin
859//
860
861//! ID of text plugin.
// NOTE(review): listing lines 864, 866 and 868 (the three enumerators that the
// comments below describe) are missing from this extract, so their names and
// values cannot be confirmed here.
862enum TextPlugin : int {
863 //! Unknown plugin.
865 //! GitHub's autolinks plugin.
867 //! First user defined plugin ID.
869}; // enum TextPlugin
870
871//
872// Style
873//
874
875//! Emphasis type.
// NOTE(review): listing line 886 (the enumerator for "~") is missing from this extract.
876enum class Style {
877 //! "*"
878 Italic1,
879 //! "_"
880 Italic2,
881 //! "**"
882 Bold1,
883 //! "__"
884 Bold2,
885 //! "~"
887 //! Unknown.
888 Unknown
889};
890
891//! \return Text option from style.
// NOTE(review): listing lines 893 (the function name and parameter list) and 904
// (the case label in front of the StrikethroughText return) are missing from this
// extract. The switch operates on a value named `s`.
892inline TextOption
894{
895 switch (s) {
// Both italic delimiters map to the same option.
896 case Style::Italic1:
897 case Style::Italic2:
898 return ItalicText;
899
// Both bold delimiters map to the same option.
900 case Style::Bold1:
901 case Style::Bold2:
902 return BoldText;
903
905 return StrikethroughText;
906
// Anything else carries no formatting.
907 default:
908 return TextWithoutFormat;
909 }
910}
911
912//
913// TextPluginFunc
914//
915
// Forward declaration; the structure is defined further down in this file.
916template<class Trait>
917struct TextParsingOpts;
918
919//! Functor type for text plugin.
// NOTE(review): listing line 922 (the middle parameter of the functor signature)
// is missing from this extract. The visible parameters are a shared pointer to
// the paragraph and a constant reference to a string list.
920template<class Trait>
921using TextPluginFunc = std::function<void(std::shared_ptr<Paragraph<Trait>>,
923 const typename Trait::StringList &)>;
924
925//
926// TextPluginsMap
927//
928
929//! Type of the map of text plugins.
930template<class Trait>
931using TextPluginsMap = std::map<int, std::tuple<TextPluginFunc<Trait>,
932 bool,
933 typename Trait::StringList>>;
934
935//
936// TextParsingOpts
937//
938
939//! Internal structure for auxiliary options for parser.
// NOTE(review): many listing lines of this structure are missing from this extract
// (941-942, 949-952, 955-956, 980, 983, 1001-1002 and 1004). Among them are the
// struct header, several members (code in this file uses po.m_fr and
// po.m_collectRefLinks, which are not declared in the visible lines), the
// declaration of m_detected and the signature of the method that switches on it.
940template<class Trait>
943 std::shared_ptr<Block<Trait>> m_parent;
944 std::shared_ptr<RawHtml<Trait>> m_tmpHtml;
945 std::shared_ptr<Document<Trait>> m_doc;
946 typename Trait::StringList &m_linksToParse;
947 typename Trait::String m_workingPath;
948 typename Trait::String m_fileName;
953 std::shared_ptr<Text<Trait>> m_lastText = {};
954 bool m_wasRefLink = false;
957
// Piece of raw text together with its position and line.
958 struct TextData {
959 typename Trait::String m_str;
960 long long int m_pos = -1;
961 long long int m_line = -1;
962 };
963
964 std::vector<TextData> m_rawTextData = {};
965
// Appends the strings of entries (start, end) to the entry at \p start and
// erases those entries. Does nothing unless end - start > 1.
966 inline void
967 concatenateAuxText(long long int start, long long int end)
968 {
969 if (start < end && (end - start > 1)) {
970 for (auto i = start + 1; i < end; ++i) {
971 m_rawTextData[start].m_str += m_rawTextData[i].m_str;
972 }
973
974 m_rawTextData.erase(m_rawTextData.cbegin() + start + 1, m_rawTextData.cbegin() + end);
975 }
976 }
977
978 enum class Detected { Nothing = 0, Table = 1, HTML = 2, List = 3, Code = 4 }; // enum class Detected
979
981
// Returns true when m_detected is Table, List or Code, and false otherwise.
982 inline bool
984 {
985 switch (m_detected) {
986 case Detected::Table:
987 case Detected::List:
988 case Detected::Code:
989 return true;
990
991 default:
992 return false;
993 }
994 }
995
996 long long int m_line = 0;
997 long long int m_pos = 0;
998 long long int m_startTableLine = -1;
999 long long int m_lastTextLine = -1;
1000 long long int m_lastTextPos = -1;
1003 std::vector<std::pair<Style, long long int>> m_styles = {};
1005 std::shared_ptr<ItemWithOpts<Trait>> m_lastItemWithStyle = nullptr;
1006}; // struct TextParsingOpts
1007
1008//
1009// virginSubstr
1010//
1011
1012//! \return Substring from fragment with given virgin positions.
1013template<class Trait>
1014inline typename Trait::String
1015virginSubstr(const MdBlock<Trait> &fr, const WithPosition &virginPos)
1016{
1017 if (fr.m_data.empty()) {
1018 return {};
1019 }
1020
1021 long long int startLine = virginPos.startLine() < fr.m_data.at(0).second.m_lineNumber ?
1022 (virginPos.endLine() < fr.m_data.at(0).second.m_lineNumber ? -1 : 0) :
1023 virginPos.startLine() - fr.m_data.at(0).second.m_lineNumber;
1024
1025 if (startLine >= static_cast<long long int>(fr.m_data.size()) || startLine < 0) {
1026 return {};
1027 }
1028
1029 auto spos = virginPos.startColumn() - fr.m_data.at(startLine).first.virginPos(0);
1030
1031 if (spos < 0) {
1032 spos = 0;
1033 }
1034
1035 long long int epos = 0;
1036 long long int linesCount = virginPos.endLine() - virginPos.startLine() -
1037 (virginPos.startLine() < fr.m_data.at(0).second.m_lineNumber ?
1038 fr.m_data.at(0).second.m_lineNumber - virginPos.startLine() : 0);
1039
1040 if (startLine + linesCount > static_cast<long long int>(fr.m_data.size())) {
1041 linesCount = fr.m_data.size() - startLine - 1;
1042 epos = fr.m_data.back().first.length();
1043 } else {
1044 epos = virginPos.endColumn() - fr.m_data.at(linesCount + startLine).first.virginPos(0) + 1;
1045 }
1046
1047 if (epos < 0) {
1048 epos = 0;
1049 }
1050
1051 if (epos > fr.m_data.at(linesCount + startLine).first.length()) {
1052 epos = fr.m_data.at(linesCount + startLine).first.length();
1053 }
1054
1055 typename Trait::String str =
1056 (linesCount ? fr.m_data.at(startLine).first.sliced(spos).asString() :
1057 fr.m_data.at(startLine).first.sliced(spos, epos - spos).asString());
1058
1059 long long int i = startLine + 1;
1060
1061 for (; i < startLine + linesCount; ++i) {
1062 str.push_back(Trait::latin1ToString("\n"));
1063 str.push_back(fr.m_data.at(i).first.asString());
1064 }
1065
1066 if (linesCount) {
1067 str.push_back(Trait::latin1ToString("\n"));
1068 str.push_back(fr.m_data.at(i).first.sliced(0, epos).asString());
1069 }
1070
1071 return str;
1072}
1073
1074//
1075// localPosFromVirgin
1076//
1077
1078//! \return Local position ( { column, line } ) in fragment for given virgin position if exists.
1079//! \return { -1, -1 } if there is no given position.
1080template<class Trait>
1081inline std::pair<long long int, long long int>
1082localPosFromVirgin(const MdBlock<Trait> &fr, long long int virginColumn, long long int virginLine)
1083{
1084 if (fr.m_data.empty()) {
1085 return {-1, -1};
1086 }
1087
1088 if (fr.m_data.front().second.m_lineNumber > virginLine ||
1089 fr.m_data.back().second.m_lineNumber < virginLine) {
1090 return {-1, -1};
1091 }
1092
1093 auto line = virginLine - fr.m_data.front().second.m_lineNumber;
1094
1095 if (fr.m_data.at(line).first.isEmpty()) {
1096 return {-1, -1};
1097 }
1098
1099 const auto vzpos = fr.m_data.at(line).first.virginPos(0);
1100
1101 if (vzpos > virginColumn || virginColumn > vzpos + fr.m_data.at(line).first.length() - 1) {
1102 return {-1, -1};
1103 }
1104
1105 return {virginColumn - vzpos, line};
1106}
1107
1108//
1109// GitHubAutolinkPlugin
1110//
1111
/*
    "^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?"
    "(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
*/
//! \return Is the given string a valid email?
//!
//! Hand-written check modelled on the regular expression above. An optional
//! "mailto:" prefix is skipped. The part after '@' must contain at least one
//! dot, and every dot-separated label must be 1 to 63 characters of ASCII
//! letters, digits or '-', neither starting nor ending with '-'.
template<class Trait>
inline bool
isEmail(const typename Trait::String &url)
{
    // ASCII digit or letter.
    auto isAllowed = [](const typename Trait::Char &ch) -> bool {
        const auto unicode = ch.unicode();
        return ((unicode >= 48 && unicode <= 57) || (unicode >= 97 && unicode <= 122) ||
                (unicode >= 65 && unicode <= 90));
    };

    // Punctuation additionally allowed in the part before '@'.
    auto isAdditional = [](const typename Trait::Char &ch) -> bool {
        const auto unicode = ch.unicode();
        return (unicode == 33 || (unicode >= 35 && unicode <= 39) ||
                unicode == 42 || unicode == 43 || (unicode >= 45 && unicode <= 47) ||
                unicode == 61 || unicode == 63 || (unicode >= 94 && unicode <= 96) ||
                (unicode >= 123 && unicode <= 126));
    };

    static const auto s_delim = Trait::latin1ToChar('-');
    static const auto s_dog = Trait::latin1ToChar('@');
    static const auto s_dot = Trait::latin1ToChar('.');

    long long int i = (url.startsWith(Trait::latin1ToString("mailto:")) ? 7 : 0);
    const auto dogPos = url.indexOf(s_dog, i);

    if (dogPos != -1) {
        // The part before '@' must not be empty.
        if (i == dogPos) {
            return false;
        }

        for (; i < dogPos; ++i) {
            if (!isAllowed(url[i]) && !isAdditional(url[i])) {
                return false;
            }
        }

        // Validates one label of the domain, [start, dotPos).
        auto checkToDot = [&](long long int start, long long int dotPos) -> bool {
            static const long long int maxlen = 63;

            if (dotPos - start > maxlen ||
                start + 1 > dotPos ||
                start >= url.length() ||
                dotPos > url.length()) {
                return false;
            }

            if (url[start] == s_delim) {
                return false;
            }

            if (url[dotPos - 1] == s_delim) {
                return false;
            }

            for (; start < dotPos; ++start) {
                if (!isAllowed(url[start]) && url[start] != s_delim) {
                    return false;
                }
            }

            return true;
        };

        long long int dotPos = url.indexOf(s_dot, dogPos + 1);

        if (dotPos != -1) {
            i = dogPos + 1;

            while (dotPos != -1) {
                if (!checkToDot(i, dotPos)) {
                    return false;
                }

                i = dotPos + 1;
                dotPos = url.indexOf(s_dot, i);
            }

            // The last label runs to the end of the string.
            if (!checkToDot(i, url.length())) {
                return false;
            }

            return true;
        }
    }

    return false;
}
1204
//! \return Is the given string a valid URL?
//! Only declared here; each supported trait provides an explicit specialization.
template<class Trait>
inline bool
isValidUrl(const typename Trait::String &url);
1209
//! \return Is the given string a GitHub autolink?
//! Only declared here; each supported trait provides an explicit specialization.
template<class Trait>
inline bool
isGitHubAutolink(const typename Trait::String &url);
1214
1215#ifdef MD4QT_QT_SUPPORT
1216
// NOTE(review): listing lines 1219 and 1228 (the names and parameter lists of the
// two specializations) are missing from this extract. Both bodies work on a
// string named `url`. QUrl is used here although <QUrl> is not among the
// includes visible in this file; presumably another header provides it (verify).
//
// First specialization: the URL must be valid in strict mode and not relative.
1217template<>
1218inline bool
1220{
1221 const QUrl u(url, QUrl::StrictMode);
1222
1223 return (u.isValid() && !u.isRelative());
1224}
1225
// Second specialization: the URL must be valid in strict mode and either have
// both scheme and host, or start with "www.", be at least 7 characters long and
// contain another dot after the "www." prefix.
1226template<>
1227inline bool
1229{
1230 const QUrl u(url, QUrl::StrictMode);
1231
1232 return (u.isValid()
1233 && ((!u.scheme().isEmpty() && !u.host().isEmpty())
1234 || (url.startsWith(QStringLiteral("www.")) && url.length() >= 7 &&
1235 url.indexOf(QLatin1Char('.'), 4) != -1)));
1236}
1237
1238#endif
1239
#ifdef MD4QT_ICU_STL_SUPPORT

//! Specialization for UnicodeStringTrait.
//! The URL must be valid and not relative.
template<>
inline bool
isValidUrl<UnicodeStringTrait>(const UnicodeString &url)
{
    const UrlUri u(url);

    return (u.isValid() && !u.isRelative());
}

//! Specialization for UnicodeStringTrait.
//! The URL must be valid and either have both scheme and host, or start with
//! "www.", be at least 7 characters long and contain another dot after the
//! "www." prefix.
template<>
inline bool
isGitHubAutolink<UnicodeStringTrait>(const UnicodeString &url)
{
    const UrlUri u(url);

    return (u.isValid()
        && ((!u.scheme().isEmpty() && !u.host().isEmpty())
            || (url.startsWith(UnicodeString("www.")) && url.length() >= 7 &&
                url.indexOf(UnicodeChar('.'), 4) != -1)));
}

#endif
1264
1265//! Process GitHub autolinks for the text with index \p idx.
// NOTE(review): listing lines 1268-1269 (the function name and its first two
// parameters), 1329 and 1371 are missing from this extract. The call site in this
// file passes (p, po, i); the body uses `p` as a pointer to the paragraph and
// `po` as the parsing options.
//
// Returns \p idx unchanged when it is out of range of po.m_rawTextData. Otherwise
// returns `ret`, which starts at \p idx, is decremented when the text entry is
// removed and incremented when a remainder entry is inserted.
1266template<class Trait>
1267inline long long int
1270 long long int idx)
1271{
1272 if (idx < 0 || idx >= (long long int)po.m_rawTextData.size()) {
1273 return idx;
1274 }
1275
1276 static const auto s_delims = Trait::latin1ToString("*_~()<>");
// Working copy of the raw text entry; it is consumed from the front as links are found.
1277 auto s = po.m_rawTextData[idx];
1278 bool first = true;
1279 long long int j = 0;
1280 auto end = typename Trait::Char(0x00);
1281 bool skipSpace = true;
1282 long long int ret = idx;
1283
1284 while (s.m_str.length()) {
1285 long long int i = 0;
1286 end = typename Trait::Char(0x00);
1287
1288 for (; i < s.m_str.length(); ++i) {
1289 if (first) {
// Skip leading delimiters and spaces; an opening parenthesis makes ')' a terminator.
1290 if (s.m_str[i] == Trait::latin1ToChar('(')) {
1291 end = Trait::latin1ToChar(')');
1292 }
1293
1294 if (s_delims.indexOf(s.m_str[i]) == -1 && !s.m_str[i].isSpace()) {
1295 first = false;
1296 j = i;
1297 }
1298 } else {
// A candidate ends at a space, at the terminator or at the last character.
1299 if (s.m_str[i].isSpace() || i == s.m_str.length() - 1 || s.m_str[i] == end) {
1300 auto tmp = s.m_str.sliced(j, i - j +
1301 (i == s.m_str.length() - 1 && s.m_str[i] != end && !s.m_str[i].isSpace() ?
1302 1 : 0));
1303 skipSpace = s.m_str[i].isSpace();
1304
1305 const auto email = isEmail<Trait>(tmp);
1306
1307 if (isGitHubAutolink<Trait>(tmp) || email) {
1308 auto ti = textAtIdx(p, idx);
1309
1310 if (ti >= 0 && ti < static_cast<long long int>(p->items().size())) {
1311 typename ItemWithOpts<Trait>::Styles openStyles, closeStyles;
1312 const auto opts = std::static_pointer_cast<Text<Trait>>(p->items().at(ti))->opts();
1313
// Nothing precedes the link: the text item is replaced entirely.
1314 if (j == 0 || s.m_str.sliced(0, j).isEmpty()) {
1315 openStyles = std::static_pointer_cast<ItemWithOpts<Trait>>(p->items().at(ti))->openStyles();
1316 closeStyles = std::static_pointer_cast<ItemWithOpts<Trait>>(p->items().at(ti))->closeStyles();
1317 p->removeItemAt(ti);
1318 po.m_rawTextData.erase(po.m_rawTextData.cbegin() + idx);
1319 --ret;
1320 } else {
// Text precedes the link: the existing text item is shortened to that prefix.
1321 const auto tmp = s.m_str.sliced(0, j);
1322
1323 auto t = std::static_pointer_cast<Text<Trait>>(p->items().at(ti));
1324 t->setEndColumn(po.m_fr.m_data.at(s.m_line).first.virginPos(s.m_pos + j - 1));
1325 closeStyles = t->closeStyles();
1326 t->closeStyles() = {};
1327 po.m_rawTextData[idx].m_str = tmp;
1328 ++idx;
1330 ++ti;
1331 }
1332
1333 std::shared_ptr<Link<Trait>> lnk(new Link<Trait>);
1334 lnk->setStartColumn(po.m_fr.m_data.at(s.m_line).first.virginPos(s.m_pos + j));
1335 lnk->setStartLine(po.m_fr.m_data.at(s.m_line).second.m_lineNumber);
1336 lnk->setEndColumn(
1337 po.m_fr.m_data.at(s.m_line).first.virginPos(s.m_pos + i -
1338 (i == s.m_str.length() - 1 && s.m_str[i] != end && !s.m_str[i].isSpace() ?
1339 0 : 1)));
1340 lnk->setEndLine(po.m_fr.m_data.at(s.m_line).second.m_lineNumber);
1341 lnk->openStyles() = openStyles;
1342 lnk->setTextPos({lnk->startColumn(), lnk->startLine(), lnk->endColumn(), lnk->endLine()});
1343 lnk->setUrlPos(lnk->textPos());
1344
// Emails get a "mailto:" scheme and "www." links get "http://" when missing.
1345 if (email && !tmp.toLower().startsWith(Trait::latin1ToString("mailto:"))) {
1346 tmp = Trait::latin1ToString("mailto:") + tmp;
1347 }
1348
1349 if (!email && tmp.toLower().startsWith(Trait::latin1ToString("www."))) {
1350 tmp = Trait::latin1ToString("http://") + tmp;
1351 }
1352
1353 lnk->setUrl(tmp);
1354 lnk->setOpts(opts);
1355 p->insertItem(ti, lnk);
1356
// Drop the consumed part from the working copy and restart scanning on the remainder.
1357 s.m_pos += i + (s.m_str[i] == end || s.m_str[i].isSpace() ? 0 : 1);
1358 s.m_str.remove(0, i + (s.m_str[i] == end || s.m_str[i].isSpace() ? 0 : 1));
1359 j = 0;
1360 i = 0;
1361
1362 if (!s.m_str.isEmpty()) {
1363 po.m_rawTextData.insert(po.m_rawTextData.cbegin() + idx, s);
1364 ++ret;
1365
1366 auto t = std::make_shared<Text<Trait>>();
1367 t->setStartColumn(po.m_fr.m_data[s.m_line].first.virginPos(s.m_pos));
1368 t->setStartLine(po.m_fr.m_data.at(s.m_line).second.m_lineNumber);
1369 t->setEndLine(po.m_fr.m_data.at(s.m_line).second.m_lineNumber);
1370 t->setEndColumn(po.m_fr.m_data.at(s.m_line).first.virginPos(s.m_pos + s.m_str.length() - 1));
1372 t->closeStyles() = closeStyles;
1373 p->insertItem(ti + 1, t);
1374 } else {
1375 lnk->closeStyles() = closeStyles;
1376 }
1377
1378 break;
1379 }
1380 }
1381
1382 j = i + (skipSpace ? 1 : 0);
1383 }
1384 }
1385 }
1386
1387 first = true;
1388
// The whole remainder was scanned without finding a link.
1389 if (i == s.m_str.length()) {
1390 break;
1391 }
1392 }
1393
1394 return ret;
1395}
1396
1397//! GitHub autolinks plugin.
//! Runs processGitHubAutolinkExtension() over the entries of po.m_rawTextData.
//! Nothing is done when po.m_collectRefLinks is set.
// NOTE(review): the embedded listing numbers jump from 1399 to 1402, so the
// function name and its leading parameters are not visible in this listing -
// confirm them against the real header before relying on this signature.
1398template<class Trait>
1399inline void
1402 const typename Trait::StringList &)
1403{
1404 if (!po.m_collectRefLinks) {
1405 long long int i = 0;
1406
 // The value returned by the helper replaces the loop index before it is
 // incremented; presumably it accounts for the fragments the helper itself
 // inserted into po.m_rawTextData - verify against the helper.
1407 while (i >= 0 && i < (long long int)po.m_rawTextData.size()) {
1408 i = processGitHubAutolinkExtension(p, po, i);
1409
1410 ++i;
1411 }
1412 }
1413}
1414
1415//
1416// Parser
1417//
1418
1419//! Markdown parser.
//!
//! Both public parse() overloads return a std::shared_ptr<Document<Trait>>.
//! Text plugins are kept in m_textPlugins, keyed by an integer ID.
1420template<class Trait>
1421class Parser final
1422{
1423public:
    // NOTE(review): the embedded listing numbers jump from 1423 to 1428 here,
    // so some declarations (presumably the constructor) are not visible.
1428
1429 ~Parser() = default;
1430
1431 //! \return Parsed Markdown document.
1432 std::shared_ptr<Document<Trait>>
1433 parse(
1434 //! File name of the Markdown document.
1435 const typename Trait::String &fileName,
1436 //! Should parsing be recursive? If recursive all links to existing Markdown
1437 //! files will be parsed and presented in the returned document.
1438 bool recursive = true,
1439 //! Allowed extensions for Markdown document files. If Markdown file doesn't
1440 //! have given extension it will be ignored.
1441 const typename Trait::StringList &ext = {Trait::latin1ToString("md"), Trait::latin1ToString("markdown")},
1442 //! Make full optimization, or just semi one. In full optimization
1443 //! text items with one style but with some closing delimiters
1444 //! in the middle will be concatenated in one, like in **text* text*,
1445 //! here in full optimization will be "text text" with 2 open/close
1446 //! style delimiters, but one closing delimiter is in the middle.
1447 bool fullyOptimizeParagraphs = true);
1448
1449 //! \return Parsed Markdown document.
1450 std::shared_ptr<Document<Trait>>
1451 parse(
1452 //! Stream to parse.
1453 typename Trait::TextStream &stream,
1454 //! Absolute path to the root folder for the document.
1455 //! This path will be used to resolve local links.
1456 const typename Trait::String &path,
1457 //! This argument needed only for anchor.
1458 const typename Trait::String &fileName,
1459 //! Make full optimization, or just semi one. In full optimization
1460 //! text items with one style but with some closing delimiters
1461 //! in the middle will be concatenated in one, like in **text* text*,
1462 //! here in full optimization will be "text text" with 2 open/close
1463 //! style delimiters, but one closing delimiter is in the middle.
1464 bool fullyOptimizeParagraphs = true);
1465
1466 //! Add text plugin.
    //! Stores {plugin, processInLinks, userData} in m_textPlugins under \p id.
    // NOTE(review): listing line 1468 (the method name and opening
    // parenthesis) is not visible here.
1467 void
1469 //! ID of a plugin. Use TextPlugin::UserDefinedPluginID value for start ID.
1470 int id,
1471 //! Function of a plugin, that will be invoked to process raw text.
1472 TextPluginFunc<Trait> plugin,
1473 //! Should this plugin be used in parsing of internals of links?
1474 bool processInLinks,
1475 //! User data that will be passed to plugin function.
1476 const typename Trait::StringList &userData)
1477 {
1478 m_textPlugins.insert({id, {plugin, processInLinks, userData}});
1479 }
1480
1481 //! Remove text plugin.
    //! Erases the entry stored under \p id from m_textPlugins.
    // NOTE(review): listing line 1483 (the method name and opening
    // parenthesis) is not visible here.
1482 void
1484 //! ID of plugin that should be removed.
1485 int id)
1486 {
1487 m_textPlugins.erase(id);
1488 }
1489
1490private:
    // File / stream level entry points used by the public parse() overloads.
1491 void
1492 parseFile(const typename Trait::String &fileName,
1493 bool recursive,
1494 std::shared_ptr<Document<Trait>> doc,
1495 const typename Trait::StringList &ext,
1496 typename Trait::StringList *parentLinks = nullptr);
1497
1498 void
1499 parseStream(typename Trait::TextStream &stream,
1500 const typename Trait::String &workingPath,
1501 const typename Trait::String &fileName,
1502 bool recursive,
1503 std::shared_ptr<Document<Trait>> doc,
1504 const typename Trait::StringList &ext,
1505 typename Trait::StringList *parentLinks = nullptr);
1506
1507 void
1508 clearCache();
1509
    //! Classification of a line / block; returned by whatIsTheLine() and
    //! stored in ParserContext (m_type, m_lineType, m_prevLineType).
1510 enum class BlockType {
1511 Unknown,
1512 EmptyLine,
1513 Text,
1514 List,
1515 ListWithFirstEmptyLine,
1516 CodeIndentedBySpaces,
1517 Code,
1518 Blockquote,
1519 Heading,
1520 SomethingInList,
1521 FensedCodeInList,
1522 Footnote
1523 }; // enum BlockType
1524
    //! Level/indent pair; both fields default to -1.
1525 struct ListIndent {
1526 long long int m_level = -1;
1527 long long int m_indent = -1;
1528 }; // struct ListIndent
1529
1530 BlockType
1531 whatIsTheLine(typename Trait::InternalString &str,
1532 bool inList = false,
1533 bool inListWithFirstEmptyLine = false,
1534 bool fensedCodeInList = false,
1535 typename Trait::String *startOfCode = nullptr,
1536 ListIndent *indent = nullptr,
1537 bool emptyLinePreceded = false,
1538 bool calcIndent = false,
1539 const std::vector<long long int> *indents = nullptr);
1540
    // Block-level parsers. Each takes the block to parse (MdBlock) plus the
    // parent block and document the result is attached to.
1541 long long int
1542 parseFragment(MdBlock<Trait> &fr,
1543 std::shared_ptr<Block<Trait>> parent,
1544 std::shared_ptr<Document<Trait>> doc,
1545 typename Trait::StringList &linksToParse,
1546 const typename Trait::String &workingPath,
1547 const typename Trait::String &fileName,
1548 bool collectRefLinks,
1549 RawHtmlBlock<Trait> &html);
1550
1551 void
1552 parseText(MdBlock<Trait> &fr,
1553 std::shared_ptr<Block<Trait>> parent,
1554 std::shared_ptr<Document<Trait>> doc,
1555 typename Trait::StringList &linksToParse,
1556 const typename Trait::String &workingPath,
1557 const typename Trait::String &fileName,
1558 bool collectRefLinks,
1559 RawHtmlBlock<Trait> &html);
1560
1561 void
1562 parseBlockquote(MdBlock<Trait> &fr,
1563 std::shared_ptr<Block<Trait>> parent,
1564 std::shared_ptr<Document<Trait>> doc,
1565 typename Trait::StringList &linksToParse,
1566 const typename Trait::String &workingPath,
1567 const typename Trait::String &fileName,
1568 bool collectRefLinks,
1569 RawHtmlBlock<Trait> &html);
1570
1571 long long int
1572 parseList(MdBlock<Trait> &fr,
1573 std::shared_ptr<Block<Trait>> parent,
1574 std::shared_ptr<Document<Trait>> doc,
1575 typename Trait::StringList &linksToParse,
1576 const typename Trait::String &workingPath,
1577 const typename Trait::String &fileName,
1578 bool collectRefLinks,
1579 RawHtmlBlock<Trait> &html);
1580
1581 void
1582 parseCode(MdBlock<Trait> &fr, std::shared_ptr<Block<Trait>> parent, bool collectRefLinks);
1583
1584 void
1585 parseCodeIndentedBySpaces(MdBlock<Trait> &fr,
1586 std::shared_ptr<Block<Trait>> parent,
1587 bool collectRefLinks,
1588 int indent,
1589 const typename Trait::String &syntax,
1590 long long int emptyColumn,
1591 long long int startLine,
1592 bool fensedCode,
1593 const WithPosition &startDelim = {},
1594 const WithPosition &endDelim = {},
1595 const WithPosition &syntaxPos = {});
1596
1597 long long int
1598 parseListItem(MdBlock<Trait> &fr,
1599 std::shared_ptr<Block<Trait>> parent,
1600 std::shared_ptr<Document<Trait>> doc,
1601 typename Trait::StringList &linksToParse,
1602 const typename Trait::String &workingPath,
1603 const typename Trait::String &fileName,
1604 bool collectRefLinks,
1605 RawHtmlBlock<Trait> &html,
1606 std::shared_ptr<ListItem<Trait>> *resItem = nullptr);
1607
1608 void
1609 parseHeading(MdBlock<Trait> &fr,
1610 std::shared_ptr<Block<Trait>> parent,
1611 std::shared_ptr<Document<Trait>> doc,
1612 typename Trait::StringList &linksToParse,
1613 const typename Trait::String &workingPath,
1614 const typename Trait::String &fileName,
1615 bool collectRefLinks);
1616
1617 void
1618 parseFootnote(MdBlock<Trait> &fr,
1619 std::shared_ptr<Block<Trait>> parent,
1620 std::shared_ptr<Document<Trait>> doc,
1621 typename Trait::StringList &linksToParse,
1622 const typename Trait::String &workingPath,
1623 const typename Trait::String &fileName,
1624 bool collectRefLinks);
1625
1626 void
1627 parseTable(MdBlock<Trait> &fr,
1628 std::shared_ptr<Block<Trait>> parent,
1629 std::shared_ptr<Document<Trait>> doc,
1630 typename Trait::StringList &linksToParse,
1631 const typename Trait::String &workingPath,
1632 const typename Trait::String &fileName,
1633 bool collectRefLinks,
1634 int columnsCount);
1635
1636 void
1637 parseParagraph(MdBlock<Trait> &fr,
1638 std::shared_ptr<Block<Trait>> parent,
1639 std::shared_ptr<Document<Trait>> doc,
1640 typename Trait::StringList &linksToParse,
1641 const typename Trait::String &workingPath,
1642 const typename Trait::String &fileName,
1643 bool collectRefLinks,
1644 RawHtmlBlock<Trait> &html);
1645
1646 void
1647 parseFormattedTextLinksImages(MdBlock<Trait> &fr,
1648 std::shared_ptr<Block<Trait>> parent,
1649 std::shared_ptr<Document<Trait>> doc,
1650 typename Trait::StringList &linksToParse,
1651 const typename Trait::String &workingPath,
1652 const typename Trait::String &fileName,
1653 bool collectRefLinks,
1654 bool ignoreLineBreak,
1655 RawHtmlBlock<Trait> &html,
1656 bool inLink);
1657
1658 RawHtmlBlock<Trait>
1659 parse(StringListStream<Trait> &stream,
1660 std::shared_ptr<Block<Trait>> parent,
1661 std::shared_ptr<Document<Trait>> doc,
1662 typename Trait::StringList &linksToParse,
1663 const typename Trait::String &workingPath,
1664 const typename Trait::String &fileName,
1665 bool collectRefLinks,
1666 bool top = false,
1667 bool dontProcessLastFreeHtml = false);
1668
    //! Mutable state passed by reference to the line-level helpers declared
    //! below (parseFragment(ParserContext &, ...), eatFootnote(), finishHtml(),
    //! makeLineMain(), parseFragmentAndMakeNextLineMain(), readLine()).
1669 struct ParserContext {
1670 typename Trait::template Vector<MdBlock<Trait>> m_splitted;
1671 typename MdBlock<Trait>::Data m_fragment;
1672 bool m_emptyLineInList = false;
1673 bool m_fensedCodeInList = false;
1674 long long int m_emptyLinesCount = 0;
1675 long long int m_lineCounter = 0;
1676 std::vector<long long int> m_indents;
1677 ListIndent m_indent;
1678 RawHtmlBlock<Trait> m_html;
1679 long long int m_emptyLinesBefore = 0;
1680 MdLineData::CommentDataMap m_htmlCommentData;
1681 typename Trait::String m_startOfCode;
1682 typename Trait::String m_startOfCodeInList;
1683 BlockType m_type = BlockType::EmptyLine;
1684 BlockType m_lineType = BlockType::Unknown;
1685 BlockType m_prevLineType = BlockType::Unknown;
1686 }; // struct ParserContext
1687
1688 void
1689 parseFragment(ParserContext &ctx,
1690 std::shared_ptr<Block<Trait>> parent,
1691 std::shared_ptr<Document<Trait>> doc,
1692 typename Trait::StringList &linksToParse,
1693 const typename Trait::String &workingPath,
1694 const typename Trait::String &fileName,
1695 bool collectRefLinks);
1696
1697 void
1698 eatFootnote(ParserContext &ctx,
1699 StringListStream<Trait> &stream,
1700 std::shared_ptr<Block<Trait>> parent,
1701 std::shared_ptr<Document<Trait>> doc,
1702 typename Trait::StringList &linksToParse,
1703 const typename Trait::String &workingPath,
1704 const typename Trait::String &fileName,
1705 bool collectRefLinks);
1706
1707 void
1708 finishHtml(ParserContext &ctx,
1709 std::shared_ptr<Block<Trait>> parent,
1710 std::shared_ptr<Document<Trait>> doc,
1711 bool collectRefLinks,
1712 bool top,
1713 bool dontProcessLastFreeHtml);
1714
1715 void
1716 makeLineMain(ParserContext &ctx,
1717 const typename Trait::InternalString &line,
1718 long long int emptyLinesCount,
1719 const ListIndent &currentIndent,
1720 long long int ns,
1721 long long int currentLineNumber);
1722
1723 void
1724 parseFragmentAndMakeNextLineMain(ParserContext &ctx,
1725 std::shared_ptr<Block<Trait>> parent,
1726 std::shared_ptr<Document<Trait>> doc,
1727 typename Trait::StringList &linksToParse,
1728 const typename Trait::String &workingPath,
1729 const typename Trait::String &fileName,
1730 bool collectRefLinks,
1731 const typename Trait::InternalString &line,
1732 const ListIndent &currentIndent,
1733 long long int ns,
1734 long long int currentLineNumber);
1735
1736 bool
1737 isListType(BlockType t);
1738
1739 typename Trait::InternalString
1740 readLine(ParserContext &ctx, StringListStream<Trait> &stream);
1741
1742 std::shared_ptr<Image<Trait>>
1743 makeImage(const typename Trait::String &url,
1744 const typename MdBlock<Trait>::Data &text,
1745 TextParsingOpts<Trait> &po,
1746 bool doNotCreateTextOnFail,
1747 long long int startLine,
1748 long long int startPos,
1749 long long int lastLine,
1750 long long int lastPos,
1751 const WithPosition &textPos,
1752 const WithPosition &urlPos);
1753
1754 std::shared_ptr<Link<Trait>>
1755 makeLink(const typename Trait::String &url,
1756 const typename MdBlock<Trait>::Data &text,
1757 TextParsingOpts<Trait> &po,
1758 bool doNotCreateTextOnFail,
1759 long long int startLine,
1760 long long int startPos,
1761 long long int lastLine,
1762 long long int lastPos,
1763 const WithPosition &textPos,
1764 const WithPosition &urlPos);
1765
    //! One delimiter found in the text: its type, where it is (line, position,
    //! length) and a set of flags. Collections of these are the Delims alias
    //! declared right after the struct.
1766 struct Delimiter {
1767 enum DelimiterType {
1768 // (
1769 ParenthesesOpen,
1770 // )
1771 ParenthesesClose,
1772 // [
1773 SquareBracketsOpen,
1774 // ]
1775 SquareBracketsClose,
1776 // ![
1777 ImageOpen,
1778 // ~~
1779 Strikethrough,
1780 // *
1781 Emphasis1,
1782 // _
1783 Emphasis2,
1784 // `
1785 InlineCode,
1786 // <
1787 Less,
1788 // >
1789 Greater,
1790 // $
1791 Math,
1792 HorizontalLine,
1793 H1,
1794 H2,
1795 Unknown
1796 }; // enum DelimiterType
1797
1798 DelimiterType m_type = Unknown;
1799 long long int m_line = -1;
1800 long long int m_pos = -1;
1801 long long int m_len = 0;
1802 bool m_isWordBefore = false;
1803 bool m_backslashed = false;
1804 bool m_leftFlanking = false;
1805 bool m_rightFlanking = false;
1806 }; // struct Delimiter
1807
1808 using Delims = typename Trait::template Vector<Delimiter>;
1809
1810 bool
1811 createShortcutImage(const typename MdBlock<Trait>::Data &text,
1812 TextParsingOpts<Trait> &po,
1813 long long int startLine,
1814 long long int startPos,
1815 long long int lastLineForText,
1816 long long int lastPosForText,
1817 typename Delims::const_iterator lastIt,
1818 const typename MdBlock<Trait>::Data &linkText,
1819 bool doNotCreateTextOnFail,
1820 const WithPosition &textPos,
1821 const WithPosition &linkTextPos);
1822
1823 typename Delims::const_iterator
1824 checkForImage(typename Delims::const_iterator it,
1825 typename Delims::const_iterator last,
1826 TextParsingOpts<Trait> &po);
1827
1828 bool
1829 createShortcutLink(const typename MdBlock<Trait>::Data &text,
1830 TextParsingOpts<Trait> &po,
1831 long long int startLine,
1832 long long int startPos,
1833 long long int lastLineForText,
1834 long long int lastPosForText,
1835 typename Delims::const_iterator lastIt,
1836 const typename MdBlock<Trait>::Data &linkText,
1837 bool doNotCreateTextOnFail,
1838 const WithPosition &textPos,
1839 const WithPosition &linkTextPos);
1840
1841 typename Delims::const_iterator
1842 checkForLink(typename Delims::const_iterator it,
1843 typename Delims::const_iterator last,
1844 TextParsingOpts<Trait> &po);
1845
1846 Delims
1847 collectDelimiters(const typename MdBlock<Trait>::Data &fr);
1848
1849 std::pair<typename Trait::String, bool>
1850 readHtmlTag(typename Delims::const_iterator it, TextParsingOpts<Trait> &po);
1851
1852 typename Delims::const_iterator
1853 findIt(typename Delims::const_iterator it,
1854 typename Delims::const_iterator last,
1855 TextParsingOpts<Trait> &po);
1856
1857 void
1858 finishRule1HtmlTag(typename Delims::const_iterator it,
1859 typename Delims::const_iterator last,
1860 TextParsingOpts<Trait> &po,
1861 bool skipFirst);
1862
1863 void
1864 finishRule2HtmlTag(typename Delims::const_iterator it,
1865 typename Delims::const_iterator last,
1866 TextParsingOpts<Trait> &po);
1867
1868 void
1869 finishRule3HtmlTag(typename Delims::const_iterator it,
1870 typename Delims::const_iterator last,
1871 TextParsingOpts<Trait> &po);
1872
1873 void
1874 finishRule4HtmlTag(typename Delims::const_iterator it,
1875 typename Delims::const_iterator last,
1876 TextParsingOpts<Trait> &po);
1877
1878 void
1879 finishRule5HtmlTag(typename Delims::const_iterator it,
1880 typename Delims::const_iterator last,
1881 TextParsingOpts<Trait> &po);
1882
1883 void
1884 finishRule6HtmlTag(typename Delims::const_iterator it,
1885 typename Delims::const_iterator last,
1886 TextParsingOpts<Trait> &po);
1887
1888 typename Parser<Trait>::Delims::const_iterator
1889 finishRule7HtmlTag(typename Delims::const_iterator it,
1890 typename Delims::const_iterator last,
1891 TextParsingOpts<Trait> &po);
1892
1893 typename Delims::const_iterator
1894 finishRawHtmlTag(typename Delims::const_iterator it,
1895 typename Delims::const_iterator last,
1896 TextParsingOpts<Trait> &po,
1897 bool skipFirst);
1898
1899 int
1900 htmlTagRule(typename Delims::const_iterator it,
1901 typename Delims::const_iterator last,
1902 TextParsingOpts<Trait> &po);
1903
1904 typename Delims::const_iterator
1905 checkForRawHtml(typename Delims::const_iterator it,
1906 typename Delims::const_iterator last,
1907 TextParsingOpts<Trait> &po);
1908
1909 typename Delims::const_iterator
1910 checkForMath(typename Delims::const_iterator it,
1911 typename Delims::const_iterator last,
1912 TextParsingOpts<Trait> &po);
1913
1914 typename Delims::const_iterator
1915 checkForAutolinkHtml(typename Delims::const_iterator it,
1916 typename Delims::const_iterator last,
1917 TextParsingOpts<Trait> &po,
1918 bool updatePos);
1919
1920 typename Delims::const_iterator
1921 checkForInlineCode(typename Delims::const_iterator it,
1922 typename Delims::const_iterator last,
1923 TextParsingOpts<Trait> &po);
1924
1925 std::pair<typename MdBlock<Trait>::Data, typename Delims::const_iterator>
1926 readTextBetweenSquareBrackets(typename Delims::const_iterator start,
1927 typename Delims::const_iterator it,
1928 typename Delims::const_iterator last,
1929 TextParsingOpts<Trait> &po,
1930 bool doNotCreateTextOnFail,
1931 WithPosition *pos = nullptr);
1932
1933 std::pair<typename MdBlock<Trait>::Data, typename Delims::const_iterator>
1934 checkForLinkText(typename Delims::const_iterator it,
1935 typename Delims::const_iterator last,
1936 TextParsingOpts<Trait> &po,
1937 WithPosition *pos = nullptr);
1938
1939 std::pair<typename MdBlock<Trait>::Data, typename Delims::const_iterator>
1940 checkForLinkLabel(typename Delims::const_iterator it,
1941 typename Delims::const_iterator last,
1942 TextParsingOpts<Trait> &po,
1943 WithPosition *pos = nullptr);
1944
1945 std::tuple<typename Trait::String, typename Trait::String, typename Delims::const_iterator, bool>
1946 checkForInlineLink(typename Delims::const_iterator it,
1947 typename Delims::const_iterator last,
1948 TextParsingOpts<Trait> &po,
1949 WithPosition *urlPos = nullptr);
1950
1951 inline std::tuple<typename Trait::String, typename Trait::String, typename Delims::const_iterator, bool>
1952 checkForRefLink(typename Delims::const_iterator it,
1953 typename Delims::const_iterator last,
1954 TextParsingOpts<Trait> &po,
1955 WithPosition *urlPos = nullptr);
1956
1957 typename Trait::String
1958 toSingleLine(const typename MdBlock<Trait>::Data &d);
1959
    //! Try to build a shortcut item starting at delimiter \p it.
    //!
    //! The label is read with checkForLinkLabel(). If that call advanced the
    //! iterator and the label, flattened with toSingleLine() and simplified,
    //! is not empty, \p functor is invoked on this object as a
    //! pointer-to-member.
    //!
    //! \return The iterator produced by checkForLinkLabel() when \p functor
    //! returned true, otherwise the unchanged starting iterator.
1960 template<class Func>
1961 typename Delims::const_iterator
1962 checkShortcut(typename Delims::const_iterator it,
1963 typename Delims::const_iterator last,
1964 TextParsingOpts<Trait> &po,
1965 Func functor)
1966 {
1967 const auto start = it;
1968
1969 typename MdBlock<Trait>::Data text;
1970
1971 WithPosition labelPos;
1972 std::tie(text, it) = checkForLinkLabel(start, last, po, &labelPos);
1973
1974 if (it != start && !toSingleLine(text).simplified().isEmpty()) {
    // The start delimiter supplies both position pairs; the link text and
    // the last position argument are passed empty.
1975 if ((this->*functor)(text, po, start->m_line, start->m_pos, start->m_line,
1976 start->m_pos + start->m_len, it, {}, false, labelPos, {})) {
1977 return it;
1978 }
1979 }
1980
1981 return start;
1982 }
1983
1984 void
1985 createStyles(std::vector<std::pair<Style, long long int>> &s,
1986 long long int l,
1987 typename Delimiter::DelimiterType t,
1988 long long int &count);
1989
1990 bool
1991 isSequence(typename Delims::const_iterator it,
1992 long long int itLine,
1993 long long int itPos,
1994 typename Delimiter::DelimiterType t);
1995
1996 typename Delims::const_iterator
1997 readSequence(typename Delims::const_iterator it,
1998 typename Delims::const_iterator last,
1999 long long int &line,
2000 long long int &pos,
2001 long long int &len,
2002 typename Delims::const_iterator &current);
2003
2004 int
2005 emphasisToInt(typename Delimiter::DelimiterType t);
2006
2007 std::pair<bool, size_t>
2008 checkEmphasisSequence(const std::vector<std::pair<std::pair<long long int, bool>, int>> &s,
2009 size_t idx);
2010
2011 std::vector<std::pair<std::pair<long long int, bool>, int>>
2012 fixSequence(const std::vector<std::pair<std::pair<long long int, bool>, int>> &s);
2013
2014 std::vector<std::vector<std::pair<std::pair<long long int, bool>, int>>>
2015 closedSequences(const std::vector<std::vector<std::pair<std::pair<long long int, bool>, int>>> &vars,
2016 size_t idx);
2017
2018 std::vector<std::pair<Style, long long int>>
2019 createStyles(const std::vector<std::pair<std::pair<long long int, bool>, int>> &s,
2020 size_t i,
2021 typename Delimiter::DelimiterType t,
2022 long long int &count);
2023
2024 std::tuple<bool, std::vector<std::pair<Style, long long int>>, long long int, long long int>
2025 isStyleClosed(typename Delims::const_iterator it,
2026 typename Delims::const_iterator last,
2027 TextParsingOpts<Trait> &po);
2028
2029 typename Delims::const_iterator
2030 incrementIterator(typename Delims::const_iterator it,
2031 typename Delims::const_iterator last,
2032 long long int count);
2033
2034 typename Delims::const_iterator
2035 checkForStyle(typename Delims::const_iterator first,
2036 typename Delims::const_iterator it,
2037 typename Delims::const_iterator last,
2038 TextParsingOpts<Trait> &po);
2039
2040 bool
2041 isListOrQuoteAfterHtml(TextParsingOpts<Trait> &po);
2042
2043 void
2044 parseTableInParagraph(TextParsingOpts<Trait> &po,
2045 std::shared_ptr<Paragraph<Trait>> parent,
2046 std::shared_ptr<Document<Trait>> doc,
2047 typename Trait::StringList &linksToParse,
2048 const typename Trait::String &workingPath,
2049 const typename Trait::String &fileName,
2050 bool collectRefLinks);
2051
2052 bool
2053 isNewBlockIn(MdBlock<Trait> &fr,
2054 long long int startLine,
2055 long long int endLine);
2056
2057 void
2058 makeInlineCode(long long int startLine,
2059 long long int startPos,
2060 long long int lastLine,
2061 long long int lastPos,
2062 TextParsingOpts<Trait> &po,
2063 typename Delims::const_iterator startDelimIt,
2064 typename Delims::const_iterator endDelimIt);
2065
    //! Maps m_fullyOptimizeParagraphs to an OptimizeParagraphType value;
    //! OptimizeParagraphType::Full is returned when the flag is true.
    // NOTE(review): listing lines 2066 (return type) and 2070 (the value
    // used when the flag is false) are not visible here.
2067 defaultParagraphOptimization() const
2068 {
2069 return (m_fullyOptimizeParagraphs ? OptimizeParagraphType::Full :
2071 }
2072
2073private:
2074 //! Used in tests.
2075 friend struct PrivateAccess;
2076
2077private:
    // m_fullyOptimizeParagraphs is overwritten by each public parse() call;
    // m_textPlugins is filled and emptied by the plugin methods above.
2078 typename Trait::StringList m_parsedFiles;
2079 TextPluginsMap<Trait> m_textPlugins;
2080 bool m_fullyOptimizeParagraphs = true;
2081
    // NOTE(review): listing line 2082 is not visible here.
2083}; // class Parser
2084
2085//
2086// Parser
2087//
2088
2089template<class Trait>
2090inline std::shared_ptr<Document<Trait>>
2091Parser<Trait>::parse(const typename Trait::String &fileName,
2092 bool recursive,
2093 const typename Trait::StringList &ext,
2094 bool fullyOptimizeParagraphs)
2095{
2096 m_fullyOptimizeParagraphs = fullyOptimizeParagraphs;
2097
2098 std::shared_ptr<Document<Trait>> doc(new Document<Trait>);
2099
2100 parseFile(fileName, recursive, doc, ext);
2101
2102 clearCache();
2103
2104 return doc;
2105}
2106
2107template<class Trait>
2108inline std::shared_ptr<Document<Trait>>
2109Parser<Trait>::parse(typename Trait::TextStream &stream,
2110 const typename Trait::String &path,
2111 const typename Trait::String &fileName,
2112 bool fullyOptimizeParagraphs)
2113{
2114 m_fullyOptimizeParagraphs = fullyOptimizeParagraphs;
2115
2116 std::shared_ptr<Document<Trait>> doc(new Document<Trait>);
2117
2118 parseStream(stream, path, fileName, false, doc, typename Trait::StringList());
2119
2120 clearCache();
2121
2122 return doc;
2123}
2124
2125template<class Trait>
2127
2128#ifdef MD4QT_QT_SUPPORT
2129
2130//! Wrapper for QTextStream.
2131template<>
2132class TextStream<QStringTrait>
2133{
2134public:
2135 TextStream(QTextStream &stream)
2136 : m_stream(stream)
2137 , m_lastBuf(false)
2138 , m_pos(0)
2139 {
2140 }
2141
2142 bool
2143 atEnd() const
2144 {
2145 return (m_lastBuf && m_pos == m_buf.size());
2146 }
2147
2148 QString
2149 readLine()
2150 {
2151 QString line;
2152 bool rFound = false;
2153
2154 while (!atEnd()) {
2155 const auto c = getChar();
2156
2157 if (rFound && c != QLatin1Char('\n')) {
2158 --m_pos;
2159
2160 return line;
2161 }
2162
2163 if (c == QLatin1Char('\r')) {
2164 rFound = true;
2165
2166 continue;
2167 } else if (c == QLatin1Char('\n')) {
2168 return line;
2169 }
2170
2171 if (!c.isNull()) {
2172 line.push_back(c);
2173 }
2174 }
2175
2176 return line;
2177 }
2178
2179private:
2180 void
2181 fillBuf()
2182 {
2183 m_buf = m_stream.read(512);
2184
2185 if (m_stream.atEnd()) {
2186 m_lastBuf = true;
2187 }
2188
2189 m_pos = 0;
2190 }
2191
2192 QChar
2193 getChar()
2194 {
2195 if (m_pos < m_buf.size()) {
2196 return m_buf.at(m_pos++);
2197 } else if (!atEnd()) {
2198 fillBuf();
2199
2200 return getChar();
2201 } else {
2202 return QChar();
2203 }
2204 }
2205
2206private:
2207 QTextStream &m_stream;
2208 QString m_buf;
2209 bool m_lastBuf;
2210 long long int m_pos;
2211}; // class TextStream
2212
2213#endif
2214
2215#ifdef MD4QT_ICU_STL_SUPPORT
2216
2217//! Wrapper for std::istream.
2218template<>
2219class TextStream<UnicodeStringTrait>
2220{
2221public:
2222 TextStream(std::istream &stream)
2223 : m_pos(0)
2224 {
2225 std::vector<unsigned char> content;
2226
2227 stream.seekg(0, std::ios::end);
2228 const auto ssize = stream.tellg();
2229 content.resize((size_t)ssize + 1);
2230 stream.seekg(0, std::ios::beg);
2231 stream.read((char *)&content[0], ssize);
2232 content[(size_t)ssize] = 0;
2233
2234 const auto z = std::count(content.cbegin(), content.cend(), 0);
2235
2236 if (z > 1) {
2237 std::vector<unsigned char> tmp;
2238 tmp.resize(content.size() + (z - 1) * 2);
2239
2240 for (size_t i = 0, j = 0; i < content.size() - 1; ++i, ++j) {
2241 if (content[i] == 0) {
2242 // 0xFFFD - replacement character in UTF-8.
2243 tmp[j++] = 0xEF;
2244 tmp[j++] = 0xBF;
2245 tmp[j] = 0xBD;
2246 } else {
2247 tmp[j] = content[i];
2248 }
2249 }
2250
2251 tmp[tmp.size() - 1] = 0;
2252
2253 std::swap(content, tmp);
2254 }
2255
2256 m_str = UnicodeString::fromUTF8((char *)&content[0]);
2257 }
2258
2259 bool
2260 atEnd() const
2261 {
2262 return m_pos == m_str.size();
2263 }
2264
2265 UnicodeString
2266 readLine()
2267 {
2268 UnicodeString line;
2269
2270 bool rFound = false;
2271
2272 while (!atEnd()) {
2273 const auto c = getChar();
2274
2275 if (rFound && c != UnicodeChar('\n')) {
2276 --m_pos;
2277
2278 return line;
2279 }
2280
2281 if (c == UnicodeChar('\r')) {
2282 rFound = true;
2283
2284 continue;
2285 } else if (c == UnicodeChar('\n')) {
2286 return line;
2287 }
2288
2289 if (!c.isNull()) {
2290 line.push_back(c);
2291 }
2292 }
2293
2294 return line;
2295 }
2296
2297private:
2298 UnicodeChar
2299 getChar()
2300 {
2301 if (!atEnd()) {
2302 return m_str[m_pos++];
2303 } else {
2304 return UnicodeChar();
2305 }
2306 }
2307
2308private:
2309 UnicodeString m_str;
2310 long long int m_pos;
2311};
2312
2313#endif
2314
2315//! \return Is HTML comment closed?
2316template<class Trait>
2317inline bool
2318checkForEndHtmlComments(const typename Trait::String &line,
2319 long long int pos)
2320{
2321 const long long int e = line.indexOf(Trait::latin1ToString("-->"), pos);
2322
2323 if (e != -1) {
2324 return isHtmlComment<Trait>(line.sliced(0, e + 3));
2325 }
2326
2327 return false;
2328}
2329
2330//! Collect information about HTML comments.
//!
//! Every occurrence of s_startComment in \p line produces one entry in res,
//! keyed by line.virginPos() of the occurrence. An occurrence found closed
//! is stored with a non-negative number and true; one that is never found
//! closed is stored as {-1, false}.
// NOTE(review): the embedded listing numbers jump from 2333 to 2336, so the
// remaining parameters (the body uses "stream" and "res") are not visible
// here - confirm them against the real header.
2331template<class Trait>
2332inline void
2333checkForHtmlComments(const typename Trait::InternalString &line,
2336{
 // l is declared once and is not reset between occurrences.
2337 long long int p = 0, l = stream.currentLineNumber();
2338
2339 const auto &str = line.asString();
2340
2341 while ((p = str.indexOf(Trait::latin1ToString(s_startComment), p)) != -1) {
2342 bool addNegative = false;
2343
 // Text from the comment start to the end of the line.
2344 auto c = str.sliced(p);
2345
 // "<!-->" and "<!--->" are recorded right away (with 0 and 1) and the
 // scan resumes just behind them (5 and 6 characters).
2346 if (c.startsWith(Trait::latin1ToString("<!-->"))) {
2347 res.insert({line.virginPos(p), {0, true}});
2348
2349 p += 5;
2350
2351 continue;
2352 } else if (c.startsWith(Trait::latin1ToString("<!--->"))) {
2353 res.insert({line.virginPos(p), {1, true}});
2354
2355 p += 6;
2356
2357 continue;
2358 }
2359
 // NOTE(review): listing line 2360 is not visible; given the "} else {"
 // below it presumably holds the "if" that tests whether the comment is
 // closed within c - confirm.
2361 res.insert({line.virginPos(p), {2, true}});
2362 } else {
2363 addNegative = true;
2364
 // Append the following lines of the stream, separated by one space.
2365 for (; l < stream.size(); ++l) {
2366 c.push_back(Trait::latin1ToChar(' '));
2367 c.push_back(stream.lineAt(l).asString());
2368
 // NOTE(review): listing line 2369 is not visible; presumably the same
 // test as above, applied to the extended text - confirm.
2370 res.insert({line.virginPos(p), {2, true}});
2371
2372 addNegative = false;
2373
2374 break;
2375 }
2376 }
2377 }
2378
 // Still set only when the look-ahead loop ended without inserting.
2379 if (addNegative) {
2380 res.insert({line.virginPos(p), {-1, false}});
2381 }
2382
 // Step past this occurrence so that indexOf() finds the next one.
2383 ++p;
2384 }
2385}
2386
2387template<class Trait>
2388inline void
2389Parser<Trait>::parseFragment(typename Parser<Trait>::ParserContext &ctx,
2390 std::shared_ptr<Block<Trait>> parent,
2391 std::shared_ptr<Document<Trait>> doc,
2392 typename Trait::StringList &linksToParse,
2393 const typename Trait::String &workingPath,
2394 const typename Trait::String &fileName,
2395 bool collectRefLinks)
2396{
2397 if (!ctx.m_fragment.empty()) {
2398 MdBlock<Trait> block = {ctx.m_fragment, ctx.m_emptyLinesBefore, ctx.m_emptyLinesCount > 0};
2399
2400 ctx.m_emptyLinesBefore = ctx.m_emptyLinesCount;
2401
2402 ctx.m_splitted.push_back(block);
2403
2404 long long int line = 0;
2405
2406 while (line >= 0) {
2407 line = parseFragment(block, parent, doc, linksToParse, workingPath,
2408 fileName, collectRefLinks, ctx.m_html);
2409
2410 assert(line != 0);
2411
2412 if (line > 0) {
2413 if (ctx.m_html.m_html) {
2414 if (!collectRefLinks) {
2415 ctx.m_html.m_parent->appendItem(ctx.m_html.m_html);
2416 }
2417
2418 resetHtmlTag<Trait>(ctx.m_html);
2419 }
2420
2421 const auto it = std::find_if(ctx.m_fragment.cbegin(), ctx.m_fragment.cend(), [line](const auto &d) {
2422 return (d.second.m_lineNumber == line);
2423 });
2424
2425 block.m_data.clear();
2426 std::copy(it, ctx.m_fragment.cend(), std::back_inserter(block.m_data));
2427 block.m_emptyLinesBefore = 0;
2428 }
2429 }
2430
2431 ctx.m_fragment.clear();
2432 }
2433
2434 ctx.m_type = BlockType::EmptyLine;
2435 ctx.m_emptyLineInList = false;
2436 ctx.m_fensedCodeInList = false;
2437 ctx.m_emptyLinesCount = 0;
2438 ctx.m_lineCounter = 0;
2439 ctx.m_indents.clear();
2440 ctx.m_indent = {-1, -1};
2441 ctx.m_startOfCode.clear();
2442 ctx.m_startOfCodeInList.clear();
2443}
2444
//! Replace tabs with spaces (just for internal simpler use).
//!
//! Each tab is replaced by as many spaces as are needed to reach the next
//! multiple of four columns, so a tab always becomes 1 to 4 spaces.
template<class Trait>
inline void
replaceTabs(typename Trait::InternalString &s)
{
    const auto tab = Trait::latin1ToChar('\t');
    const auto space = Trait::latin1ToChar(' ');

    // Number of columns left until the next tab stop.
    unsigned char toNextStop = 4;
    long long int total = s.length();
    long long int idx = 0;

    while (idx < total) {
        if (s[idx] == tab) {
            s.replaceOne(idx, 1, typename Trait::String(toNextStop, space));

            // The string grew; skip over the spaces just inserted.
            total += toNextStop - 1;
            idx += toNextStop - 1;
            toNextStop = 4;
        } else if (toNextStop == 1) {
            // This character sits right before a tab stop.
            toNextStop = 4;
        } else {
            --toNextStop;
        }

        ++idx;
    }
}
2467
//! Read lines from \p stream while they still belong to a footnote.
//!
//! Lines are appended to ctx.m_fragment until a line is met that starts
//! something else; collected fragments are parsed with parseFragment().
//! The loop ends at the end of the stream, or when a line that follows an
//! empty line leaves ctx.m_type different from BlockType::Footnote.
2468template<class Trait>
2469inline void
2470Parser<Trait>::eatFootnote(typename Parser<Trait>::ParserContext &ctx,
2471 StringListStream<Trait> &stream,
2472 std::shared_ptr<Block<Trait>> parent,
2473 std::shared_ptr<Document<Trait>> doc,
2474 typename Trait::StringList &linksToParse,
2475 const typename Trait::String &workingPath,
2476 const typename Trait::String &fileName,
2477 bool collectRefLinks)
2478{
2479 long long int emptyLinesCount = 0;
2480 bool wasEmptyLine = false;
2481
2482 while (!stream.atEnd()) {
2483 const auto currentLineNumber = stream.currentLineNumber();
2484
2485 auto line = readLine(ctx, stream);
2486
2487 replaceTabs<Trait>(line);
2488
 // Index of the first non-space character; equals the length for a line
 // made of spaces only.
2489 const auto ns = skipSpaces<Trait>(0, line.asString());
2490
 // Case 1: an empty line, or a line starting with the literal below, is
 // appended to the current fragment.
 // NOTE(review): this listing shows all indentation collapsed to a single
 // space, so the literal below may be longer in the real header - confirm.
2491 if (ns == line.length() || line.asString().startsWith(Trait::latin1ToString(" "))) {
2492 if (ns == line.length()) {
2493 ++emptyLinesCount;
2494 wasEmptyLine = true;
2495 } else {
2496 emptyLinesCount = 0;
2497 }
2498
2499 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
 // Case 2: no empty line seen so far.
2500 } else if (!wasEmptyLine) {
 // A new footnote starts: parse what was collected and make this line the
 // first one of the next fragment.
2501 if (isFootnote<Trait>(line.sliced(ns).asString())) {
2502 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2503
2504 ctx.m_lineType = BlockType::Footnote;
2505
2506 makeLineMain(ctx, line, emptyLinesCount, ctx.m_indent, ns, currentLineNumber);
2507
2508 continue;
2509 } else {
 // Anything else is appended to the current fragment.
2510 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2511 }
 // Case 3: an empty line was seen and this line is not covered by case 1.
 // The collected fragment is parsed and the line is classified afresh.
2512 } else {
2513 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2514
2515 ctx.m_lineType =
2516 whatIsTheLine(line, false, false, false, &ctx.m_startOfCodeInList, &ctx.m_indent,
2517 ctx.m_lineType == BlockType::EmptyLine, true, &ctx.m_indents);
2518
2519 makeLineMain(ctx, line, emptyLinesCount, ctx.m_indent, ns, currentLineNumber);
2520
 // Keep going only when the context type is Footnote after makeLineMain().
2521 if (ctx.m_type == BlockType::Footnote) {
2522 wasEmptyLine = false;
2523
2524 continue;
2525 } else {
2526 break;
2527 }
2528 }
2529 }
2530
 // Parse whatever is still collected when the stream ran out.
2531 if (stream.atEnd() && !ctx.m_fragment.empty()) {
2532 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2533 }
2534}
2535
2536template<class Trait>
2537inline void
2538Parser<Trait>::finishHtml(ParserContext &ctx,
2539 std::shared_ptr<Block<Trait>> parent,
2540 std::shared_ptr<Document<Trait>> doc,
2541 bool collectRefLinks,
2542 bool top,
2543 bool dontProcessLastFreeHtml)
2544{
2545 if (!collectRefLinks || top) {
2546 if (ctx.m_html.m_html->isFreeTag()) {
2547 if (!dontProcessLastFreeHtml) {
2548 if (ctx.m_html.m_parent) {
2549 ctx.m_html.m_parent->appendItem(ctx.m_html.m_html);
2550
2551 updateLastPosInList(ctx.m_html);
2552 } else {
2553 parent->appendItem(ctx.m_html.m_html);
2554 }
2555 }
2556 } else {
2557 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
2558 p->appendItem(ctx.m_html.m_html);
2559 p->setStartColumn(ctx.m_html.m_html->startColumn());
2560 p->setStartLine(ctx.m_html.m_html->startLine());
2561 p->setEndColumn(ctx.m_html.m_html->endColumn());
2562 p->setEndLine(ctx.m_html.m_html->endLine());
2563 doc->appendItem(p);
2564 }
2565 }
2566
2567 if (!dontProcessLastFreeHtml) {
2568 resetHtmlTag(ctx.m_html);
2569 }
2570
2571 ctx.m_html.m_toAdjustLastPos.clear();
2572}
2573
2574template<class Trait>
2575inline void
2576Parser<Trait>::makeLineMain(ParserContext &ctx,
2577 const typename Trait::InternalString &line,
2578 long long int emptyLinesCount,
2579 const ListIndent &currentIndent,
2580 long long int ns,
2581 long long int currentLineNumber)
2582{
2583 if (ctx.m_html.m_htmlBlockType >= 6) {
2584 ctx.m_html.m_continueHtml = (emptyLinesCount <= 0);
2585 }
2586
2587 ctx.m_type = ctx.m_lineType;
2588
2589 switch (ctx.m_type) {
2590 case BlockType::List:
2591 case BlockType::ListWithFirstEmptyLine: {
2592 if (ctx.m_indents.empty())
2593 ctx.m_indents.push_back(currentIndent.m_indent);
2594
2595 ctx.m_indent = currentIndent;
2596 } break;
2597
2598 case BlockType::Code:
2599 ctx.m_startOfCode = startSequence<Trait>(line.asString());
2600 break;
2601
2602 default:
2603 break;
2604 }
2605
2606 if (!line.isEmpty() && ns < line.length()) {
2607 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2608 }
2609
2610 ctx.m_lineCounter = 1;
2611 ctx.m_emptyLinesCount = 0;
2612}
2613
2614template<class Trait>
2615inline void
2616Parser<Trait>::parseFragmentAndMakeNextLineMain(ParserContext &ctx,
2617 std::shared_ptr<Block<Trait>> parent,
2618 std::shared_ptr<Document<Trait>> doc,
2619 typename Trait::StringList &linksToParse,
2620 const typename Trait::String &workingPath,
2621 const typename Trait::String &fileName,
2622 bool collectRefLinks,
2623 const typename Trait::InternalString &line,
2624 const ListIndent &currentIndent,
2625 long long int ns,
2626 long long int currentLineNumber)
2627{
2628 const auto empty = ctx.m_emptyLinesCount;
2629
2630 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2631
2632 makeLineMain(ctx, line, empty, currentIndent, ns, currentLineNumber);
2633}
2634
2635template<class Trait>
2636inline bool
2637Parser<Trait>::isListType(BlockType t)
2638{
2639 switch (t) {
2640 case BlockType::List:
2641 case BlockType::ListWithFirstEmptyLine:
2642 return true;
2643
2644 default:
2645 return false;
2646 }
2647}
2648
2649template<class Trait>
2650typename Trait::InternalString
2651Parser<Trait>::readLine(typename Parser<Trait>::ParserContext &ctx,
2652 StringListStream<Trait> &stream)
2653{
2654 ctx.m_htmlCommentData.clear();
2655
2656 auto line = stream.readLine();
2657
2658 static const char16_t c_zeroReplaceWith[2] = {0xFFFD, 0};
2659
2660 line.replace(typename Trait::Char(0), Trait::utf16ToString(&c_zeroReplaceWith[0]));
2661
2662 checkForHtmlComments(line, stream, ctx.m_htmlCommentData);
2663
2664 return line;
2665}
2666
2667template<class Trait>
2668inline RawHtmlBlock<Trait>
2669Parser<Trait>::parse(StringListStream<Trait> &stream,
2670 std::shared_ptr<Block<Trait>> parent,
2671 std::shared_ptr<Document<Trait>> doc,
2672 typename Trait::StringList &linksToParse,
2673 const typename Trait::String &workingPath,
2674 const typename Trait::String &fileName,
2675 bool collectRefLinks,
2676 bool top,
2677 bool dontProcessLastFreeHtml)
2678{
2679 ParserContext ctx;
2680
2681 while (!stream.atEnd()) {
2682 const auto currentLineNumber = stream.currentLineNumber();
2683
2684 auto line = readLine(ctx, stream);
2685
2686 if (ctx.m_lineType != BlockType::Unknown) {
2687 ctx.m_prevLineType = ctx.m_lineType;
2688 }
2689
2690 ctx.m_lineType = whatIsTheLine(line,
2691 (ctx.m_emptyLineInList || isListType(ctx.m_type)),
2692 ctx.m_prevLineType == BlockType::ListWithFirstEmptyLine,
2693 ctx.m_fensedCodeInList,
2694 &ctx.m_startOfCodeInList,
2695 &ctx.m_indent,
2696 ctx.m_lineType == BlockType::EmptyLine,
2697 true,
2698 &ctx.m_indents);
2699
2700 if (isListType(ctx.m_type) && ctx.m_lineType == BlockType::FensedCodeInList) {
2701 ctx.m_fensedCodeInList = !ctx.m_fensedCodeInList;
2702 }
2703
2704 const auto currentIndent = ctx.m_indent;
2705
2706 const auto ns = skipSpaces<Trait>(0, line.asString());
2707
2708 const auto indentInListValue = indentInList(&ctx.m_indents, ns, true);
2709
2710 if (isListType(ctx.m_lineType) && !ctx.m_fensedCodeInList && ctx.m_indent.m_level > -1) {
2711 if (ctx.m_indent.m_level < (long long int)ctx.m_indents.size()) {
2712 ctx.m_indents.erase(ctx.m_indents.cbegin() + ctx.m_indent.m_level, ctx.m_indents.cend());
2713 }
2714
2715 ctx.m_indents.push_back(ctx.m_indent.m_indent);
2716 }
2717
2718 if (ctx.m_type == BlockType::CodeIndentedBySpaces && ns > 3) {
2719 ctx.m_lineType = BlockType::CodeIndentedBySpaces;
2720 }
2721
2722 if (ctx.m_type == BlockType::ListWithFirstEmptyLine && ctx.m_lineCounter == 2 &&
2723 !isListType(ctx.m_lineType)) {
2724 if (ctx.m_emptyLinesCount > 0) {
2725 parseFragmentAndMakeNextLineMain(ctx,
2726 parent,
2727 doc,
2728 linksToParse,
2729 workingPath,
2730 fileName,
2731 collectRefLinks,
2732 line,
2733 currentIndent,
2734 ns,
2735 currentLineNumber);
2736
2737 continue;
2738 } else {
2739 ctx.m_emptyLineInList = false;
2740 ctx.m_emptyLinesCount = 0;
2741 }
2742 }
2743
2744 if (ctx.m_type == BlockType::ListWithFirstEmptyLine && ctx.m_lineCounter == 2) {
2745 ctx.m_type = BlockType::List;
2746 }
2747
2748 // Footnote.
2749 if (ctx.m_lineType == BlockType::Footnote) {
2750 parseFragmentAndMakeNextLineMain(ctx,
2751 parent,
2752 doc,
2753 linksToParse,
2754 workingPath,
2755 fileName,
2756 collectRefLinks,
2757 line,
2758 currentIndent,
2759 ns,
2760 currentLineNumber);
2761
2762 eatFootnote(ctx, stream, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2763
2764 continue;
2765 }
2766
2767 // First line of the fragment.
2768 if (ns != line.length() && ctx.m_type == BlockType::EmptyLine) {
2769 makeLineMain(ctx, line, ctx.m_emptyLinesCount, currentIndent, ns, currentLineNumber);
2770
2771 continue;
2772 } else if (ns == line.length() && ctx.m_type == BlockType::EmptyLine) {
2773 continue;
2774 }
2775
2776 ++ctx.m_lineCounter;
2777
2778 // Got new empty line.
2779 if (ns == line.length()) {
2780 ++ctx.m_emptyLinesCount;
2781
2782 switch (ctx.m_type) {
2783 case BlockType::Blockquote: {
2784 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2785
2786 continue;
2787 }
2788
2789 case BlockType::Text:
2790 case BlockType::CodeIndentedBySpaces:
2791 continue;
2792 break;
2793
2794 case BlockType::Code: {
2795 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2796 ctx.m_emptyLinesCount = 0;
2797
2798 continue;
2799 }
2800
2801 case BlockType::List:
2802 case BlockType::ListWithFirstEmptyLine: {
2803 ctx.m_emptyLineInList = true;
2804
2805 continue;
2806 }
2807
2808 default:
2809 break;
2810 }
2811 }
2812 // Empty new line in list.
2813 else if (ctx.m_emptyLineInList) {
2814 if (indentInListValue || isListType(ctx.m_lineType) || ctx.m_lineType == BlockType::SomethingInList) {
2815 for (long long int i = 0; i < ctx.m_emptyLinesCount; ++i) {
2816 ctx.m_fragment.push_back({typename Trait::String(),
2817 {currentLineNumber - ctx.m_emptyLinesCount + i, {}}});
2818 }
2819
2820 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2821
2822 ctx.m_emptyLineInList = false;
2823 ctx.m_emptyLinesCount = 0;
2824
2825 continue;
2826 } else {
2827 const auto empty = ctx.m_emptyLinesCount;
2828
2829 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2830
2831 ctx.m_lineType = whatIsTheLine(line, false, false, false, nullptr, nullptr,
2832 true, false, &ctx.m_indents);
2833
2834 makeLineMain(ctx, line, empty, currentIndent, ns, currentLineNumber);
2835
2836 continue;
2837 }
2838 } else if (ctx.m_emptyLinesCount > 0) {
2839 if (ctx.m_type == BlockType::CodeIndentedBySpaces &&
2840 ctx.m_lineType == BlockType::CodeIndentedBySpaces) {
2841 const auto indent = skipSpaces<Trait>(0, ctx.m_fragment.front().first.asString());
2842
2843 for (long long int i = 0; i < ctx.m_emptyLinesCount; ++i) {
2844 ctx.m_fragment.push_back({typename Trait::String(indent, Trait::latin1ToChar(' ')),
2845 {currentLineNumber - ctx.m_emptyLinesCount + i, {}}});
2846 }
2847
2848 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2849 ctx.m_emptyLinesCount = 0;
2850 } else {
2851 parseFragmentAndMakeNextLineMain(ctx,
2852 parent,
2853 doc,
2854 linksToParse,
2855 workingPath,
2856 fileName,
2857 collectRefLinks,
2858 line,
2859 currentIndent,
2860 ns,
2861 currentLineNumber);
2862 }
2863
2864 continue;
2865 }
2866
2867 // Something new and first block is not a code block or a list, blockquote.
2868 if (ctx.m_type != ctx.m_lineType && ctx.m_type != BlockType::Code &&
2869 !isListType(ctx.m_type) && ctx.m_type != BlockType::Blockquote) {
2870 if (ctx.m_type == BlockType::Text && ctx.m_lineType == BlockType::CodeIndentedBySpaces) {
2871 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2872 }
2873 else {
2874 if (ctx.m_type == BlockType::Text && isListType(ctx.m_lineType)) {
2875 if (ctx.m_lineType != BlockType::ListWithFirstEmptyLine) {
2876 int num = 0;
2877
2878 if (isOrderedList<Trait>(line.asString(), &num)) {
2879 if (num != 1) {
2880 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2881
2882 continue;
2883 }
2884 }
2885 } else {
2886 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2887
2888 continue;
2889 }
2890 }
2891
2892 parseFragmentAndMakeNextLineMain(ctx,
2893 parent,
2894 doc,
2895 linksToParse,
2896 workingPath,
2897 fileName,
2898 collectRefLinks,
2899 line,
2900 currentIndent,
2901 ns,
2902 currentLineNumber);
2903 }
2904 }
2905 // End of code block.
2906 else if (ctx.m_type == BlockType::Code && ctx.m_type == ctx.m_lineType &&
2907 !ctx.m_startOfCode.isEmpty() &&
2908 startSequence<Trait>(line.asString()).contains(ctx.m_startOfCode) &&
2909 isCodeFences<Trait>(line.asString(), true)) {
2910 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2911
2912 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2913 }
2914 // Not a continue of list.
2915 else if (ctx.m_type != ctx.m_lineType && isListType(ctx.m_type) &&
2916 ctx.m_lineType != BlockType::SomethingInList &&
2917 ctx.m_lineType != BlockType::FensedCodeInList && !isListType(ctx.m_lineType)) {
2918 parseFragmentAndMakeNextLineMain(ctx,
2919 parent,
2920 doc,
2921 linksToParse,
2922 workingPath,
2923 fileName,
2924 collectRefLinks,
2925 line,
2926 currentIndent,
2927 ns,
2928 currentLineNumber);
2929 } else if (ctx.m_type == BlockType::Heading) {
2930 parseFragmentAndMakeNextLineMain(ctx,
2931 parent,
2932 doc,
2933 linksToParse,
2934 workingPath,
2935 fileName,
2936 collectRefLinks,
2937 line,
2938 currentIndent,
2939 ns,
2940 currentLineNumber);
2941 } else {
2942 ctx.m_fragment.push_back({line, {currentLineNumber, ctx.m_htmlCommentData}});
2943 }
2944
2945 ctx.m_emptyLinesCount = 0;
2946 }
2947
2948 if (!ctx.m_fragment.empty()) {
2949 if (ctx.m_type == BlockType::Code) {
2950 ctx.m_fragment.push_back({ctx.m_startOfCode, {-1, {}}});
2951 }
2952
2953 parseFragment(ctx, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
2954 }
2955
2956 if (top) {
2957 resetHtmlTag(ctx.m_html);
2958
2959 for (long long int i = 0; i < (long long int)ctx.m_splitted.size(); ++i) {
2960 long long int line = 0;
2961
2962 auto &data = ctx.m_splitted[i];
2963
2964 while (line >= 0) {
2965 line = parseFragment(data, parent, doc, linksToParse, workingPath, fileName, false, ctx.m_html);
2966
2967 assert(line != 0);
2968
2969 if (line > 0) {
2970 if (ctx.m_html.m_html) {
2971 ctx.m_html.m_parent->appendItem(ctx.m_html.m_html);
2972
2973 resetHtmlTag<Trait>(ctx.m_html);
2974 }
2975
2976 const auto it = std::find_if(data.m_data.cbegin(), data.m_data.cend(), [line](const auto &d) {
2977 return (d.second.m_lineNumber == line);
2978 });
2979
2980 data.m_data.erase(data.m_data.cbegin(), it);
2981 }
2982 }
2983
2984 if (ctx.m_html.m_htmlBlockType >= 6) {
2985 ctx.m_html.m_continueHtml = (!ctx.m_splitted[i].m_emptyLineAfter);
2986 }
2987
2988 if (ctx.m_html.m_html && !ctx.m_html.m_continueHtml) {
2989 finishHtml(ctx, parent, doc, collectRefLinks, top, dontProcessLastFreeHtml);
2990 } else if (!ctx.m_html.m_html) {
2991 ctx.m_html.m_toAdjustLastPos.clear();
2992 }
2993 }
2994 }
2995
2996 if (ctx.m_html.m_html) {
2997 finishHtml(ctx, parent, doc, collectRefLinks, top, dontProcessLastFreeHtml);
2998 }
2999
3000 return ctx.m_html;
3001}
3002
3003#ifdef MD4QT_QT_SUPPORT
3004
3005template<>
3006inline void
3007Parser<QStringTrait>::parseFile(const QString &fileName,
3008 bool recursive,
3009 std::shared_ptr<Document<QStringTrait>> doc,
3010 const QStringList &ext,
3011 QStringList *parentLinks)
3012{
3013 QFileInfo fi(fileName);
3014
3015 if (fi.exists() && ext.contains(fi.suffix().toLower())) {
3016 QFile f(fileName);
3017
3018 if (f.open(QIODevice::ReadOnly)) {
3019 QTextStream s(f.readAll());
3020 f.close();
3021
3022 parseStream(s, fi.absolutePath(), fi.fileName(), recursive, doc, ext, parentLinks);
3023 }
3024 }
3025}
3026
3027#endif
3028
3029#ifdef MD4QT_ICU_STL_SUPPORT
3030
3031template<>
3032inline void
3033Parser<UnicodeStringTrait>::parseFile(const UnicodeString &fileName,
3034 bool recursive,
3035 std::shared_ptr<Document<UnicodeStringTrait>> doc,
3036 const std::vector<UnicodeString> &ext,
3037 std::vector<UnicodeString> *parentLinks)
3038{
3039 if (UnicodeStringTrait::fileExists(fileName)) {
3040 std::string fn;
3041 fileName.toUTF8String(fn);
3042
3043 try {
3044 auto e = UnicodeString::fromUTF8(std::filesystem::u8path(fn).extension().u8string());
3045
3046 if (!e.isEmpty()) {
3047 e.remove(0, 1);
3048 }
3049
3050 if (std::find(ext.cbegin(), ext.cend(), e.toLower()) != ext.cend()) {
3051 auto path = std::filesystem::canonical(std::filesystem::u8path(fn));
3052 std::ifstream file(path.c_str(), std::ios::in | std::ios::binary);
3053
3054 if (file.good()) {
3055 const auto fileNameS = path.filename().u8string();
3056 auto workingDirectory = path.remove_filename().u8string();
3057
3058 if (!workingDirectory.empty()) {
3059 workingDirectory.erase(workingDirectory.size() - 1, 1);
3060 }
3061
3062 std::replace(workingDirectory.begin(), workingDirectory.end(), '\\', '/');
3063
3064 parseStream(file, UnicodeString::fromUTF8(workingDirectory),
3065 UnicodeString::fromUTF8(fileNameS), recursive, doc, ext, parentLinks);
3066
3067 file.close();
3068 }
3069 }
3070 } catch (const std::exception &) {
3071 }
3072 }
3073}
3074
3075#endif
3076
3077//! Resolve links in the document.
3078template<class Trait>
3079void
3080resolveLinks(typename Trait::StringList &linksToParse,
3081 std::shared_ptr<Document<Trait>> doc)
3082{
3083 for (auto it = linksToParse.begin(), last = linksToParse.end(); it != last; ++it) {
3084 auto nextFileName = *it;
3085
3086 if (nextFileName.startsWith(Trait::latin1ToString("#"))) {
3087 const auto lit = doc->labeledLinks().find(nextFileName);
3088
3089 if (lit != doc->labeledLinks().cend()) {
3090 nextFileName = lit->second->url();
3091 } else {
3092 continue;
3093 }
3094 }
3095
3096 if (Trait::fileExists(nextFileName)) {
3097 *it = Trait::absoluteFilePath(nextFileName);
3098 }
3099 }
3100}
3101
3102template<class Trait>
3103inline void
3104Parser<Trait>::parseStream(typename Trait::TextStream &s,
3105 const typename Trait::String &workingPath,
3106 const typename Trait::String &fileName,
3107 bool recursive,
3108 std::shared_ptr<Document<Trait>> doc,
3109 const typename Trait::StringList &ext,
3110 typename Trait::StringList *parentLinks)
3111{
3112 typename Trait::StringList linksToParse;
3113
3114 const auto path = workingPath.isEmpty() ? typename Trait::String(fileName) :
3115 typename Trait::String(workingPath + Trait::latin1ToString("/") + fileName);
3116
3117 doc->appendItem(std::shared_ptr<Anchor<Trait>>(new Anchor<Trait>(path)));
3118
3119 typename MdBlock<Trait>::Data data;
3120
3121 {
3122 TextStream<Trait> stream(s);
3123
3124 long long int i = 0;
3125
3126 while (!stream.atEnd()) {
3127 data.push_back(std::pair<typename Trait::InternalString, MdLineData>(stream.readLine(), {i}));
3128 ++i;
3129 }
3130 }
3131
3132 StringListStream<Trait> stream(data);
3133
3134 parse(stream, doc, doc, linksToParse, workingPath, fileName, true, true);
3135
3136 m_parsedFiles.push_back(path);
3137
3138 resolveLinks<Trait>(linksToParse, doc);
3139
3140 // Parse all links if parsing is recursive.
3141 if (recursive && !linksToParse.empty()) {
3142 const auto tmpLinks = linksToParse;
3143
3144 while (!linksToParse.empty()) {
3145 auto nextFileName = linksToParse.front();
3146 linksToParse.erase(linksToParse.cbegin());
3147
3148 if (parentLinks) {
3149 const auto pit = std::find(parentLinks->cbegin(), parentLinks->cend(), nextFileName);
3150
3151 if (pit != parentLinks->cend()) {
3152 continue;
3153 }
3154 }
3155
3156 if (nextFileName.startsWith(Trait::latin1ToString("#"))) {
3157 continue;
3158 }
3159
3160 const auto pit = std::find(m_parsedFiles.cbegin(), m_parsedFiles.cend(), nextFileName);
3161
3162 if (pit == m_parsedFiles.cend()) {
3163 if (!doc->isEmpty() && doc->items().back()->type() != ItemType::PageBreak) {
3164 doc->appendItem(std::shared_ptr<PageBreak<Trait>>(new PageBreak<Trait>));
3165 }
3166
3167 parseFile(nextFileName, recursive, doc, ext, &linksToParse);
3168 }
3169 }
3170
3171 if (parentLinks) {
3172 std::copy(tmpLinks.cbegin(), tmpLinks.cend(), std::back_inserter(*parentLinks));
3173 }
3174 }
3175}
3176
//! \return Position of first character in list item.
template<class Trait>
inline long long int
posOfListItem(const typename Trait::String &s,
              bool ordered)
{
    long long int pos = 0;

    // Leading spaces.
    while (pos < s.size() && s[pos].isSpace()) {
        ++pos;
    }

    // Digits of the number of an ordered list item.
    if (ordered) {
        while (pos < s.size() && s[pos].isDigit()) {
            ++pos;
        }
    }

    // Step over one more character, the one that follows the indent/number.
    ++pos;

    const long long int afterMarker = pos;

    while (pos < s.size() && s[pos].isSpace()) {
        ++pos;
    }

    const long long int spacesCount = pos - afterMarker;

    if (pos == s.length() || spacesCount > 4) {
        // Nothing but spaces after the marker, or more than 4 of them:
        // only one space is counted.
        pos = pos - spacesCount + 1;
    } else if (spacesCount == 0) {
        ++pos;
    }

    return pos;
}
3219
//! \return Level in indents for the given position.
//! That is the 1-based index of the last value in \p indents that is not greater
//! than \p pos, or 0 if all values are greater.
inline long long int
listLevel(const std::vector<long long int> &indents,
          long long int pos)
{
    const auto match = std::find_if(indents.crbegin(), indents.crend(), [pos](long long int indent) {
        return pos >= indent;
    });

    // Count of elements from the found one down to the first one.
    return static_cast<long long int>(indents.crend() - match);
}
3237
3238template<class Trait>
3239inline typename Parser<Trait>::BlockType
3240Parser<Trait>::whatIsTheLine(typename Trait::InternalString &str,
3241 bool inList,
3242 bool inListWithFirstEmptyLine,
3243 bool fensedCodeInList,
3244 typename Trait::String *startOfCode,
3245 ListIndent *indent,
3246 bool emptyLinePreceded,
3247 bool calcIndent,
3248 const std::vector<long long int> *indents)
3249{
3250 replaceTabs<Trait>(str);
3251
3252 const auto first = skipSpaces<Trait>(0, str.asString());
3253
3254 if (first < str.length()) {
3255 auto s = str.sliced(first);
3256
3257 const bool isBlockquote = s.asString().startsWith(Trait::latin1ToString(">"));
3258 const bool indentIn = indentInList(indents, first, false);
3259 bool isHeading = false;
3260
3261 if (first < 4 && isFootnote<Trait>(s.asString())) {
3262 return BlockType::Footnote;
3263 }
3264
3265 if (s.asString().startsWith(Trait::latin1ToString("#")) &&
3266 (indent ? first - indent->m_indent < 4 : first < 4)) {
3267 long long int c = 0;
3268
3269 while (c < s.length() && s[c] == Trait::latin1ToChar('#')) {
3270 ++c;
3271 }
3272
3273 if (c <= 6 && ((c < s.length() && s[c].isSpace()) || c == s.length())) {
3274 isHeading = true;
3275 }
3276 }
3277
3278 if (inList) {
3279 bool isFirstLineEmpty = false;
3280 const auto orderedList = isOrderedList<Trait>(str.asString(), nullptr, nullptr, nullptr,
3281 &isFirstLineEmpty);
3282 const bool fensedCode = isCodeFences<Trait>(s.asString());
3283 const auto codeIndentedBySpaces = emptyLinePreceded && first >= 4 &&
3284 !indentInList(indents, first, true);
3285
3286 if (fensedCodeInList) {
3287 if (indentInList(indents, first, true)) {
3288 if (fensedCode) {
3289 if (startOfCode && startSequence<Trait>(s.asString()).contains(*startOfCode)) {
3290 return BlockType::FensedCodeInList;
3291 }
3292 }
3293
3294 return BlockType::SomethingInList;
3295 }
3296 }
3297
3298 if (fensedCode && indentIn) {
3299 if (startOfCode) {
3300 *startOfCode = startSequence<Trait>(s.asString());
3301 }
3302
3303 return BlockType::FensedCodeInList;
3304 } else if ((((s.asString().startsWith(Trait::latin1ToString("-")) ||
3305 s.asString().startsWith(Trait::latin1ToString("+")) ||
3306 s.asString().startsWith(Trait::latin1ToString("*"))) &&
3307 ((s.length() > 1 && s[1] == Trait::latin1ToChar(' ')) || s.length() == 1)) ||
3308 orderedList) && (first < 4 || indentIn)) {
3309 if (codeIndentedBySpaces) {
3310 return BlockType::CodeIndentedBySpaces;
3311 }
3312
3313 if (indent && calcIndent) {
3314 indent->m_indent = posOfListItem<Trait>(str.asString(), orderedList);
3315 indent->m_level = (indents ? listLevel(*indents, first) : -1);
3316 }
3317
3318 if (s.simplified().length() == 1 || isFirstLineEmpty) {
3319 return BlockType::ListWithFirstEmptyLine;
3320 } else {
3321 return BlockType::List;
3322 }
3323 } else if (indentInList(indents, first, true)) {
3324 return BlockType::SomethingInList;
3325 }
3326 else {
3327 if (!isHeading && !isBlockquote &&
3328 !(fensedCode && first < 4) && !emptyLinePreceded && !inListWithFirstEmptyLine) {
3329 return BlockType::SomethingInList;
3330 }
3331 }
3332 } else {
3333 bool isFirstLineEmpty = false;
3334
3335 const auto orderedList = isOrderedList<Trait>(str.asString(), nullptr, nullptr, nullptr,
3336 &isFirstLineEmpty);
3337 const bool isHLine = first < 4 && isHorizontalLine<Trait>(s.asString());
3338
3339 if (!isHLine &&
3340 (((s.asString().startsWith(Trait::latin1ToString("-")) || s.asString().startsWith(Trait::latin1ToString("+")) ||
3341 s.asString().startsWith(Trait::latin1ToString("*"))) &&
3342 ((s.length() > 1 && s[1] == Trait::latin1ToChar(' ')) || s.length() == 1)) ||
3343 orderedList) && first < 4) {
3344 if (indent && calcIndent) {
3345 indent->m_indent = posOfListItem<Trait>(str.asString(), orderedList);
3346 indent->m_level = (indents ? listLevel(*indents, first) : -1);
3347 }
3348
3349 if (s.simplified().length() == 1 || isFirstLineEmpty) {
3350 return BlockType::ListWithFirstEmptyLine;
3351 } else {
3352 return BlockType::List;
3353 }
3354 }
3355 }
3356
3357 if (str.asString().startsWith(typename Trait::String(4, Trait::latin1ToChar(' ')))) {
3358 return BlockType::CodeIndentedBySpaces;
3359 } else if (isCodeFences<Trait>(str.asString())) {
3360 return BlockType::Code;
3361 } else if (isBlockquote) {
3362 return BlockType::Blockquote;
3363 } else if (isHeading) {
3364 return BlockType::Heading;
3365 }
3366 } else {
3367 return BlockType::EmptyLine;
3368 }
3369
3370 return BlockType::Text;
3371}
3372
3373template<class Trait>
3374inline long long int
3375Parser<Trait>::parseFragment(MdBlock<Trait> &fr,
3376 std::shared_ptr<Block<Trait>> parent,
3377 std::shared_ptr<Document<Trait>> doc,
3378 typename Trait::StringList &linksToParse,
3379 const typename Trait::String &workingPath,
3380 const typename Trait::String &fileName,
3381 bool collectRefLinks,
3382 RawHtmlBlock<Trait> &html)
3383{
3384 if (html.m_continueHtml) {
3385 parseText(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3386 } else {
3387 if (html.m_html) {
3388 if (!collectRefLinks) {
3389 parent->appendItem(html.m_html);
3390 }
3391
3392 resetHtmlTag(html);
3393 }
3394
3395 switch (whatIsTheLine(fr.m_data.front().first)) {
3396 case BlockType::Footnote:
3397 parseFootnote(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
3398 break;
3399
3400 case BlockType::Text:
3401 parseText(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3402 break;
3403
3404 case BlockType::Blockquote:
3405 parseBlockquote(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3406 break;
3407
3408 case BlockType::Code:
3409 parseCode(fr, parent, collectRefLinks);
3410 break;
3411
3412 case BlockType::CodeIndentedBySpaces: {
3413 int indent = 1;
3414
3415 if (fr.m_data.front().first.asString().startsWith(Trait::latin1ToString(" "))) {
3416 indent = 4;
3417 }
3418
3419 parseCodeIndentedBySpaces(fr, parent, collectRefLinks, indent, {}, -1, -1, false);
3420 } break;
3421
3422 case BlockType::Heading:
3423 parseHeading(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
3424 break;
3425
3426 case BlockType::List:
3427 case BlockType::ListWithFirstEmptyLine:
3428 return parseList(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3429
3430 default:
3431 break;
3432 }
3433 }
3434
3435 return -1;
3436}
3437
3438template<class Trait>
3439inline void
3440Parser<Trait>::clearCache()
3441{
3442 m_parsedFiles.clear();
3443}
3444
//! \return Number of columns in table, if the given string is a table header.
//! 0 is returned if there is no "|" in \p s at all.
template<class Trait>
inline int
isTableHeader(const typename Trait::String &s)
{
    if (!s.contains(Trait::latin1ToChar('|'))) {
        return 0;
    }

    const auto row = s.simplified();

    // One "|" at the beginning and one at the end are not column separators.
    const auto from = row.startsWith(Trait::latin1ToString("|")) ? 1 : 0;
    const auto count = row.size() - from - (row.endsWith(Trait::latin1ToString("|")) && row.size() > 1 ? 1 : 0);
    const auto cells = row.sliced(from, count);

    // N separators mean N + 1 columns.
    int columns = 1;
    bool escaped = false;

    for (long long int i = 0; i < cells.size(); ++i) {
        if (escaped) {
            // Character after a backslash is never a separator.
            escaped = false;
        } else if (cells[i] == Trait::latin1ToChar('\\')) {
            escaped = true;
        } else if (cells[i] == Trait::latin1ToChar('|')) {
            ++columns;
        }
    }

    return columns;
}
3482
3483template<class Trait>
3484inline void
3485Parser<Trait>::parseText(MdBlock<Trait> &fr,
3486 std::shared_ptr<Block<Trait>> parent,
3487 std::shared_ptr<Document<Trait>> doc,
3488 typename Trait::StringList &linksToParse,
3489 const typename Trait::String &workingPath,
3490 const typename Trait::String &fileName,
3491 bool collectRefLinks,
3492 RawHtmlBlock<Trait> &html)
3493{
3494 const auto h = isTableHeader<Trait>(fr.m_data.front().first.asString());
3495 const auto c = fr.m_data.size() > 1 ? isTableAlignment<Trait>(fr.m_data[1].first.asString()) : 0;
3496
3497 if (c && h && c == h && !html.m_continueHtml) {
3498 parseTable(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, c);
3499
3500 if (!fr.m_data.empty()) {
3501 StringListStream<Trait> stream(fr.m_data);
3502
3503 Parser<Trait>::parse(stream, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
3504 }
3505 } else {
3506 parseParagraph(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks, html);
3507 }
3508}
3509
3510//! Find and remove heading label.
3511template<class Trait>
3512inline std::pair<typename Trait::String, WithPosition>
3513findAndRemoveHeaderLabel(typename Trait::InternalString &s)
3514{
3515 const auto start = s.asString().indexOf(Trait::latin1ToString("{#"));
3516
3517 if (start >= 0) {
3518 long long int p = start + 2;
3519
3520 for (; p < s.length(); ++p) {
3521 if (s[p] == Trait::latin1ToChar('}')) {
3522 break;
3523 }
3524 }
3525
3526 if (p < s.length() && s[p] == Trait::latin1ToChar('}')) {
3527 WithPosition pos;
3528 pos.setStartColumn(s.virginPos(start));
3529 pos.setEndColumn(s.virginPos(p));
3530
3531 const auto label = s.sliced(start, p - start + 1).asString();
3532 s.remove(start, p - start + 1);
3533 return {label, pos};
3534 }
3535 }
3536
3537 return {};
3538}
3539
//! Convert string to label.
//! Letters, digits, "-" and "_" are kept in lower case, every space character
//! becomes "-", everything else is dropped.
template<class Trait>
inline typename Trait::String
stringToLabel(const typename Trait::String &s)
{
    typename Trait::String label;

    for (long long int idx = 0; idx < s.length(); ++idx) {
        const auto ch = s[idx];

        const bool kept = ch.isLetter() || ch.isDigit() || ch == Trait::latin1ToChar('-') ||
                          ch == Trait::latin1ToChar('_');

        if (kept) {
            label.push_back(ch.toLower());

            continue;
        }

        if (ch.isSpace()) {
            label.push_back(Trait::latin1ToString("-"));
        }
    }

    return label;
}
3560
3561//! Convert Paragraph to label.
3562template<class Trait>
3563inline typename Trait::String
3565{
3566 typename Trait::String l;
3567
3568 if (!p) {
3569 return l;
3570 }
3571
3572 for (auto it = p->items().cbegin(), last = p->items().cend(); it != last; ++it) {
3573 switch ((*it)->type()) {
3574 case ItemType::Text: {
3575 auto t = static_cast<Text<Trait> *>(it->get());
3576 const auto text = t->text();
3577 l.push_back(stringToLabel<Trait>(text));
3578 } break;
3579
3580 case ItemType::Image: {
3581 auto i = static_cast<Image<Trait> *>(it->get());
3582
3583 if (!i->p()->isEmpty()) {
3584 l.push_back(paragraphToLabel(i->p().get()));
3585 } else if (!i->text().isEmpty()) {
3586 l.push_back(stringToLabel<Trait>(i->text()));
3587 }
3588 } break;
3589
3590 case ItemType::Link: {
3591 auto link = static_cast<Link<Trait> *>(it->get());
3592
3593 if (!link->p()->isEmpty()) {
3594 l.push_back(paragraphToLabel(link->p().get()));
3595 } else if (!link->text().isEmpty()) {
3596 l.push_back(stringToLabel<Trait>(link->text()));
3597 }
3598 } break;
3599
3600 case ItemType::Code: {
3601 auto c = static_cast<Code<Trait> *>(it->get());
3602
3603 if (!c->text().isEmpty()) {
3604 l.push_back(stringToLabel<Trait>(c->text()));
3605 }
3606 } break;
3607
3608 default:
3609 break;
3610 }
3611 }
3612
3613 return l;
3614}
3615
3616//! Find and remove closing sequence of "#" in heading.
3617template<class Trait>
3618inline WithPosition
3619findAndRemoveClosingSequence(typename Trait::InternalString &s)
3620{
3621 long long int end = -1;
3622 long long int start = -1;
3623
3624 for (long long int i = s.length() - 1; i >= 0; --i) {
3625 if (!s[i].isSpace() && s[i] != Trait::latin1ToChar('#') && end == -1) {
3626 return {};
3627 }
3628
3629 if (s[i] == Trait::latin1ToChar('#')) {
3630 if (end == -1) {
3631 end = i;
3632 }
3633
3634 if (i - 1 >= 0) {
3635 if (s[i - 1].isSpace()) {
3636 start = i;
3637 break;
3638 } else if (s[i - 1] != Trait::latin1ToChar('#')) {
3639 return {};
3640 }
3641 } else {
3642 start = 0;
3643 }
3644 }
3645 }
3646
3647 WithPosition ret;
3648
3649 if (start != -1 && end != -1) {
3650 ret.setStartColumn(s.virginPos(start));
3651 ret.setEndColumn(s.virginPos(end));
3652
3653 s.remove(start, end - start + 1);
3654 }
3655
3656 return ret;
3657}
3658
//! Parse a heading that starts with "#" characters, taken from the first line of \p fr.
//!
//! Nothing is done when \p fr has no lines or when \p collectRefLinks is set. Otherwise the
//! first line is processed as follows: leading spaces are skipped, the run of "#" gives the
//! heading level and the opening delimiter, an optional header label and an optional closing
//! "#" sequence are cut out of the remaining text, and what is left is parsed as inline
//! content. The heading is registered in \p doc under its label (the explicit one, or one
//! generated from the heading text) combined with \p workingPath and \p fileName, and is
//! appended to \p parent. The processed line is erased from \p fr.
//!
//! NOTE(review): this listing jumps from line 3732 to 3734, so one statement between the
//! declaration of `tmp` and `tmp.push_back(...)` is not visible here - check the original file.
3659template<class Trait>
3660inline void
3661Parser<Trait>::parseHeading(MdBlock<Trait> &fr,
3662 std::shared_ptr<Block<Trait>> parent,
3663 std::shared_ptr<Document<Trait>> doc,
3664 typename Trait::StringList &linksToParse,
3665 const typename Trait::String &workingPath,
3666 const typename Trait::String &fileName,
3667 bool collectRefLinks)
3668{
3669 if (!fr.m_data.empty() && !collectRefLinks) {
3670 auto line = fr.m_data.front().first;
3671
// The heading occupies exactly one line: from the first non-space character to the end of it.
3672 std::shared_ptr<Heading<Trait>> h(new Heading<Trait>);
3673 h->setStartColumn(line.virginPos(skipSpaces<Trait>(0, line.asString())));
3674 h->setStartLine(fr.m_data.front().second.m_lineNumber);
3675 h->setEndColumn(line.virginPos(line.length() - 1));
3676 h->setEndLine(h->startLine());
3677
3678 long long int pos = 0;
3679 pos = skipSpaces<Trait>(pos, line.asString());
3680
3681 if (pos > 0) {
3682 line = line.sliced(pos);
3683 }
3684
3685 pos = 0;
3686 int lvl = 0;
3687
// Count the opening "#" characters; their number is the heading level.
3688 while (pos < line.length() && line[pos] == Trait::latin1ToChar('#')) {
3689 ++lvl;
3690 ++pos;
3691 }
3692
3693 WithPosition startDelim = {h->startColumn(), h->startLine(),
3694 line.virginPos(pos - 1), h->startLine()};
3695
3696 pos = skipSpaces<Trait>(pos, line.asString());
3697
// Keep only the text after the opening delimiter in the block.
3698 if (pos > 0) {
3699 fr.m_data.front().first = line.sliced(pos);
3700 }
3701
3702 auto label = findAndRemoveHeaderLabel<Trait>(fr.m_data.front().first);
3703
3704 typename Heading<Trait>::Delims delims = {startDelim};
3705
3706 auto endDelim = findAndRemoveClosingSequence<Trait>(fr.m_data.front().first);
3707
// A start column of -1 means that no closing sequence was found.
3708 if (endDelim.startColumn() != -1) {
3709 endDelim.setStartLine(fr.m_data.front().second.m_lineNumber);
3710 endDelim.setEndLine(endDelim.startLine());
3711
3712 delims.push_back(endDelim);
3713 }
3714
3715 h->setDelims(delims);
3716
3717 h->setLevel(lvl);
3718
// Explicit label: drop its first and last characters, then append "/", the working path and the file name.
3719 if (!label.first.isEmpty()) {
3720 h->setLabel(label.first.sliced(1, label.first.length() - 2) + Trait::latin1ToString("/") +
3721 (!workingPath.isEmpty() ? workingPath + Trait::latin1ToString("/") :
3722 Trait::latin1ToString("")) + fileName);
3723
3724 label.second.setStartLine(fr.m_data.front().second.m_lineNumber);
3725 label.second.setEndLine(label.second.startLine());
3726
3727 h->setLabelPos(label.second);
3728 }
3729
3730 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
3731
3732 typename MdBlock<Trait>::Data tmp;
3734 tmp.push_back(fr.m_data.front());
3735 MdBlock<Trait> block = {tmp, 0};
3736
3737 RawHtmlBlock<Trait> html;
3738
// Parse the remaining heading text as inline content into the temporary paragraph.
3739 parseFormattedTextLinksImages(block, p, doc, linksToParse, workingPath, fileName,
3740 false, false, html, false);
3741
3742 fr.m_data.erase(fr.m_data.cbegin());
3743
// If the first parsed item is itself a paragraph, use it as the heading text.
3744 if (p->items().size() && p->items().at(0)->type() == ItemType::Paragraph) {
3745 h->setText(std::static_pointer_cast<Paragraph<Trait>>(p->items().at(0)));
3746 } else {
3747 h->setText(p);
3748 }
3749
3750 if (h->isLabeled()) {
3751 doc->insertLabeledHeading(h->label(), h);
3752 } else {
// No explicit label: build one as "#" + label of the heading text + "/" + working path + file name.
3753 typename Trait::String label = Trait::latin1ToString("#") +
3754 paragraphToLabel(h->text().get());
3755
3756 label += Trait::latin1ToString("/") +
3757 (!workingPath.isEmpty() ? workingPath + Trait::latin1ToString("/") :
3758 Trait::latin1ToString("")) + fileName;
3759
3760 h->setLabel(label);
3761
3762 doc->insertLabeledHeading(label, h);
3763 }
3764
3765 parent->appendItem(h);
3766 }
3767}
3768
//! Un-escape pipe characters in the text of a table cell before the cell is parsed.
//!
//! \param s Cell text. It is taken by value, so the caller's string is left untouched.
//! \return Copy of \p s in which every "\|" sequence is replaced with "|".
template<class Trait>
inline typename Trait::InternalString
prepareTableData(typename Trait::InternalString s)
{
    const auto escapedPipe = Trait::latin1ToString("\\|");
    const auto plainPipe = Trait::latin1ToString("|");

    s.replace(escapedPipe, plainPipe);

    return s;
}
3778
3779//! Split table's row on cells.
3780template<class Trait>
3781inline std::pair<typename Trait::InternalStringList, std::vector<long long int>>
3782splitTableRow(const typename Trait::InternalString &s)
3783{
3784 typename Trait::InternalStringList res;
3785 std::vector<long long int> columns;
3786
3787 bool backslash = false;
3788 long long int start = 0;
3789
3790 for (long long int i = 0; i < s.length(); ++i) {
3791 bool now = false;
3792
3793 if (s[i] == Trait::latin1ToChar('\\') && !backslash) {
3794 backslash = true;
3795 now = true;
3796 } else if (s[i] == Trait::latin1ToChar('|') && !backslash) {
3797 res.push_back(prepareTableData<Trait>(s.sliced(start, i - start)));
3798 columns.push_back(s.virginPos(i));
3799 start = i + 1;
3800 }
3801
3802 if (!now) {
3803 backslash = false;
3804 }
3805 }
3806
3807 res.push_back(prepareTableData<Trait>(s.sliced(start, s.length() - start)));
3808
3809 return {res, columns};
3810}
3811
//! Parse a table from block \p fr and append it to \p parent.
//!
//! Requires at least two lines in \p fr; with fewer lines nothing happens. Line 1 is read as
//! the alignment row: it is split on "|" and every non-empty piece sets the alignment of one
//! more column. That line is then erased and the remaining lines, starting with line 0, are
//! parsed as rows until one of them is rejected. The accepted lines are erased from \p fr.
//! At most \p columnsCount cells are created per row. The table is appended to \p parent only
//! when it is not empty and \p collectRefLinks is false.
//!
//! NOTE(review): this listing jumps over lines 3943, 3947 and 3949, so the declaration of
//! `a` and the assignments inside the two alignment branches are not visible here - check
//! the original file before changing the alignment logic.
3812template<class Trait>
3813inline void
3814Parser<Trait>::parseTable(MdBlock<Trait> &fr,
3815 std::shared_ptr<Block<Trait>> parent,
3816 std::shared_ptr<Document<Trait>> doc,
3817 typename Trait::StringList &linksToParse,
3818 const typename Trait::String &workingPath,
3819 const typename Trait::String &fileName,
3820 bool collectRefLinks,
3821 int columnsCount)
3822{
3823 static const char sep = '|';
3824
3825 if (fr.m_data.size() >= 2) {
3826 std::shared_ptr<Table<Trait>> table(new Table<Trait>);
3827 table->setStartColumn(fr.m_data.front().first.virginPos(0));
3828 table->setStartLine(fr.m_data.front().second.m_lineNumber);
3829 table->setEndColumn(fr.m_data.back().first.virginPos(fr.m_data.back().first.length() - 1));
3830 table->setEndLine(fr.m_data.back().second.m_lineNumber);
3831
// Parses one line as a table row; returns false when the line does not belong to the table.
3832 auto parseTableRow = [&](const typename MdBlock<Trait>::Line &lineData) -> bool {
3833 const auto &row = lineData.first;
3834
// Lines starting with the space literal below are rejected.
3835 if (row.asString().startsWith(Trait::latin1ToString(" "))) {
3836 return false;
3837 }
3838
3839 auto line = row;
3840 auto p = skipSpaces<Trait>(0, line.asString());
3841
// A line made only of whitespace ends the table.
3842 if (p == line.length()) {
3843 return false;
3844 }
3845
// Drop the optional leading separator together with the spaces before it.
3846 if (line[p] == Trait::latin1ToChar(sep)) {
3847 line.remove(0, p + 1);
3848 }
3849
// Find the last non-space character.
3850 for (p = line.length() - 1; p >= 0; --p) {
3851 if (!line[p].isSpace()) {
3852 break;
3853 }
3854 }
3855
3856 if (p < 0) {
3857 return false;
3858 }
3859
// Drop the optional trailing separator together with everything after it.
3860 if (line[p] == Trait::latin1ToChar(sep)) {
3861 line.remove(p, line.length() - p);
3862 }
3863
// Cell boundaries: start of the row, every inner separator, end of the row.
3864 auto columns = splitTableRow<Trait>(line);
3865 columns.second.insert(columns.second.begin(), row.virginPos(0));
3866 columns.second.push_back(row.virginPos(row.length() - 1));
3867
3868 std::shared_ptr<TableRow<Trait>> tr(new TableRow<Trait>);
3869 tr->setStartColumn(row.virginPos(0));
3870 tr->setStartLine(lineData.second.m_lineNumber);
3871 tr->setEndColumn(row.virginPos(row.length() - 1));
3872 tr->setEndLine(lineData.second.m_lineNumber);
3873
3874 int col = 0;
3875
3876 for (auto it = columns.first.begin(), last = columns.first.end(); it != last; ++it, ++col) {
// Cells beyond the declared column count are ignored.
3877 if (col == columnsCount) {
3878 break;
3879 }
3880
3881 std::shared_ptr<TableCell<Trait>> c(new TableCell<Trait>);
3882 c->setStartColumn(columns.second.at(col));
3883 c->setStartLine(lineData.second.m_lineNumber);
3884 c->setEndColumn(columns.second.at(col + 1));
3885 c->setEndLine(lineData.second.m_lineNumber);
3886
3887 if (!it->isEmpty()) {
3888 it->replace(Trait::latin1ToString("&#124;"), Trait::latin1ToChar(sep));
3889
3890 typename MdBlock<Trait>::Data fragment;
3891 fragment.push_back({*it, lineData.second});
3892 MdBlock<Trait> block = {fragment, 0};
3893
3894 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
3895
3896 RawHtmlBlock<Trait> html;
3897
// Parse the cell text as inline content into a temporary paragraph.
3898 parseFormattedTextLinksImages(block, p, doc, linksToParse, workingPath, fileName,
3899 collectRefLinks, false, html, false);
3900
// Move the parsed items into the cell, flattening nested paragraphs by one level.
3901 if (!p->isEmpty()) {
3902 for (auto it = p->items().cbegin(), last = p->items().cend(); it != last; ++it ) {
3903 switch ((*it)->type()) {
3904 case ItemType::Paragraph: {
3905 const auto pp = std::static_pointer_cast<Paragraph<Trait>>(*it);
3906
3907 for (auto it = pp->items().cbegin(), last = pp->items().cend(); it != last; ++it) {
3908 c->appendItem((*it));
3909 }
3910 }
3911 break;
3912
3913 default:
3914 c->appendItem((*it));
3915 break;
3916 }
3917 }
3918 }
3919
3920 if (html.m_html.get()) {
3921 c->appendItem(html.m_html);
3922 }
3923 }
3924
3925 tr->appendCell(c);
3926 }
3927
3928 if (!tr->isEmpty())
3929 table->appendRow(tr);
3930
3931 return true;
3932 };
3933
// Read column alignments from the second line of the block.
3934 {
3935 auto fmt = fr.m_data.at(1).first;
3936
3937 auto columns = fmt.split(typename Trait::InternalString(Trait::latin1ToChar(sep)));
3938
3939 for (auto it = columns.begin(), last = columns.end(); it != last; ++it) {
3940 *it = it->simplified();
3941
3942 if (!it->isEmpty()) {
3944
3945 if (it->asString().endsWith(Trait::latin1ToString(":")) &&
3946 it->asString().startsWith(Trait::latin1ToString(":"))) {
3948 } else if (it->asString().endsWith(Trait::latin1ToString(":"))) {
3950 }
3951
3952 table->setColumnAlignment(table->columnsCount(), a);
3953 }
3954 }
3955 }
3956
// The alignment line carries no cell data, remove it before parsing rows.
3957 fr.m_data.erase(fr.m_data.cbegin() + 1);
3958
3959 long long int r = 0;
3960
3961 for (const auto &line : std::as_const(fr.m_data)) {
3962 if (!parseTableRow(line)) {
3963 break;
3964 }
3965
3966 ++r;
3967 }
3968
// Remove the lines that were consumed as table rows.
3969 fr.m_data.erase(fr.m_data.cbegin(), fr.m_data.cbegin() + r);
3970
3971 if (!table->isEmpty() && !collectRefLinks) {
3972 parent->appendItem(table);
3973 }
3974 }
3975}
3976
3977//! \return Is the given string a heading's service sequence?
3978template<class Trait>
3979inline bool
3980isH(const typename Trait::String &s,
3981 const typename Trait::Char &c)
3982{
3983 long long int p = skipSpaces<Trait>(0, s);
3984
3985 if (p > 3) {
3986 return false;
3987 }
3988
3989 const auto start = p;
3990
3991 for (; p < s.size(); ++p) {
3992 if (s[p] != c) {
3993 break;
3994 }
3995 }
3996
3997 if (p - start < 1) {
3998 return false;
3999 }
4000
4001 for (; p < s.size(); ++p) {
4002 if (!s[p].isSpace()) {
4003 return false;
4004 }
4005 }
4006
4007 return true;
4008}
4009
4010//! \return Is the given string a heading's service sequence of level 1?
4011template<class Trait>
4012inline bool
4013isH1(const typename Trait::String &s)
4014{
4015 return isH<Trait>(s, Trait::latin1ToChar('='));
4016}
4017
4018//! \return Is the given string a heading's service sequence of level 2?
4019template<class Trait>
4020inline bool
4021isH2(const typename Trait::String &s)
4022{
4023 return isH<Trait>(s, Trait::latin1ToChar('-'));
4024}
4025
4026//! \return Previous position in the block.
//!
//! The result is a (position, line) pair. When \p pos is greater than zero the answer is
//! simply the preceding position on the same line. Otherwise the block line whose line
//! number equals \p line is looked up and, if it is not the first line of the block, the
//! result is the virgin position of the last character of the preceding block line paired
//! with `line - 1`. In every other case \p pos and \p line are returned unchanged.
//!
//! NOTE(review): this listing jumps from line 4028 to 4030, so the declarator line with the
//! function name and the first parameter (used below as `fr`) is not visible here.
4027template<class Trait>
4028inline std::pair<long long int, long long int>
4030 long long int pos,
4031 long long int line)
4032{
4033 if (pos > 0) {
4034 return {pos - 1, line};
4035 }
4036
// At the start of a line: step back to the end of the previous line of the block, if any.
4037 for (long long int i = 0; i < static_cast<long long int>(fr.m_data.size()); ++i) {
4038 if (fr.m_data.at(i).second.m_lineNumber == line) {
4039 if (i > 0) {
4040 return {fr.m_data.at(i - 1).first.virginPos(fr.m_data.at(i - 1).first.length() - 1),
4041 line - 1};
4042 }
4043 }
4044 }
4045
4046 return {pos, line};
4047}
4048
4049//! \return Next position in the block.
//!
//! The result is a (position, line) pair. The block line whose line number equals \p line
//! is looked up. If `pos + 1` does not go past the virgin position of that line's last
//! character, the answer is `pos + 1` on the same line. Otherwise, if another block line
//! follows, the answer is the virgin position of its first character together with its line
//! number. In every other case \p pos and \p line are returned unchanged.
//!
//! NOTE(review): this listing jumps from line 4051 to 4053, so the declarator line with the
//! function name and the first parameter (used below as `fr`) is not visible here.
4050template<class Trait>
4051inline std::pair<long long int, long long int>
4053 long long int pos,
4054 long long int line)
4055{
4056 for (long long int i = 0; i < static_cast<long long int>(fr.m_data.size()); ++i) {
4057 if (fr.m_data.at(i).second.m_lineNumber == line) {
// Still room on the current line?
4058 if (fr.m_data.at(i).first.virginPos(fr.m_data.at(i).first.length() - 1) >= pos + 1) {
4059 return {pos + 1, line};
4060 } else if (i + 1 < static_cast<long long int>(fr.m_data.size())) {
4061 return {fr.m_data.at(i + 1).first.virginPos(0), fr.m_data.at(i + 1).second.m_lineNumber};
4062 } else {
4063 return {pos, line};
4064 }
4065 }
4066 }
4067
4068 return {pos, line};
4069}
4070
4071template<class Trait>
4072inline void
4073Parser<Trait>::parseParagraph(MdBlock<Trait> &fr,
4074 std::shared_ptr<Block<Trait>> parent,
4075 std::shared_ptr<Document<Trait>> doc,
4076 typename Trait::StringList &linksToParse,
4077 const typename Trait::String &workingPath,
4078 const typename Trait::String &fileName,
4079 bool collectRefLinks,
4080 RawHtmlBlock<Trait> &html)
4081{
4082 parseFormattedTextLinksImages(fr, parent, doc, linksToParse, workingPath, fileName,
4083 collectRefLinks, false, html, false);
4084}
4085
//! Static helpers that forward to the free-tag accessors of a RawHtml item.
//!
//! NOTE(review): this listing jumps from line 4086 to 4088, so the line that opens the
//! enclosing class/struct (its name and the opening brace) is not visible here.
4086template<class Trait>
//! \return Result of isFreeTag() of the given \p html item.
4088 static bool
4089 isFreeTag(std::shared_ptr<RawHtml<Trait>> html)
4090 {
4091 return html->isFreeTag();
4092 }
4093
//! Pass \p on to setFreeTag() of the given \p html item.
4094 static void
4095 setFreeTag(std::shared_ptr<RawHtml<Trait>> html, bool on)
4096 {
4097 html->setFreeTag(on);
4098 }
4099};
4100
4101template<class Trait>
4102inline typename Parser<Trait>::Delims
4103Parser<Trait>::collectDelimiters(const typename MdBlock<Trait>::Data &fr)
4104{
4105 Delims d;
4106
4107 for (long long int line = 0; line < (long long int)fr.size(); ++line) {
4108 const typename Trait::String &str = fr.at(line).first.asString();
4109 const auto p = skipSpaces<Trait>(0, str);
4110 const auto withoutSpaces = str.sliced(p);
4111
4112 if (isHorizontalLine<Trait>(withoutSpaces) && p < 4) {
4113 d.push_back({Delimiter::HorizontalLine, line, 0, str.length(), false, false, false});
4114 } else if (isH1<Trait>(withoutSpaces) && p < 4) {
4115 d.push_back({Delimiter::H1, line, 0, str.length(), false, false, false});
4116 } else if (isH2<Trait>(withoutSpaces) && p < 4) {
4117 d.push_back({Delimiter::H2, line, 0, str.length(), false, false, false});
4118 } else {
4119 bool backslash = false;
4120 bool word = false;
4121
4122 for (long long int i = p; i < str.size(); ++i) {
4123 bool now = false;
4124
4125 if (str[i] == Trait::latin1ToChar('\\') && !backslash) {
4126 backslash = true;
4127 now = true;
4128 }
4129 // * or _
4130 else if ((str[i] == Trait::latin1ToChar('_') || str[i] == Trait::latin1ToChar('*')) && !backslash) {
4131 typename Trait::String style;
4132
4133 const bool punctBefore = (i > 0 ? str[i - 1].isPunct() || str[i - 1].isSymbol() : true);
4134 const bool uWhitespaceBefore = (i > 0 ? Trait::isUnicodeWhitespace(str[i - 1]) : true);
4135 const bool uWhitespaceOrPunctBefore = uWhitespaceBefore || punctBefore;
4136 const bool alNumBefore = (i > 0 ? str[i - 1].isLetterOrNumber() : false);
4137
4138 const auto ch = str[i];
4139
4140 while (i < str.length() && str[i] == ch) {
4141 style.push_back(str[i]);
4142 ++i;
4143 }
4144
4145 typename Delimiter::DelimiterType dt = Delimiter::Unknown;
4146
4147 if (ch == Trait::latin1ToChar('*')) {
4148 dt = Delimiter::Emphasis1;
4149 } else {
4150 dt = Delimiter::Emphasis2;
4151 }
4152
4153 const bool punctAfter = (i < str.length() ? str[i].isPunct() || str[i].isSymbol() : true);
4154 const bool uWhitespaceAfter = (i < str.length() ? Trait::isUnicodeWhitespace(str[i]) : true);
4155 const bool alNumAfter = (i < str.length() ? str[i].isLetterOrNumber() : false);
4156 const bool leftFlanking = !uWhitespaceAfter && (!punctAfter || (punctAfter && uWhitespaceOrPunctBefore))
4157 && !(ch == Trait::latin1ToChar('_') && alNumBefore && alNumAfter);
4158 const bool rightFlanking = !uWhitespaceBefore && (!punctBefore || (punctBefore && (uWhitespaceAfter || punctAfter)))
4159 && !(ch == Trait::latin1ToChar('_') && alNumBefore && alNumAfter);
4160
4161 if (leftFlanking || rightFlanking) {
4162 for (auto j = 0; j < style.length(); ++j) {
4163 d.push_back({dt, line, i - style.length() + j, 1,
4164 word, false, leftFlanking, rightFlanking});
4165 }
4166
4167 word = false;
4168 } else {
4169 word = true;
4170 }
4171
4172 --i;
4173 }
4174 // ~
4175 else if (str[i] == Trait::latin1ToChar('~') && !backslash) {
4176 typename Trait::String style;
4177
4178 const bool punctBefore = (i > 0 ? str[i - 1].isPunct() || str[i - 1].isSymbol() : true);
4179 const bool uWhitespaceBefore = (i > 0 ? Trait::isUnicodeWhitespace(str[i - 1]) : true);
4180 const bool uWhitespaceOrPunctBefore = uWhitespaceBefore || punctBefore;
4181
4182 while (i < str.length() && str[i] == Trait::latin1ToChar('~')) {
4183 style.push_back(str[i]);
4184 ++i;
4185 }
4186
4187 if (style.length() <= 2) {
4188 const bool punctAfter = (i < str.length() ? str[i].isPunct() || str[i].isSymbol() : true);
4189 const bool uWhitespaceAfter = (i < str.length() ? Trait::isUnicodeWhitespace(str[i]) : true);
4190 const bool leftFlanking = !uWhitespaceAfter && (!punctAfter || (punctAfter && uWhitespaceOrPunctBefore));
4191 const bool rightFlanking = !uWhitespaceBefore && (!punctBefore || (punctBefore && (uWhitespaceAfter || punctAfter)));
4192
4193 if (leftFlanking || rightFlanking) {
4194 d.push_back({Delimiter::Strikethrough,
4195 line,
4196 i - style.length(),
4197 style.length(),
4198 word,
4199 false,
4200 leftFlanking,
4201 rightFlanking});
4202
4203 word = false;
4204 } else {
4205 word = true;
4206 }
4207 } else {
4208 word = true;
4209 }
4210
4211 --i;
4212 }
4213 // [
4214 else if (str[i] == Trait::latin1ToChar('[') && !backslash) {
4215 d.push_back({Delimiter::SquareBracketsOpen, line, i, 1, word, false});
4216
4217 word = false;
4218 }
4219 // !
4220 else if (str[i] == Trait::latin1ToChar('!') && !backslash) {
4221 if (i + 1 < str.length()) {
4222 if (str[i + 1] == Trait::latin1ToChar('[')) {
4223 d.push_back({Delimiter::ImageOpen, line, i, 2, word, false});
4224
4225 ++i;
4226
4227 word = false;
4228 } else {
4229 word = true;
4230 }
4231 } else {
4232 word = true;
4233 }
4234 }
4235 // (
4236 else if (str[i] == Trait::latin1ToChar('(') && !backslash) {
4237 d.push_back({Delimiter::ParenthesesOpen, line, i, 1, word, false});
4238
4239 word = false;
4240 }
4241 // ]
4242 else if (str[i] == Trait::latin1ToChar(']') && !backslash) {
4243 d.push_back({Delimiter::SquareBracketsClose, line, i, 1, word, false});
4244
4245 word = false;
4246 }
4247 // )
4248 else if (str[i] == Trait::latin1ToChar(')') && !backslash) {
4249 d.push_back({Delimiter::ParenthesesClose, line, i, 1, word, false});
4250
4251 word = false;
4252 }
4253 // <
4254 else if (str[i] == Trait::latin1ToChar('<') && !backslash) {
4255 d.push_back({Delimiter::Less, line, i, 1, word, false});
4256
4257 word = false;
4258 }
4259 // >
4260 else if (str[i] == Trait::latin1ToChar('>') && !backslash) {
4261 d.push_back({Delimiter::Greater, line, i, 1, word, false});
4262
4263 word = false;
4264 }
4265 // `
4266 else if (str[i] == Trait::latin1ToChar('`')) {
4267 typename Trait::String code;
4268
4269 while (i < str.length() && str[i] == Trait::latin1ToChar('`')) {
4270 code.push_back(str[i]);
4271 ++i;
4272 }
4273
4274 d.push_back({Delimiter::InlineCode,
4275 line,
4276 i - code.length() - (backslash ? 1 : 0),
4277 code.length() + (backslash ? 1 : 0),
4278 word,
4279 backslash});
4280
4281 word = false;
4282
4283 --i;
4284 }
4285 // $
4286 else if (str[i] == Trait::latin1ToChar('$')) {
4287 typename Trait::String m;
4288
4289 while (i < str.length() && str[i] == Trait::latin1ToChar('$')) {
4290 m.push_back(str[i]);
4291 ++i;
4292 }
4293
4294 if (m.length() <= 2 && !backslash) {
4295 d.push_back({Delimiter::Math, line, i - m.length(), m.length(),
4296 false, false, false, false});
4297 }
4298
4299 word = false;
4300
4301 --i;
4302 } else {
4303 word = true;
4304 }
4305
4306 if (!now) {
4307 backslash = false;
4308 }
4309 }
4310 }
4311 }
4312
4313 return d;
4314}
4315
//! \return Is the given string a line break.
//!
//! A line is a hard line break when it ends with two spaces or with a backslash. A single
//! trailing space is not a line break; this matches lineBreakLength(), which reports a
//! length of 2 for the space form.
//!
//! \param s Text of the line.
template<class Trait>
inline bool
isLineBreak(const typename Trait::String &s)
{
    // Two spaces, not one: the space form of a hard line break is two characters long.
    return (s.endsWith(Trait::latin1ToString("  ")) || s.endsWith(Trait::latin1ToString("\\")));
}
4323
//! \return Length of line break.
//!
//! The space form of a hard line break (two trailing spaces) is 2 characters long. Anything
//! else is treated as the backslash form and is 1 character long.
//!
//! \param s Text of the line.
template<class Trait>
inline long long int
lineBreakLength(const typename Trait::String &s)
{
    // The literal must be as long as the reported length, i.e. two spaces.
    return (s.endsWith(Trait::latin1ToString("  ")) ? 2 : 1);
}
4331
//! Remove line break from the end of string.
//!
//! Only the backslash form is handled: a trailing backslash is cut off. Any other string,
//! including one that ends with spaces, is returned as is.
//!
//! \param s Text of the line.
//! \return Copy of \p s without the trailing backslash.
template<class Trait>
inline typename Trait::String
removeLineBreak(const typename Trait::String &s)
{
    const bool endsWithBackslash = s.endsWith(Trait::latin1ToString("\\"));

    if (!endsWithBackslash) {
        return s;
    }

    return s.sliced(0, s.size() - 1);
}
4343
4344//! Initialize item with style information and set it as last item.
//!
//! The currently collected open styles of \p po are copied into \p item, the collection in
//! \p po is cleared, and \p item is remembered in \p po as the last item with style.
//!
//! NOTE(review): this listing jumps from line 4346 to 4348, so the declarator line with the
//! function name and the first parameter (used below as `po`) is not visible here.
4345template<class Trait>
4346inline void
4348 std::shared_ptr<ItemWithOpts<Trait>> item)
4349{
4350 item->openStyles() = po.m_openStyles;
4351 po.m_openStyles.clear();
4352 po.m_lastItemWithStyle = item;
4353}
4354
4355//! Make text item.
//!
//! Entities in \p text are replaced and backslashes removed. Trailing spaces are removed
//! when \p doRemoveSpacesAtEnd is set or when \p endPos is the last position of the end
//! line. Leading spaces are removed when \p startPos is 0. If anything is left, a Text item
//! with the current options of `po` is created, positioned with the virgin coordinates of
//! the given range, and appended to the parent in `po`; the raw text is recorded as well.
//! If nothing is left, only the current position in `po` is set to \p startPos.
//!
//! \p startLine and \p endLine are indexes into the lines of the block held by `po`.
//!
//! NOTE(review): this listing jumps from line 4358 to 4360, so the declaration of the
//! second parameter (used below as `po`) is not visible here.
4356template<class Trait>
4357inline void
4358makeTextObject(const typename Trait::String &text,
4360 long long int startPos,
4361 long long int startLine,
4362 long long int endPos,
4363 long long int endLine,
4364 bool doRemoveSpacesAtEnd = false)
4365{
// A negative end position means that the text ended on the previous line.
4366 if (endPos < 0 && endLine - 1 >= 0) {
4367 endPos = po.m_fr.m_data.at(endLine - 1).first.length() - 1;
4368 --endLine;
4369 }
4370
// Text that reaches the end of its line never keeps trailing spaces.
4371 if (endPos == po.m_fr.m_data.at(endLine).first.length() - 1) {
4372 doRemoveSpacesAtEnd = true;
4373 }
4374
4375 auto s = removeBackslashes<typename Trait::String, Trait>(replaceEntity<Trait>(text));
4376
4377 if (doRemoveSpacesAtEnd) {
4378 removeSpacesAtEnd<typename Trait::String>(s);
4379 }
4380
// Text that starts at the beginning of a line never keeps leading spaces.
4381 if (startPos == 0) {
4382 if (s.length()) {
4383 const auto p = skipSpaces<Trait>(0, s);
4384
4385 if (p > 0) {
4386 s.remove(0, p);
4387 }
4388 }
4389 }
4390
4391 if (!s.isEmpty()) {
4392 po.m_rawTextData.push_back({text, startPos, startLine});
4393
4394 std::shared_ptr<Text<Trait>> t(new Text<Trait>);
4395 t->setText(s);
4396 t->setOpts(po.m_opts);
4397 t->setStartColumn(po.m_fr.m_data.at(startLine).first.virginPos(startPos));
4398 t->setStartLine(po.m_fr.m_data.at(startLine).second.m_lineNumber);
4399 t->setEndColumn(po.m_fr.m_data.at(endLine).first.virginPos(endPos, true));
4400 t->setEndLine(po.m_fr.m_data.at(endLine).second.m_lineNumber);
4401
4402 initLastItemWithOpts<Trait>(po, t);
4403
// The parent now ends where the new text item ends.
4404 po.m_parent->setEndColumn(t->endColumn());
4405 po.m_parent->setEndLine(t->endLine());
4406
4407 po.m_wasRefLink = false;
4408 po.m_firstInParagraph = false;
4409 po.m_parent->appendItem(t);
4410
4411 po.m_lastText = t;
4412 } else {
4413 po.m_pos = startPos;
4414 }
4415}
4416
4417//! Make text item with line break.
//!
//! First makeTextObject() is called for the given range with removal of trailing spaces
//! turned on. Then a LineBreak item is created from the rest of line \p endLine that
//! follows \p endPos, positioned from `endPos + 1` to the end of that line, and appended to
//! the parent in `po`.
//!
//! NOTE(review): this listing jumps from line 4420 to 4422, so the declaration of the
//! second parameter (used below as `po`) is not visible here.
4418template<class Trait>
4419inline void
4420makeTextObjectWithLineBreak(const typename Trait::String &text,
4422 long long int startPos,
4423 long long int startLine,
4424 long long int endPos,
4425 long long int endLine)
4426{
4427 makeTextObject(text, po, startPos, startLine, endPos, endLine, true);
4428
// Everything after endPos on the end line is the line break itself.
4429 std::shared_ptr<LineBreak<Trait>> hr(new LineBreak<Trait>);
4430 hr->setText(po.m_fr.m_data.at(endLine).first.asString().sliced(endPos + 1));
4431 hr->setStartColumn(po.m_fr.m_data.at(endLine).first.virginPos(endPos + 1));
4432 hr->setStartLine(po.m_fr.m_data.at(endLine).second.m_lineNumber);
4433 hr->setEndColumn(po.m_fr.m_data.at(endLine).first.virginPos(po.m_fr.m_data.at(endLine).first.length() - 1));
4434 hr->setEndLine(po.m_fr.m_data.at(endLine).second.m_lineNumber);
4435 po.m_parent->setEndColumn(hr->endColumn());
4436 po.m_parent->setEndLine(hr->endLine());
4437 po.m_wasRefLink = false;
4438 po.m_firstInParagraph = false;
4439 po.m_parent->appendItem(hr);
4440}
4441
4442//! Check for table in paragraph.
//!
//! The search runs only when no text options are set in `po`. Lines are examined from the
//! current line (or from the next one when the current position is not at the start of the
//! line) up to \p lastLine. A table starts at a line that is a table header and is followed
//! by an alignment line with the same, non-zero, number of columns. When such a line is
//! found, its index and the column count are stored in `po` and the last text position is
//! set to the end of the line before it. Otherwise the last text position is set to the end
//! of the last line of the block.
//!
//! NOTE(review): this listing jumps over lines 4445 and 4457, so the declarator line with
//! the function name and the first parameter (used below as `po`), and one statement at the
//! start of the "table found" branch, are not visible here.
//! NOTE(review): when the table is found at index 0, `i - 1` is -1 and is then used as an
//! index into the lines on the following statement - confirm that callers rule this out.
4443template<class Trait>
4444inline void
4446 long long int lastLine)
4447{
4448 if (!po.m_opts) {
4449 long long int i = po.m_pos > 0 ? po.m_line + 1 : po.m_line;
4450
4451 for (; i <= lastLine; ++i) {
// h - column count of a header line, c - column count of the alignment line after it.
4452 const auto h = isTableHeader<Trait>(po.m_fr.m_data[i].first.asString());
4453 const auto c = i + 1 < static_cast<long long int>(po.m_fr.m_data.size()) ?
4454 isTableAlignment<Trait>(po.m_fr.m_data[i + 1].first.asString()) : 0;
4455
4456 if (h && c && c == h) {
4458 po.m_startTableLine = i;
4459 po.m_columnsCount = c;
4460 po.m_lastTextLine = i - 1;
4461 po.m_lastTextPos = po.m_fr.m_data[po.m_lastTextLine].first.length();
4462
4463 return;
4464 }
4465 }
4466 }
4467
// No table: the text runs up to the end of the block.
4468 po.m_lastTextLine = po.m_fr.m_data.size() - 1;
4469 po.m_lastTextPos = po.m_fr.m_data.back().first.length();
4470}
4471
4472//! Make text item.
//!
//! Turns the text between the current position stored in `po` and the given end position
//! into text items, one per line, and moves the current position of `po` forward. Nothing
//! is done when the current position is already at or behind the end position. A line that
//! is not the last line of the block and ends with a line break produces a text item
//! followed by a line break item, unless line breaks are ignored in `po`.
//!
//! NOTE(review): this listing jumps over lines 4475 and 4483, so the declarator line with
//! the function name, and the declaration of the last parameter (used below as `po`), are
//! not visible here.
4473template<class Trait>
4474inline void
4476 // Inclusive. Don't pass lastLine > actual line position with 0 lastPos. Pass as is,
4477 // i.e. if line length is 18 and you need whole line then pass lastLine = index of line,
4478 // and lastPos = 18, or you may crash here if you will pass lastLine = index of line + 1
4479 // and lastPos = 0...
4480 long long int lastLine,
4481 // Not inclusive
4482 long long int lastPos,
4484{
4485 if (po.m_line > lastLine) {
4486 return;
4487 } else if (po.m_line == lastLine && po.m_pos >= lastPos) {
4488 return;
4489 }
4490
4491 typename Trait::String text;
4492
// When the current position is behind the end of its line, the text starts on the next line.
4493 const auto isLastChar = po.m_pos >= po.m_fr.m_data.at(po.m_line).first.length();
4494 long long int startPos = (isLastChar ? 0 : po.m_pos);
4495 long long int startLine = (isLastChar ? po.m_line + 1 : po.m_line);
4496
// On the end line a line break counts only if the requested range reaches the end of that line.
4497 bool lineBreak =
4498 (!po.m_ignoreLineBreak && po.m_line != (long long int)(po.m_fr.m_data.size() - 1) &&
4499 (po.m_line == lastLine ? (lastPos == po.m_fr.m_data.at(po.m_line).first.length() &&
4500 isLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.virginSubString())) :
4501 isLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.virginSubString())));
4502
4503 // makeTOWLB
// Emits the collected text followed by a line break item for the current line, then
// resets the collected text and moves the start to the beginning of the next line.
4504 auto makeTOWLB = [&]() {
4505 if (po.m_line != (long long int)(po.m_fr.m_data.size() - 1)) {
4506 const auto &line = po.m_fr.m_data.at(po.m_line).first.asString();
4507
4508 makeTextObjectWithLineBreak(text, po, startPos, startLine,
4509 line.length() - lineBreakLength<Trait>(line) - 1, po.m_line);
4510
4511 startPos = 0;
4512 startLine = po.m_line + 1;
4513
4514 text.clear();
4515 }
4516 }; // makeTOWLB
4517
// First line of the range.
4518 if (lineBreak) {
4519 text.push_back(removeLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.virginSubString(po.m_pos)));
4520
4521 makeTOWLB();
4522 } else {
4523 const auto length = (po.m_line == lastLine ?
4524 lastPos - po.m_pos : po.m_fr.m_data.at(po.m_line).first.length() - po.m_pos);
4525 const auto s = po.m_fr.m_data.at(po.m_line).first.virginSubString(po.m_pos, length);
4526 text.push_back(s);
4527
4528 po.m_pos = (po.m_line == lastLine ? lastPos : po.m_fr.m_data.at(po.m_line).first.length());
4529
4530 makeTextObject(text,
4531 po,
4532 startPos,
4533 startLine,
4534 po.m_line == lastLine ? lastPos - 1 : po.m_fr.m_data.at(po.m_line).first.length() - 1,
4535 po.m_line);
4536
4537 text.clear();
4538 }
4539
4540 if (po.m_line != lastLine) {
4541 ++po.m_line;
4542 startPos = 0;
4543 startLine = po.m_line;
4544
// Complete lines between the first and the last line of the range.
4545 for (; po.m_line < lastLine; ++po.m_line) {
4546 lineBreak = (!po.m_ignoreLineBreak && po.m_line != (long long int)(po.m_fr.m_data.size() - 1) &&
4547 isLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.asString()));
4548
4549 const auto s = (lineBreak ? removeLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.virginSubString()) :
4550 po.m_fr.m_data.at(po.m_line).first.virginSubString());
4551 text.push_back(s);
4552
4553 if (lineBreak) {
4554 makeTOWLB();
4555 } else {
4556 makeTextObject(text, po, 0, po.m_line,
4557 po.m_fr.m_data.at(po.m_line).first.length() - 1, po.m_line);
4558 }
4559
4560 text.clear();
4561 }
4562
// Last line of the range, taken up to lastPos.
4563 lineBreak = (!po.m_ignoreLineBreak && po.m_line != (long long int)(po.m_fr.m_data.size() - 1) &&
4564 lastPos == po.m_fr.m_data.at(po.m_line).first.length() &&
4565 isLineBreak<Trait>(po.m_fr.m_data.at(po.m_line).first.asString()));
4566
4567 auto s = po.m_fr.m_data.at(po.m_line).first.virginSubString(0, lastPos);
4568
4569 po.m_pos = lastPos;
4570
4571 if (!lineBreak) {
4572 text.push_back(s);
4573
4574 makeTextObject(text, po, 0, lastLine, lastPos - 1, lastLine);
4575 } else {
4576 s = removeLineBreak<Trait>(s);
4577 text.push_back(s);
4578
4579 makeTOWLB();
4580 }
4581 }
4582}
4583
4584//! Skip spaces.
4585template<class Trait>
4586inline void
4587skipSpacesInHtml(long long int &l,
4588 long long int &p,
4589 const typename MdBlock<Trait>::Data &fr)
4590{
4591 while (l < (long long int)fr.size()) {
4592 p = skipSpaces<Trait>(p, fr[l].first.asString());
4593
4594 if (p < fr[l].first.length()) {
4595 return;
4596 }
4597
4598 p = 0;
4599 ++l;
4600 }
4601}
4602
4603//! Read HTML attribute value.
//!
//! This variant reads a value that is not enclosed in quotes. Line `l` is scanned from `p`
//! until whitespace, ">" or the end of the line. Meeting any of the characters
//! " ` = < ' before that makes the value invalid. On return `p` points at the character
//! that stopped the scan.
//!
//! \return Pair of equal flags: both true when at least one character was read and no
//!         forbidden character was met, both false otherwise.
//!
//! NOTE(review): this listing jumps from line 4605 to 4607, so the declarator line with the
//! function name and the first parameter (used below as `l`) is not visible here.
4604template<class Trait>
4605inline std::pair<bool, bool>
4607 long long int &p,
4608 const typename MdBlock<Trait>::Data &fr)
4609{
4610 static const typename Trait::String notAllowed = Trait::latin1ToString("\"`=<'");
4611
4612 const auto start = p;
4613
4614 for (; p < fr[l].first.length(); ++p) {
4615 if (fr[l].first[p].isSpace()) {
4616 break;
4617 } else if (notAllowed.contains(fr[l].first[p])) {
4618 return {false, false};
4619 } else if (fr[l].first[p] == Trait::latin1ToChar('>')) {
// End of the tag: the value is valid only if it is not empty.
4620 return {p - start > 0, p - start > 0};
4621 }
4622 }
4623
4624 return {p - start > 0, p - start > 0};
4625}
4626
4627//! Read HTML attribute value.
4628template<class Trait>
4629inline std::pair<bool, bool>
4630readHtmlAttrValue(long long int &l,
4631 long long int &p,
4632 const typename MdBlock<Trait>::Data &fr)
4633{
4634 if (p < fr[l].first.length() && fr[l].first[p] != Trait::latin1ToChar('"') &&
4635 fr[l].first[p] != Trait::latin1ToChar('\'')) {
4636 return readUnquotedHtmlAttrValue<Trait>(l, p, fr);
4637 }
4638
4639 const auto s = fr[l].first[p];
4640
4641 ++p;
4642
4643 if (p >= fr[l].first.length()) {
4644 return {false, false};
4645 }
4646
4647 for (; l < (long long int)fr.size(); ++l) {
4648 bool doBreak = false;
4649
4650 for (; p < fr[l].first.length(); ++p) {
4651 const auto ch = fr[l].first[p];
4652
4653 if (ch == s) {
4654 doBreak = true;
4655
4656 break;
4657 }
4658 }
4659
4660 if (doBreak) {
4661 break;
4662 }
4663
4664 p = 0;
4665 }
4666
4667 if (l >= (long long int)fr.size()) {
4668 return {false, false};
4669 }
4670
4671 if (p >= fr[l].first.length()) {
4672 return {false, false};
4673 }
4674
4675 if (fr[l].first[p] != s) {
4676 return {false, false};
4677 }
4678
4679 ++p;
4680
4681 return {true, true};
4682}
4683
4684//! Read HTML attribute.
4685template<class Trait>
4686inline std::pair<bool, bool>
4687readHtmlAttr(long long int &l,
4688 long long int &p,
4689 const typename MdBlock<Trait>::Data &fr,
4690 bool checkForSpace)
4691{
4692 long long int tl = l, tp = p;
4693
4694 skipSpacesInHtml<Trait>(l, p, fr);
4695
4696 if (l >= (long long int)fr.size()) {
4697 return {false, false};
4698 }
4699
4700 // /
4701 if (p < fr[l].first.length() && fr[l].first[p] == Trait::latin1ToChar('/')) {
4702 return {false, true};
4703 }
4704
4705 // >
4706 if (p < fr[l].first.length() && fr[l].first[p] == Trait::latin1ToChar('>')) {
4707 return {false, true};
4708 }
4709
4710 if (checkForSpace) {
4711 if (tl == l && tp == p) {
4712 return {false, false};
4713 }
4714 }
4715
4716 const auto start = p;
4717
4718 for (; p < fr[l].first.length(); ++p) {
4719 const auto ch = fr[l].first[p];
4720
4721 if (ch.isSpace() || ch == Trait::latin1ToChar('>') || ch == Trait::latin1ToChar('=')) {
4722 break;
4723 }
4724 }
4725
4726 const typename Trait::String name = fr[l].first.asString().sliced(start, p - start).toLower();
4727
4728 if (!name.startsWith(Trait::latin1ToString("_")) && !name.startsWith(Trait::latin1ToString(":")) &&
4729 !name.isEmpty() && !(name[0].unicode() >= 97 && name[0].unicode() <= 122)) {
4730 return {false, false};
4731 }
4732
4733 static const typename Trait::String allowedInName =
4734 Trait::latin1ToString("abcdefghijklmnopqrstuvwxyz0123456789_.:-");
4735
4736 for (long long int i = 1; i < name.length(); ++i) {
4737 if (!allowedInName.contains(name[i])) {
4738 return {false, false};
4739 }
4740 }
4741
4742 // >
4743 if (p < fr[l].first.length() && fr[l].first[p] == Trait::latin1ToChar('>')) {
4744 return {false, true};
4745 }
4746
4747 tl = l;
4748 tp = p;
4749
4750 skipSpacesInHtml<Trait>(l, p, fr);
4751
4752 if (l >= (long long int)fr.size()) {
4753 return {false, false};
4754 }
4755
4756 // =
4757 if (p < fr[l].first.length()) {
4758 if (fr[l].first[p] != Trait::latin1ToChar('=')) {
4759 l = tl;
4760 p = tp;
4761
4762 return {true, true};
4763 } else {
4764 ++p;
4765 }
4766 } else {
4767 return {true, false};
4768 }
4769
4770 skipSpacesInHtml<Trait>(l, p, fr);
4771
4772 if (l >= (long long int)fr.size()) {
4773 return {false, false};
4774 }
4775
4776 return readHtmlAttrValue<Trait>(l, p, fr);
4777}
4778
4779//! \return Is HTML tag at the given position?
4780template<class Trait>
4781inline std::tuple<bool, long long int, long long int, bool, typename Trait::String>
4782isHtmlTag(long long int line, long long int pos, TextParsingOpts<Trait> &po, int rule);
4783
4784//! \return Is after the given position only HTML tags?
4785template<class Trait>
4786inline bool
4788 long long int pos,
4790 int rule)
4791{
4792 static const std::set<typename Trait::String> s_rule1Finish = {Trait::latin1ToString("/pre"),
4793 Trait::latin1ToString("/script"),
4794 Trait::latin1ToString("/style"),
4795 Trait::latin1ToString("/textarea")};
4796
4797 auto p = skipSpaces<Trait>(pos, po.m_fr.m_data[line].first.asString());
4798
4799 while (p < po.m_fr.m_data[line].first.length()) {
4800 bool ok = false;
4801
4802 long long int l;
4803 typename Trait::String tag;
4804
4805 std::tie(ok, l, p, std::ignore, tag) = isHtmlTag(line, p, po, rule);
4806
4807 ++p;
4808
4809 if (rule != 1) {
4810 if (!ok) {
4811 return false;
4812 }
4813
4814 if (l > line) {
4815 return true;
4816 }
4817 } else {
4818 if (s_rule1Finish.find(tag.toLower()) != s_rule1Finish.cend() && l == line) {
4819 return true;
4820 }
4821
4822 if (l > line) {
4823 return false;
4824 }
4825 }
4826
4827 p = skipSpaces<Trait>(p, po.m_fr.m_data[line].first.asString());
4828 }
4829
4830 if (p >= po.m_fr.m_data[line].first.length()) {
4831 return true;
4832 }
4833
4834 return false;
4835}
4836
4837//! \return Is setext heading in the lines?
4838template<class Trait>
4839inline bool
4841 long long int startLine,
4842 long long int endLine)
4843{
4844 for (; startLine <= endLine; ++startLine) {
4845 const auto pos = skipSpaces<Trait>(0, po.m_fr.m_data.at(startLine).first.asString());
4846 const auto line = po.m_fr.m_data.at(startLine).first.asString().sliced(pos);
4847
4848 if ((isH1<Trait>(line) || isH2<Trait>(line)) && pos < 4) {
4849 return true;
4850 }
4851 }
4852
4853 return false;
4854}
4855
4856//! \return Is HTML tag at the given position?
4857template<class Trait>
4858inline std::tuple<bool, long long int, long long int, bool, typename Trait::String>
4859isHtmlTag(long long int line,
4860 long long int pos,
4862 int rule)
4863{
4864 if (po.m_fr.m_data[line].first[pos] != Trait::latin1ToChar('<')) {
4865 return {false, line, pos, false, {}};
4866 }
4867
4868 typename Trait::String tag;
4869
4870 long long int l = line;
4871 long long int p = pos + 1;
4872 bool first = false;
4873
4874 {
4875 const auto tmp = skipSpaces<Trait>(0, po.m_fr.m_data[l].first.asString());
4876 first = (tmp == pos);
4877 }
4878
4879 if (p >= po.m_fr.m_data[l].first.length()) {
4880 return {false, line, pos, first, tag};
4881 }
4882
4883 bool closing = false;
4884
4885 if (po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('/')) {
4886 closing = true;
4887
4888 tag.push_back(Trait::latin1ToChar('/'));
4889
4890 ++p;
4891 }
4892
4893 const auto start = p;
4894
4895 // tag
4896 for (; p < po.m_fr.m_data[l].first.length(); ++p) {
4897 const auto ch = po.m_fr.m_data[l].first[p];
4898
4899 if (ch.isSpace() || ch == Trait::latin1ToChar('>') || ch == Trait::latin1ToChar('/')) {
4900 break;
4901 }
4902 }
4903
4904 tag.push_back(po.m_fr.m_data[l].first.asString().sliced(start, p - start));
4905
4906 if (p < po.m_fr.m_data[l].first.length() && po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('/')) {
4907 if (p + 1 < po.m_fr.m_data[l].first.length() &&
4908 po.m_fr.m_data[l].first[p + 1] == Trait::latin1ToChar('>')) {
4909 long long int tmp = 0;
4910
4911 if (rule == 7) {
4912 tmp = skipSpaces<Trait>(p + 2, po.m_fr.m_data[l].first.asString());
4913 }
4914
4915 bool onLine = (first && (rule == 7 ? tmp == po.m_fr.m_data[l].first.length() :
4916 isOnlyHtmlTagsAfterOrClosedRule1(l, p + 2, po, rule == 1)));
4917
4918 if (!isSetextHeadingBetween(po, line, l)) {
4919 return {true, l, p + 1, onLine, tag};
4920 } else {
4921 return {false, line, pos, first, tag};
4922 }
4923 } else {
4924 return {false, line, pos, first, tag};
4925 }
4926 }
4927
4928 if (p < po.m_fr.m_data[l].first.length() && po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('>')) {
4929 long long int tmp = 0;
4930
4931 if (rule == 7) {
4932 tmp = skipSpaces<Trait>(p + 1, po.m_fr.m_data[l].first.asString());
4933 }
4934
4935 bool onLine = (first && (rule == 7 ? tmp == po.m_fr.m_data[l].first.length() :
4936 isOnlyHtmlTagsAfterOrClosedRule1(l, p + 1, po, rule == 1)));
4937
4938 if (!isSetextHeadingBetween(po, line, l)) {
4939 return {true, l, p, onLine, tag};
4940 } else {
4941 return {false, line, pos, first, tag};
4942 }
4943 }
4944
4945 skipSpacesInHtml<Trait>(l, p, po.m_fr.m_data);
4946
4947 if (l >= (long long int)po.m_fr.m_data.size()) {
4948 return {false, line, pos, first, tag};
4949 }
4950
4951 if (po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('>')) {
4952 long long int tmp = 0;
4953
4954 if (rule == 7) {
4955 tmp = skipSpaces<Trait>(p + 1, po.m_fr.m_data[l].first.asString());
4956 }
4957
4958 bool onLine = (first && (rule == 7 ? tmp == po.m_fr.m_data[l].first.length() :
4959 isOnlyHtmlTagsAfterOrClosedRule1(l, p + 1, po, rule == 1)));
4960
4961 if (!isSetextHeadingBetween(po, line, l)) {
4962 return {true, l, p, onLine, tag};
4963 } else {
4964 return {false, line, pos, first, tag};
4965 }
4966 }
4967
4968 bool attr = true;
4969 bool firstAttr = true;
4970
4971 while (attr) {
4972 bool ok = false;
4973
4974 std::tie(attr, ok) = readHtmlAttr<Trait>(l, p, po.m_fr.m_data, !firstAttr);
4975
4976 firstAttr = false;
4977
4978 if (closing && attr) {
4979 return {false, line, pos, first, tag};
4980 }
4981
4982 if (!ok) {
4983 return {false, line, pos, first, tag};
4984 }
4985 }
4986
4987 if (po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('/')) {
4988 ++p;
4989 } else {
4990 skipSpacesInHtml<Trait>(l, p, po.m_fr.m_data);
4991
4992 if (l >= (long long int)po.m_fr.m_data.size()) {
4993 return {false, line, pos, first, tag};
4994 }
4995 }
4996
4997 if (po.m_fr.m_data[l].first[p] == Trait::latin1ToChar('>')) {
4998 long long int tmp = 0;
4999
5000 if (rule == 7) {
5001 tmp = skipSpaces<Trait>(p + 1, po.m_fr.m_data[l].first.asString());
5002 }
5003
5004 bool onLine = (first && (rule == 7 ? tmp == po.m_fr.m_data[l].first.length() :
5005 isOnlyHtmlTagsAfterOrClosedRule1(l, p + 1, po, rule == 1)));
5006
5007 if (!isSetextHeadingBetween(po, line, l)) {
5008 return {true, l, p, onLine, tag};
5009 } else {
5010 return {false, line, pos, first, tag};
5011 }
5012 }
5013
5014 return {false, line, pos, first, {}};
5015}
5016
5017//! Read HTML tag.
5018template<class Trait>
5019inline std::pair<typename Trait::String, bool>
5020Parser<Trait>::readHtmlTag(typename Delims::const_iterator it,
5021 TextParsingOpts<Trait> &po)
5022{
5023 long long int i = it->m_pos + 1;
5024 const auto start = i;
5025
5026 if (start >= po.m_fr.m_data[it->m_line].first.length()) {
5027 return {{}, false};
5028 }
5029
5030 for (; i < po.m_fr.m_data[it->m_line].first.length(); ++i) {
5031 const auto ch = po.m_fr.m_data[it->m_line].first[i];
5032
5033 if (ch.isSpace() || ch == Trait::latin1ToChar('>')) {
5034 break;
5035 }
5036 }
5037
5038 return {po.m_fr.m_data[it->m_line].first.asString().sliced(start, i - start),
5039 i < po.m_fr.m_data[it->m_line].first.length() ?
5040 po.m_fr.m_data[it->m_line].first[i] == Trait::latin1ToChar('>') : false};
5041}
5042
5043template<class Trait>
5044inline typename Parser<Trait>::Delims::const_iterator
5045Parser<Trait>::findIt(typename Delims::const_iterator it,
5046 typename Delims::const_iterator last,
5047 TextParsingOpts<Trait> &po)
5048{
5049 auto ret = it;
5050
5051 for (; it != last; ++it) {
5052 if ((it->m_line == po.m_line && it->m_pos < po.m_pos) || it->m_line < po.m_line) {
5053 ret = it;
5054 } else {
5055 break;
5056 }
5057 }
5058
5059 return ret;
5060}
5061
5062//! Read HTML data.
5063template<class Trait>
5064inline void
5065eatRawHtml(long long int line,
5066 long long int pos,
5067 long long int toLine,
5068 long long int toPos,
5070 bool finish,
5071 int htmlRule,
5072 bool onLine,
5073 bool continueEating = false)
5074{
5075 if (line <= toLine) {
5076 typename Trait::String h = po.m_html.m_html->text();
5077
5078 if (!h.isEmpty() && !continueEating) {
5079 for (long long int i = 0; i < po.m_fr.m_emptyLinesBefore; ++i) {
5080 h.push_back(Trait::latin1ToChar('\n'));
5081 }
5082 }
5083
5084 const auto first = po.m_fr.m_data[line].first.asString().sliced(
5085 pos,
5086 (line == toLine ? (toPos >= 0 ? toPos - pos : po.m_fr.m_data[line].first.length() - pos) :
5087 po.m_fr.m_data[line].first.length() - pos));
5088
5089 if (!h.isEmpty() && !first.isEmpty() && po.m_html.m_html->endLine() != po.m_fr.m_data[line].second.m_lineNumber) {
5090 h.push_back(Trait::latin1ToChar('\n'));
5091 }
5092
5093 if (!first.isEmpty()) {
5094 h.push_back(first);
5095 }
5096
5097 ++line;
5098
5099 for (; line < toLine; ++line) {
5100 h.push_back(Trait::latin1ToChar('\n'));
5101 h.push_back(po.m_fr.m_data[line].first.asString());
5102 }
5103
5104 if (line == toLine && toPos != 0) {
5105 h.push_back(Trait::latin1ToChar('\n'));
5106 h.push_back(po.m_fr.m_data[line].first.asString().sliced(0, toPos > 0 ?
5107 toPos : po.m_fr.m_data[line].first.length()));
5108 }
5109
5110 auto endColumn = toPos;
5111 auto endLine = toLine;
5112
5113 if (endColumn == 0 && endLine > 0) {
5114 --endLine;
5115 endColumn = po.m_fr.m_data.at(endLine).first.length();
5116 }
5117
5118 po.m_html.m_html->setEndColumn(po.m_fr.m_data.at(endLine).first.virginPos(endColumn >= 0 ?
5119 endColumn - 1 : po.m_fr.m_data.at(endLine).first.length() - 1));
5120 po.m_html.m_html->setEndLine(po.m_fr.m_data.at(endLine).second.m_lineNumber);
5121
5122 po.m_line = (toPos >= 0 ? toLine : toLine + 1);
5123 po.m_pos = (toPos >= 0 ? toPos : 0);
5124
5125 if (po.m_line + 1 < static_cast<long long int>(po.m_fr.m_data.size()) &&
5126 po.m_pos >= po.m_fr.m_data.at(po.m_line).first.length()) {
5127 ++po.m_line;
5128 po.m_pos = 0;
5129 }
5130
5131 po.m_html.m_html->setText(h);
5132 }
5133
5135
5136 if (finish) {
5137 if (po.m_html.m_onLine || htmlRule == 7 || po.m_line < (long long int)po.m_fr.m_data.size()) {
5138 if (!po.m_collectRefLinks) {
5139 po.m_parent->appendItem(po.m_html.m_html);
5140 po.m_parent->setEndColumn(po.m_html.m_html->endColumn());
5141 po.m_parent->setEndLine(po.m_html.m_html->endLine());
5142 initLastItemWithOpts<Trait>(po, po.m_html.m_html);
5143 po.m_html.m_html->setOpts(po.m_opts);
5144 po.m_lastText = nullptr;
5145 } else {
5146 po.m_tmpHtml = po.m_html.m_html;
5147 }
5148
5149 resetHtmlTag(po.m_html);
5150 }
5151 } else {
5152 po.m_html.m_continueHtml = true;
5153 }
5154}
5155
5156template<class Trait>
5157inline bool
5158Parser<Trait>::isNewBlockIn(MdBlock<Trait> &fr,
5159 long long int startLine,
5160 long long int endLine)
5161{
5162 for (auto i = startLine + 1; i <= endLine; ++i) {
5163 const auto type = whatIsTheLine(fr.m_data[i].first);
5164
5165 switch (type) {
5166 case Parser<Trait>::BlockType::Footnote:
5167 case Parser<Trait>::BlockType::FensedCodeInList:
5168 case Parser<Trait>::BlockType::SomethingInList:
5169 case Parser<Trait>::BlockType::List:
5170 case Parser<Trait>::BlockType::ListWithFirstEmptyLine:
5171 case Parser<Trait>::BlockType::Code:
5172 case Parser<Trait>::BlockType::Blockquote:
5173 case Parser<Trait>::BlockType::Heading:
5174 case Parser<Trait>::BlockType::EmptyLine:
5175 return true;
5176
5177 default:
5178 break;
5179 }
5180
5181 const auto ns = skipSpaces<Trait>(0, fr.m_data[i].first.asString());
5182
5183 if (ns < 4) {
5184 const auto s = fr.m_data[i].first.asString().sliced(ns);
5185
5186 if (isHorizontalLine<Trait>(s) || isH1<Trait>(s) || isH2<Trait>(s)) {
5187 return true;
5188 }
5189 }
5190 }
5191
5192 return false;
5193}
5194
5195template<class Trait>
5196inline void
5197Parser<Trait>::finishRule1HtmlTag(typename Delims::const_iterator it,
5198 typename Delims::const_iterator last,
5199 TextParsingOpts<Trait> &po,
5200 bool skipFirst)
5201{
5202 static const std::set<typename Trait::String> s_finish = {Trait::latin1ToString("/pre"),
5203 Trait::latin1ToString("/script"),
5204 Trait::latin1ToString("/style"),
5205 Trait::latin1ToString("/textarea")};
5206
5207 if (it != last) {
5208 bool ok = false;
5209 long long int l = -1, p = -1;
5210
5211 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less && skipFirst) {
5212 std::tie(ok, l, p, po.m_html.m_onLine, std::ignore) =
5213 isHtmlTag(it->m_line, it->m_pos, po, 1);
5214 }
5215
5216 if (po.m_html.m_onLine) {
5217 for (it = (skipFirst && it != last ? std::next(it) : it); it != last; ++it) {
5218 if (it->m_type == Delimiter::Less) {
5219 typename Trait::String tag;
5220 bool closed = false;
5221
5222 std::tie(tag, closed) = readHtmlTag(it, po);
5223
5224 if (closed) {
5225 if (s_finish.find(tag.toLower()) != s_finish.cend()) {
5226 eatRawHtml(po.m_line, po.m_pos, it->m_line, -1, po,
5227 true, 1, po.m_html.m_onLine);
5228
5229 return;
5230 }
5231 }
5232 }
5233 }
5234 } else if (ok && !isNewBlockIn(po.m_fr, it->m_line, l)) {
5235 eatRawHtml(po.m_line, po.m_pos, l, p + 1, po, true, 1, false);
5236
5237 return;
5238 } else {
5239 resetHtmlTag(po.m_html);
5240
5241 return;
5242 }
5243 }
5244
5245 if (po.m_html.m_onLine) {
5246 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 1, po.m_html.m_onLine);
5247 } else {
5248 resetHtmlTag(po.m_html);
5249 }
5250}
5251
5252template<class Trait>
5253inline void
5254Parser<Trait>::finishRule2HtmlTag(typename Delims::const_iterator it,
5255 typename Delims::const_iterator last,
5256 TextParsingOpts<Trait> &po)
5257{
5258 if (it != last) {
5259 const auto start = it;
5260
5261 MdLineData::CommentData commentData = {2, true};
5262 bool onLine = po.m_html.m_onLine;
5263
5264 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less) {
5265 long long int i = po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos);
5266
5267 commentData = po.m_fr.m_data[it->m_line].second.m_htmlCommentData[i];
5268
5269 onLine = (it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()));
5270 po.m_html.m_onLine = onLine;
5271 }
5272
5273 if (commentData.first != -1 && commentData.second) {
5274 for (; it != last; ++it) {
5275 if (it->m_type == Delimiter::Greater) {
5276 auto p = it->m_pos;
5277
5278 bool doContinue = false;
5279
5280 for (char i = 0; i < commentData.first; ++i) {
5281 if (!(p > 0 && po.m_fr.m_data[it->m_line].first[p - 1] == Trait::latin1ToChar('-'))) {
5282 doContinue = true;
5283
5284 break;
5285 }
5286
5287 --p;
5288 }
5289
5290 if (doContinue) {
5291 continue;
5292 }
5293
5294 if (onLine || !isNewBlockIn(po.m_fr, start->m_line, it->m_line)) {
5295 eatRawHtml(po.m_line, po.m_pos, it->m_line,
5296 onLine ? po.m_fr.m_data[it->m_line].first.length() : it->m_pos + 1,
5297 po, true, 2, onLine);
5298 } else {
5299 resetHtmlTag(po.m_html);
5300 }
5301
5302 return;
5303 }
5304 }
5305 }
5306 }
5307
5308 if (po.m_html.m_onLine) {
5309 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 2, po.m_html.m_onLine);
5310 } else {
5311 resetHtmlTag(po.m_html);
5312 }
5313}
5314
5315template<class Trait>
5316inline void
5317Parser<Trait>::finishRule3HtmlTag(typename Delims::const_iterator it,
5318 typename Delims::const_iterator last,
5319 TextParsingOpts<Trait> &po)
5320{
5321 bool onLine = po.m_html.m_onLine;
5322
5323 if (it != last) {
5324 const auto start = it;
5325
5326 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less) {
5327 onLine = (it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()));
5328 po.m_html.m_onLine = onLine;
5329 }
5330
5331 for (; it != last; ++it) {
5332 if (it->m_type == Delimiter::Greater) {
5333 if (it->m_pos > 0 && po.m_fr.m_data[it->m_line].first[it->m_pos - 1] == Trait::latin1ToChar('?')) {
5334 long long int i = it->m_pos + 1;
5335
5336 for (; i < po.m_fr.m_data[it->m_line].first.length(); ++i) {
5337 if (po.m_fr.m_data[it->m_line].first[i] == Trait::latin1ToChar('<')) {
5338 break;
5339 }
5340 }
5341
5342 if (onLine || !isNewBlockIn(po.m_fr, start->m_line, it->m_line)) {
5343 eatRawHtml(po.m_line, po.m_pos, it->m_line, i, po, true, 3, onLine);
5344 } else {
5345 resetHtmlTag(po.m_html);
5346 }
5347
5348 return;
5349 }
5350 }
5351 }
5352 }
5353
5354 if (po.m_html.m_onLine) {
5355 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 3, onLine);
5356 } else {
5357 resetHtmlTag(po.m_html);
5358 }
5359}
5360
5361template<class Trait>
5362inline void
5363Parser<Trait>::finishRule4HtmlTag(typename Delims::const_iterator it,
5364 typename Delims::const_iterator last,
5365 TextParsingOpts<Trait> &po)
5366{
5367 if (it != last) {
5368 const auto start = it;
5369
5370 bool onLine = po.m_html.m_onLine;
5371
5372 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less) {
5373 onLine = (it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()));
5374 po.m_html.m_onLine = onLine;
5375 }
5376
5377 for (; it != last; ++it) {
5378 if (it->m_type == Delimiter::Greater) {
5379 long long int i = it->m_pos + 1;
5380
5381 for (; i < po.m_fr.m_data[it->m_line].first.length(); ++i) {
5382 if (po.m_fr.m_data[it->m_line].first[i] == Trait::latin1ToChar('<')) {
5383 break;
5384 }
5385 }
5386
5387 if (onLine || !isNewBlockIn(po.m_fr, start->m_line, it->m_line)) {
5388 eatRawHtml(po.m_line, po.m_pos, it->m_line, i, po, true, 4, onLine);
5389 } else {
5390 resetHtmlTag(po.m_html);
5391 }
5392
5393 return;
5394 }
5395 }
5396 }
5397
5398 if (po.m_html.m_onLine) {
5399 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 4, true);
5400 } else {
5401 resetHtmlTag(po.m_html);
5402 }
5403}
5404
5405template<class Trait>
5406inline void
5407Parser<Trait>::finishRule5HtmlTag(typename Delims::const_iterator it,
5408 typename Delims::const_iterator last,
5409 TextParsingOpts<Trait> &po)
5410{
5411 if (it != last) {
5412 const auto start = it;
5413
5414 bool onLine = po.m_html.m_onLine;
5415
5416 if (po.m_html.m_html->text().isEmpty() && it->m_type == Delimiter::Less) {
5417 onLine = (it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()));
5418 po.m_html.m_onLine = onLine;
5419 }
5420
5421 for (; it != last; ++it) {
5422 if (it->m_type == Delimiter::Greater) {
5423 if (it->m_pos > 1 && po.m_fr.m_data[it->m_line].first[it->m_pos - 1] == Trait::latin1ToChar(']') &&
5424 po.m_fr.m_data[it->m_line].first[it->m_pos - 2] == Trait::latin1ToChar(']')) {
5425 long long int i = it->m_pos + 1;
5426
5427 for (; i < po.m_fr.m_data[it->m_line].first.length(); ++i) {
5428 if (po.m_fr.m_data[it->m_line].first[i] == Trait::latin1ToChar('<')) {
5429 break;
5430 }
5431 }
5432
5433 if (onLine || !isNewBlockIn(po.m_fr, start->m_line, it->m_line)) {
5434 eatRawHtml(po.m_line, po.m_pos, it->m_line, i, po, true, 5, onLine);
5435 } else {
5436 resetHtmlTag(po.m_html);
5437 }
5438
5439 return;
5440 }
5441 }
5442 }
5443 }
5444
5445 if (po.m_html.m_onLine) {
5446 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 5, true);
5447 } else {
5448 resetHtmlTag(po.m_html);
5449 }
5450}
5451
5452template<class Trait>
5453inline void
5454Parser<Trait>::finishRule6HtmlTag(typename Delims::const_iterator it,
5455 typename Delims::const_iterator last,
5456 TextParsingOpts<Trait> &po)
5457{
5458 po.m_html.m_onLine = (it != last ?
5459 it->m_pos == skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString()) : true);
5460
5461 if (po.m_html.m_onLine) {
5462 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po,
5463 false, 6, po.m_html.m_onLine);
5464 } else {
5465 const auto nit = std::find_if(std::next(it), last, [](const auto &d) {
5466 return (d.m_type == Delimiter::Greater);
5467 });
5468
5469 if (nit != last && !isNewBlockIn(po.m_fr, it->m_line, nit->m_line)) {
5470 eatRawHtml(po.m_line, po.m_pos, nit->m_line, nit->m_pos + nit->m_len, po,
5471 true, 6, false);
5472 }
5473 }
5474
5475 if (po.m_fr.m_emptyLineAfter && po.m_html.m_html) {
5476 po.m_html.m_continueHtml = false;
5477 }
5478}
5479
5480template<class Trait>
5481inline typename Parser<Trait>::Delims::const_iterator
5482Parser<Trait>::finishRawHtmlTag(typename Delims::const_iterator it,
5483 typename Delims::const_iterator last,
5484 TextParsingOpts<Trait> &po,
5485 bool skipFirst)
5486{
5487 po.m_detected = TextParsingOpts<Trait>::Detected::HTML;
5488
5489 switch (po.m_html.m_htmlBlockType) {
5490 case 1:
5491 finishRule1HtmlTag(it, last, po, skipFirst);
5492 break;
5493
5494 case 2:
5495 finishRule2HtmlTag(it, last, po);
5496 break;
5497
5498 case 3:
5499 finishRule3HtmlTag(it, last, po);
5500 break;
5501
5502 case 4:
5503 finishRule4HtmlTag(it, last, po);
5504 break;
5505
5506 case 5:
5507 finishRule5HtmlTag(it, last, po);
5508 break;
5509
5510 case 6:
5511 finishRule6HtmlTag(it, last, po);
5512 break;
5513
5514 case 7:
5515 return finishRule7HtmlTag(it, last, po);
5516
5517 default:
5518 po.m_detected = TextParsingOpts<Trait>::Detected::Nothing;
5519 break;
5520 }
5521
5522 return findIt(it, last, po);
5523}
5524
5525template<class Trait>
5526inline int
5527Parser<Trait>::htmlTagRule(typename Delims::const_iterator it,
5528 typename Delims::const_iterator last,
5529 TextParsingOpts<Trait> &po)
5530{
5531 MD_UNUSED(last)
5532
5533 typename Trait::String tag;
5534
5535 std::tie(tag, std::ignore) = readHtmlTag(it, po);
5536
5537 if (tag.startsWith(Trait::latin1ToString("![CDATA["))) {
5538 return 5;
5539 }
5540
5541 tag = tag.toLower();
5542
5543 static const typename Trait::String s_validHtmlTagLetters =
5544 Trait::latin1ToString("abcdefghijklmnopqrstuvwxyz0123456789-");
5545
5546 bool closing = false;
5547
5548 if (tag.startsWith(Trait::latin1ToString("/"))) {
5549 tag.remove(0, 1);
5550 closing = true;
5551 }
5552
5553 if (tag.endsWith(Trait::latin1ToString("/"))) {
5554 tag.remove(tag.size() - 1, 1);
5555 }
5556
5557 if (tag.isEmpty()) {
5558 return -1;
5559 }
5560
5561 if (!tag.startsWith(Trait::latin1ToString("!")) &&
5562 !tag.startsWith(Trait::latin1ToString("?")) &&
5563 !(tag[0].unicode() >= 97 && tag[0].unicode() <= 122)) {
5564 return -1;
5565 }
5566
5567 static const std::set<typename Trait::String> s_rule1 = {Trait::latin1ToString("pre"),
5568 Trait::latin1ToString("script"),
5569 Trait::latin1ToString("style"),
5570 Trait::latin1ToString("textarea")};
5571
5572 if (!closing && s_rule1.find(tag) != s_rule1.cend()) {
5573 return 1;
5574 } else if (tag.startsWith(Trait::latin1ToString("!--"))) {
5575 return 2;
5576 } else if (tag.startsWith(Trait::latin1ToString("?"))) {
5577 return 3;
5578 } else if (tag.startsWith(Trait::latin1ToString("!")) && tag.size() > 1 &&
5579 ((tag[1].unicode() >= 65 && tag[1].unicode() <= 90) ||
5580 (tag[1].unicode() >= 97 && tag[1].unicode() <= 122))) {
5581 return 4;
5582 } else {
5583 static const std::set<typename Trait::String> s_rule6 = {
5584 Trait::latin1ToString("address"), Trait::latin1ToString("article"), Trait::latin1ToString("aside"), Trait::latin1ToString("base"),
5585 Trait::latin1ToString("basefont"), Trait::latin1ToString("blockquote"), Trait::latin1ToString("body"), Trait::latin1ToString("caption"),
5586 Trait::latin1ToString("center"), Trait::latin1ToString("col"), Trait::latin1ToString("colgroup"), Trait::latin1ToString("dd"),
5587 Trait::latin1ToString("details"), Trait::latin1ToString("dialog"), Trait::latin1ToString("dir"), Trait::latin1ToString("div"),
5588 Trait::latin1ToString("dl"), Trait::latin1ToString("dt"), Trait::latin1ToString("fieldset"), Trait::latin1ToString("figcaption"),
5589 Trait::latin1ToString("figure"), Trait::latin1ToString("footer"), Trait::latin1ToString("form"), Trait::latin1ToString("frame"),
5590 Trait::latin1ToString("frameset"), Trait::latin1ToString("h1"), Trait::latin1ToString("h2"), Trait::latin1ToString("h3"),
5591 Trait::latin1ToString("h4"), Trait::latin1ToString("h5"), Trait::latin1ToString("h6"), Trait::latin1ToString("head"),
5592 Trait::latin1ToString("header"), Trait::latin1ToString("hr"), Trait::latin1ToString("html"), Trait::latin1ToString("iframe"),
5593 Trait::latin1ToString("legend"), Trait::latin1ToString("li"), Trait::latin1ToString("link"), Trait::latin1ToString("main"),
5594 Trait::latin1ToString("menu"), Trait::latin1ToString("menuitem"), Trait::latin1ToString("nav"), Trait::latin1ToString("noframes"),
5595 Trait::latin1ToString("ol"), Trait::latin1ToString("optgroup"), Trait::latin1ToString("option"), Trait::latin1ToString("p"),
5596 Trait::latin1ToString("param"), Trait::latin1ToString("section"), Trait::latin1ToString("search"), Trait::latin1ToString("summary"),
5597 Trait::latin1ToString("table"), Trait::latin1ToString("tbody"), Trait::latin1ToString("td"), Trait::latin1ToString("tfoot"),
5598 Trait::latin1ToString("th"), Trait::latin1ToString("thead"), Trait::latin1ToString("title"), Trait::latin1ToString("tr"),
5599 Trait::latin1ToString("track"), Trait::latin1ToString("ul")};
5600
5601 for (long long int i = 1; i < tag.size(); ++i) {
5602 if (!s_validHtmlTagLetters.contains(tag[i])) {
5603 return -1;
5604 }
5605 }
5606
5607 if (s_rule6.find(tag) != s_rule6.cend()) {
5608 return 6;
5609 } else {
5610 bool tag = false;
5611
5612 std::tie(tag, std::ignore, std::ignore, std::ignore, std::ignore) =
5613 isHtmlTag(it->m_line, it->m_pos, po, 7);
5614
5615 if (tag) {
5616 return 7;
5617 }
5618 }
5619 }
5620
5621 return -1;
5622}
5623
5624template<class Trait>
5625inline typename Parser<Trait>::Delims::const_iterator
5626Parser<Trait>::checkForRawHtml(typename Delims::const_iterator it,
5627 typename Delims::const_iterator last,
5628 TextParsingOpts<Trait> &po)
5629{
5630 const auto rule = htmlTagRule(it, last, po);
5631
5632 if (rule == -1) {
5633 resetHtmlTag(po.m_html);
5634
5635 po.m_firstInParagraph = false;
5636
5637 return it;
5638 }
5639
5640 po.m_html.m_htmlBlockType = rule;
5641 po.m_html.m_html.reset(new RawHtml<Trait>);
5642 po.m_html.m_html->setStartColumn(po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos));
5643 po.m_html.m_html->setStartLine(po.m_fr.m_data.at(it->m_line).second.m_lineNumber);
5644
5645 return finishRawHtmlTag(it, last, po, true);
5646}
5647
5648template<class Trait>
5649inline typename Parser<Trait>::Delims::const_iterator
5650Parser<Trait>::finishRule7HtmlTag(typename Delims::const_iterator it,
5651 typename Delims::const_iterator last,
5652 TextParsingOpts<Trait> &po)
5653{
5654 if (it != last) {
5655 const auto start = it;
5656 long long int l = -1, p = -1;
5657 bool onLine = false;
5658 bool ok = false;
5659
5660 std::tie(ok, l, p, onLine, std::ignore) = isHtmlTag(it->m_line, it->m_pos, po, 7);
5661
5662 onLine = onLine && it->m_line == 0 && l == start->m_line;
5663
5664 if (ok) {
5665 eatRawHtml(po.m_line, po.m_pos, l, ++p, po, !onLine, 7, onLine);
5666
5667 po.m_html.m_onLine = onLine;
5668
5669 it = findIt(it, last, po);
5670
5671 if (onLine) {
5672 for (; it != last; ++it) {
5673 if (it->m_type == Delimiter::Less) {
5674 const auto rule = htmlTagRule(it, last, po);
5675
5676 if (rule != -1 && rule != 7) {
5677 eatRawHtml(po.m_line, po.m_pos, it->m_line, it->m_pos, po, true, 7, onLine, true);
5678
5679 return std::prev(it);
5680 }
5681 }
5682 }
5683
5684 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, false, 7, onLine, true);
5685
5686 return std::prev(last);
5687 } else {
5688 return it;
5689 }
5690 } else {
5691 return it;
5692 }
5693 } else {
5694 if (po.m_html.m_onLine) {
5695 eatRawHtml(po.m_line, po.m_pos, po.m_fr.m_data.size() - 1, -1, po, true, 7, true);
5696
5697 return last;
5698 } else {
5699 resetHtmlTag(po.m_html);
5700 }
5701 }
5702
5703 return it;
5704}
5705
5706template<class Trait>
5707inline typename Parser<Trait>::Delims::const_iterator
5708Parser<Trait>::checkForMath(typename Delims::const_iterator it,
5709 typename Delims::const_iterator last,
5710 TextParsingOpts<Trait> &po)
5711{
5712 po.m_wasRefLink = false;
5713 po.m_firstInParagraph = false;
5714
5715 const auto end = std::find_if(std::next(it), last, [&](const auto &d) {
5716 return (d.m_type == Delimiter::Math && d.m_len == it->m_len);
5717 });
5718
5719 if (end != last && end->m_line <= po.m_lastTextLine) {
5720 typename Trait::String math;
5721
5722 if (it->m_line == end->m_line) {
5723 math = po.m_fr.m_data[it->m_line].first.asString().sliced(
5724 it->m_pos + it->m_len, end->m_pos - (it->m_pos + it->m_len));
5725 } else {
5726 math = po.m_fr.m_data[it->m_line].first.asString().sliced(it->m_pos + it->m_len);
5727
5728 for (long long int i = it->m_line + 1; i < end->m_line; ++i) {
5729 math.push_back(Trait::latin1ToChar('\n'));
5730 math.push_back(po.m_fr.m_data[i].first.asString());
5731 }
5732
5733 math.push_back(Trait::latin1ToChar('\n'));
5734 math.push_back(po.m_fr.m_data[end->m_line].first.asString().sliced(0, end->m_pos));
5735 }
5736
5737 if (!po.m_collectRefLinks) {
5738 std::shared_ptr<Math<Trait>> m(new Math<Trait>);
5739
5740 auto startLine = po.m_fr.m_data.at(it->m_line).second.m_lineNumber;
5741 auto startColumn = po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len);
5742
5743 if (it->m_pos + it->m_len >= po.m_fr.m_data.at(it->m_line).first.length()) {
5744 std::tie(startColumn, startLine) = nextPosition(po.m_fr, startColumn, startLine);
5745 }
5746
5747 auto endColumn = po.m_fr.m_data.at(end->m_line).first.virginPos(end->m_pos);
5748 auto endLine = po.m_fr.m_data.at(end->m_line).second.m_lineNumber;
5749
5750 if (endColumn == 0) {
5751 std::tie(endColumn, endLine) = prevPosition(po.m_fr, endColumn, endLine);
5752 } else {
5753 --endColumn;
5754 }
5755
5756 m->setStartColumn(startColumn);
5757 m->setStartLine(startLine);
5758 m->setEndColumn(endColumn);
5759 m->setEndLine(endLine);
5760 m->setInline(it->m_len == 1);
5761 m->setStartDelim({po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos),
5762 po.m_fr.m_data[it->m_line].second.m_lineNumber,
5763 po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos + it->m_len - 1),
5764 po.m_fr.m_data[it->m_line].second.m_lineNumber});
5765 m->setEndDelim({po.m_fr.m_data[end->m_line].first.virginPos(end->m_pos),
5766 po.m_fr.m_data[end->m_line].second.m_lineNumber,
5767 po.m_fr.m_data[end->m_line].first.virginPos(end->m_pos + end->m_len - 1),
5768 po.m_fr.m_data[end->m_line].second.m_lineNumber});
5769 m->setFensedCode(false);
5770
5771 initLastItemWithOpts<Trait>(po, m);
5772
5773 if (math.startsWith(Trait::latin1ToString("`")) &&
5774 math.endsWith(Trait::latin1ToString("`")) &&
5775 !math.endsWith(Trait::latin1ToString("\\`")) &&
5776 math.length() > 1) {
5777 math = math.sliced(1, math.length() - 2);
5778 }
5779
5780 m->setExpr(math);
5781
5782 po.m_parent->appendItem(m);
5783
5784 po.m_pos = end->m_pos + end->m_len;
5785 po.m_line = end->m_line;
5786 po.m_lastText = nullptr;
5787 }
5788
5789 return end;
5790 }
5791
5792 return it;
5793}
5794
5795template<class Trait>
5796inline typename Parser<Trait>::Delims::const_iterator
5797Parser<Trait>::checkForAutolinkHtml(typename Delims::const_iterator it,
5798 typename Delims::const_iterator last,
5799 TextParsingOpts<Trait> &po,
5800 bool updatePos)
5801{
5802 const auto nit = std::find_if(std::next(it), last, [](const auto &d) {
5803 return (d.m_type == Delimiter::Greater);
5804 });
5805
5806 if (nit != last) {
5807 if (nit->m_line == it->m_line) {
5808 const auto url = po.m_fr.m_data.at(it->m_line).first.asString().sliced(
5809 it->m_pos + 1, nit->m_pos - it->m_pos - 1);
5810
5811 bool isUrl = true;
5812
5813 for (long long int i = 0; i < url.size(); ++i) {
5814 if (url[i].isSpace()) {
5815 isUrl = false;
5816
5817 break;
5818 }
5819 }
5820
5821 if (isUrl) {
5822 if (!isValidUrl<Trait>(url) && !isEmail<Trait>(url)) {
5823 isUrl = false;
5824 }
5825 }
5826
5827 if (isUrl) {
5828 if (!po.m_collectRefLinks) {
5829 std::shared_ptr<Link<Trait>> lnk(new Link<Trait>);
5830 lnk->setStartColumn(po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos));
5831 lnk->setStartLine(po.m_fr.m_data.at(it->m_line).second.m_lineNumber);
5832 lnk->setEndColumn(po.m_fr.m_data.at(nit->m_line).first.virginPos(nit->m_pos + nit->m_len - 1));
5833 lnk->setEndLine(po.m_fr.m_data.at(nit->m_line).second.m_lineNumber);
5834 lnk->setUrl(url);
5835 lnk->setOpts(po.m_opts);
5836 lnk->setTextPos({po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos + 1),
5837 po.m_fr.m_data[it->m_line].second.m_lineNumber,
5838 po.m_fr.m_data[nit->m_line].first.virginPos(nit->m_pos - 1),
5839 po.m_fr.m_data[nit->m_line].second.m_lineNumber});
5840 lnk->setUrlPos(lnk->textPos());
5841 po.m_parent->appendItem(lnk);
5842 }
5843
5844 po.m_wasRefLink = false;
5845 po.m_firstInParagraph = false;
5846 po.m_lastText = nullptr;
5847
5848 if (updatePos) {
5849 po.m_pos = nit->m_pos + nit->m_len;
5850 po.m_line = nit->m_line;
5851 }
5852
5853 return nit;
5854 } else {
5855 return checkForRawHtml(it, last, po);
5856 }
5857 } else {
5858 return checkForRawHtml(it, last, po);
5859 }
5860 } else {
5861 return checkForRawHtml(it, last, po);
5862 }
5863}
5864
5865template<class Trait>
5866inline void
5867Parser<Trait>::makeInlineCode(long long int startLine,
5868 long long int startPos,
5869 long long int lastLine,
5870 long long int lastPos,
5871 TextParsingOpts<Trait> &po,
5872 typename Delims::const_iterator startDelimIt,
5873 typename Delims::const_iterator endDelimIt)
5874{
5875 typename Trait::String c;
5876
5877 for (; po.m_line <= lastLine; ++po.m_line) {
5878 c.push_back(po.m_fr.m_data.at(po.m_line).first.asString().sliced(
5879 po.m_pos, (po.m_line == lastLine ? lastPos - po.m_pos :
5880 po.m_fr.m_data.at(po.m_line).first.length() - po.m_pos)));
5881
5882 if (po.m_line < lastLine) {
5883 c.push_back(Trait::latin1ToChar(' '));
5884 }
5885
5886 po.m_pos = 0;
5887 }
5888
5889 po.m_line = lastLine;
5890
5891 if (c[0] == Trait::latin1ToChar(' ') && c[c.size() - 1] == Trait::latin1ToChar(' ') &&
5892 skipSpaces<Trait>(0, c) < c.size()) {
5893 c.remove(0, 1);
5894 c.remove(c.size() - 1, 1);
5895 ++startPos;
5896 --lastPos;
5897 }
5898
5899 if (!c.isEmpty()) {
5900 auto code = std::make_shared<Code<Trait>>(c, false, true);
5901
5902 code->setStartColumn(po.m_fr.m_data.at(startLine).first.virginPos(startPos));
5903 code->setStartLine(po.m_fr.m_data.at(startLine).second.m_lineNumber);
5904 code->setEndColumn(po.m_fr.m_data.at(lastLine).first.virginPos(lastPos - 1));
5905 code->setEndLine(po.m_fr.m_data.at(lastLine).second.m_lineNumber);
5906 code->setStartDelim({po.m_fr.m_data.at(startDelimIt->m_line).first.virginPos(
5907 startDelimIt->m_pos + (startDelimIt->m_backslashed ? 1 : 0)),
5908 po.m_fr.m_data.at(startDelimIt->m_line).second.m_lineNumber,
5909 po.m_fr.m_data.at(startDelimIt->m_line).first.virginPos(
5910 startDelimIt->m_pos + (startDelimIt->m_backslashed ? 1 : 0)) +
5911 startDelimIt->m_len - 1 - (startDelimIt->m_backslashed ? 1 : 0),
5912 po.m_fr.m_data.at(startDelimIt->m_line).second.m_lineNumber});
5913 code->setEndDelim(
5914 {po.m_fr.m_data.at(endDelimIt->m_line).first.virginPos(
5915 endDelimIt->m_pos + (endDelimIt->m_backslashed ? 1 : 0)),
5916 po.m_fr.m_data.at(endDelimIt->m_line).second.m_lineNumber,
5917 po.m_fr.m_data.at(endDelimIt->m_line).first.virginPos(
5918 endDelimIt->m_pos + (endDelimIt->m_backslashed ? 1 : 0) +
5919 endDelimIt->m_len - 1 - (endDelimIt->m_backslashed ? 1 : 0)),
5920 po.m_fr.m_data.at(endDelimIt->m_line).second.m_lineNumber});
5921 code->setOpts(po.m_opts);
5922
5923 initLastItemWithOpts<Trait>(po, code);
5924
5925 po.m_parent->appendItem(code);
5926 }
5927
5928 po.m_wasRefLink = false;
5929 po.m_firstInParagraph = false;
5930 po.m_lastText = nullptr;
5931}
5932
5933template<class Trait>
5934inline typename Parser<Trait>::Delims::const_iterator
5935Parser<Trait>::checkForInlineCode(typename Delims::const_iterator it,
5936 typename Delims::const_iterator last,
5937 TextParsingOpts<Trait> &po)
5938{
5939 const auto len = it->m_len;
5940 const auto start = it;
5941
5942 po.m_wasRefLink = false;
5943 po.m_firstInParagraph = false;
5944
5945 ++it;
5946
5947 for (; it != last; ++it) {
5948 if (it->m_line <= po.m_lastTextLine) {
5949 const auto p = skipSpaces<Trait>(0, po.m_fr.m_data.at(it->m_line).first.asString());
5950 const auto withoutSpaces = po.m_fr.m_data.at(it->m_line).first.asString().sliced(p);
5951
5952 if ((it->m_type == Delimiter::HorizontalLine && withoutSpaces[0] == Trait::latin1ToChar('-')) ||
5953 it->m_type == Delimiter::H1 || it->m_type == Delimiter::H2) {
5954 break;
5955 } else if (it->m_type == Delimiter::InlineCode && (it->m_len - (it->m_backslashed ? 1 : 0)) == len) {
5956 if (!po.m_collectRefLinks) {
5957 makeText(start->m_line, start->m_pos, po);
5958
5959 po.m_pos = start->m_pos + start->m_len;
5960
5961 makeInlineCode(start->m_line, start->m_pos + start->m_len, it->m_line,
5962 it->m_pos + (it->m_backslashed ? 1 : 0), po, start, it);
5963
5964 po.m_line = it->m_line;
5965 po.m_pos = it->m_pos + it->m_len;
5966 }
5967
5968 return it;
5969 }
5970 } else {
5971 break;
5972 }
5973 }
5974
5975 if (!po.m_collectRefLinks) {
5976 makeText(start->m_line, start->m_pos + start->m_len, po);
5977 }
5978
5979 return start;
5980}
5981
5982template<class Trait>
5983inline std::pair<typename MdBlock<Trait>::Data, typename Parser<Trait>::Delims::const_iterator>
5984Parser<Trait>::readTextBetweenSquareBrackets(typename Delims::const_iterator start,
5985 typename Delims::const_iterator it,
5986 typename Delims::const_iterator last,
5987 TextParsingOpts<Trait> &po,
5988 bool doNotCreateTextOnFail,
5989 WithPosition *pos)
5990{
5991 if (it != last && it->m_line <= po.m_lastTextLine) {
5992 if (start->m_line == it->m_line) {
5993 const auto p = start->m_pos + start->m_len;
5994 const auto n = it->m_pos - p;
5995
5996 if (pos) {
5997 long long int startPos, startLine, endPos, endLine;
5998 std::tie(startPos, startLine) = nextPosition(po.m_fr,
5999 po.m_fr.m_data[start->m_line].first.virginPos(
6000 start->m_pos + start->m_len - 1),
6001 po.m_fr.m_data[start->m_line].second.m_lineNumber);
6002 std::tie(endPos, endLine) =
6003 prevPosition(po.m_fr, po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos),
6004 po.m_fr.m_data[it->m_line].second.m_lineNumber);
6005
6006 *pos = {startPos, startLine, endPos, endLine};
6007 }
6008
6009 return {{{po.m_fr.m_data.at(start->m_line).first.sliced(p, n),
6010 {po.m_fr.m_data.at(start->m_line).second.m_lineNumber}}}, it};
6011 } else {
6012 if (it->m_line - start->m_line < 3) {
6013 typename MdBlock<Trait>::Data res;
6014 res.push_back({po.m_fr.m_data.at(start->m_line).first.sliced(
6015 start->m_pos + start->m_len), po.m_fr.m_data.at(start->m_line).second});
6016
6017 long long int i = start->m_line + 1;
6018
6019 for (; i <= it->m_line; ++i) {
6020 if (i == it->m_line) {
6021 res.push_back({po.m_fr.m_data.at(i).first.sliced(0, it->m_pos),
6022 po.m_fr.m_data.at(i).second});
6023 } else {
6024 res.push_back({po.m_fr.m_data.at(i).first, po.m_fr.m_data.at(i).second});
6025 }
6026 }
6027
6028 if (pos) {
6029 long long int startPos, startLine, endPos, endLine;
6030 std::tie(startPos, startLine) = nextPosition(po.m_fr,
6031 po.m_fr.m_data[start->m_line].first.virginPos(
6032 start->m_pos + start->m_len - 1),
6033 po.m_fr.m_data[start->m_line].second.m_lineNumber);
6034 std::tie(endPos, endLine) =
6035 prevPosition(po.m_fr, po.m_fr.m_data[it->m_line].first.virginPos(it->m_pos),
6036 po.m_fr.m_data[it->m_line].second.m_lineNumber);
6037
6038 *pos = {startPos, startLine, endPos, endLine};
6039 }
6040
6041 return {res, it};
6042 } else {
6043 if (!po.m_collectRefLinks && !doNotCreateTextOnFail) {
6044 makeText(start->m_line, start->m_pos + start->m_len, po);
6045 }
6046
6047 return {{}, start};
6048 }
6049 }
6050 } else {
6051 if (!po.m_collectRefLinks && !doNotCreateTextOnFail) {
6052 makeText(start->m_line, start->m_pos + start->m_len, po);
6053 }
6054
6055 return {{}, start};
6056 }
6057}
6058
6059template<class Trait>
6060inline std::pair<typename MdBlock<Trait>::Data, typename Parser<Trait>::Delims::const_iterator>
6061Parser<Trait>::checkForLinkText(typename Delims::const_iterator it,
6062 typename Delims::const_iterator last,
6063 TextParsingOpts<Trait> &po,
6064 WithPosition *pos)
6065{
6066 const auto start = it;
6067
6068 long long int brackets = 0;
6069
6070 const bool collectRefLinks = po.m_collectRefLinks;
6071 po.m_collectRefLinks = true;
6072 long long int l = po.m_line, p = po.m_pos;
6073
6074 for (it = std::next(it); it != last; ++it) {
6075 bool quit = false;
6076
6077 switch (it->m_type) {
6078 case Delimiter::SquareBracketsClose: {
6079 if (!brackets)
6080 quit = true;
6081 else
6082 --brackets;
6083 } break;
6084
6085 case Delimiter::SquareBracketsOpen:
6086 case Delimiter::ImageOpen:
6087 ++brackets;
6088 break;
6089
6090 case Delimiter::InlineCode:
6091 it = checkForInlineCode(it, last, po);
6092 break;
6093
6094 case Delimiter::Less:
6095 it = checkForAutolinkHtml(it, last, po, false);
6096 break;
6097
6098 default:
6099 break;
6100 }
6101
6102 if (quit) {
6103 break;
6104 }
6105 }
6106
6107 const auto r = readTextBetweenSquareBrackets(start, it, last, po, false, pos);
6108
6109 po.m_collectRefLinks = collectRefLinks;
6110 resetHtmlTag(po.m_html);
6111 po.m_line = l;
6112 po.m_pos = p;
6113
6114 return r;
6115}
6116
6117template<class Trait>
6118inline std::pair<typename MdBlock<Trait>::Data, typename Parser<Trait>::Delims::const_iterator>
6119Parser<Trait>::checkForLinkLabel(typename Delims::const_iterator it,
6120 typename Delims::const_iterator last,
6121 TextParsingOpts<Trait> &po,
6122 WithPosition *pos)
6123{
6124 const auto start = it;
6125
6126 for (it = std::next(it); it != last; ++it) {
6127 bool quit = false;
6128
6129 switch (it->m_type) {
6130 case Delimiter::SquareBracketsClose: {
6131 quit = true;
6132 } break;
6133
6134 case Delimiter::SquareBracketsOpen:
6135 case Delimiter::ImageOpen: {
6136 it = last;
6137 quit = true;
6138 } break;
6139
6140 default:
6141 break;
6142 }
6143
6144 if (quit)
6145 break;
6146 }
6147
6148 return readTextBetweenSquareBrackets(start, it, last, po, true, pos);
6149}
6150
6151template<class Trait>
6152inline typename Trait::String
6153Parser<Trait>::toSingleLine(const typename MdBlock<Trait>::Data &d)
6154{
6155 typename Trait::String res;
6156 bool first = true;
6157
6158 for (const auto &s : d) {
6159 if (!first) {
6160 res.push_back(Trait::latin1ToChar(' '));
6161 }
6162 res.push_back(s.first.asString().simplified());
6163 first = false;
6164 }
6165
6166 return res;
6167}
6168
6169template<class Trait>
6170inline std::shared_ptr<Link<Trait>>
6171Parser<Trait>::makeLink(const typename Trait::String &url,
6172 const typename MdBlock<Trait>::Data &text,
6173 TextParsingOpts<Trait> &po,
6174 bool doNotCreateTextOnFail,
6175 long long int startLine,
6176 long long int startPos,
6177 long long int lastLine,
6178 long long int lastPos,
6179 const WithPosition &textPos,
6180 const WithPosition &urlPos)
6181{
6182 MD_UNUSED(doNotCreateTextOnFail)
6183
6184 typename Trait::String u = (url.startsWith(Trait::latin1ToString("#")) ?
6185 url : removeBackslashes<typename Trait::String, Trait>(replaceEntity<Trait>(url)));
6186
6187 if (!u.isEmpty()) {
6188 if (!u.startsWith(Trait::latin1ToString("#"))) {
6189 const auto checkForFile = [&](typename Trait::String &url,
6190 const typename Trait::String &ref = {}) -> bool {
6191 if (Trait::fileExists(url)) {
6192 url = Trait::absoluteFilePath(url);
6193
6194 if (!po.m_collectRefLinks) {
6195 po.m_linksToParse.push_back(url);
6196 }
6197
6198 if (!ref.isEmpty()) {
6199 url = ref + Trait::latin1ToString("/") + url;
6200 }
6201
6202 return true;
6203 } else if (Trait::fileExists(url, po.m_workingPath)) {
6204 url = Trait::absoluteFilePath(po.m_workingPath + Trait::latin1ToString("/") + url);
6205
6206 if (!po.m_collectRefLinks) {
6207 po.m_linksToParse.push_back(url);
6208 }
6209
6210 if (!ref.isEmpty()) {
6211 url = ref + Trait::latin1ToString("/") + url;
6212 }
6213
6214 return true;
6215 } else {
6216 return false;
6217 }
6218 };
6219
6220 if (!checkForFile(u) && u.contains(Trait::latin1ToChar('#'))) {
6221 const auto i = u.indexOf(Trait::latin1ToChar('#'));
6222 const auto ref = u.sliced(i);
6223 u = u.sliced(0, i);
6224
6225 if (!checkForFile(u, ref)) {
6226 u = u + ref;
6227 }
6228 }
6229 } else
6230 u = u + (po.m_workingPath.isEmpty() ? typename Trait::String() :
6231 Trait::latin1ToString("/") + po.m_workingPath) + Trait::latin1ToString("/") +
6232 po.m_fileName;
6233 }
6234
6235 std::shared_ptr<Link<Trait>> link(new Link<Trait>);
6236 link->setUrl(u);
6237 link->setOpts(po.m_opts);
6238 link->setTextPos(textPos);
6239 link->setUrlPos(urlPos);
6240
6241 MdBlock<Trait> block = {text, 0};
6242
6243 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
6244
6245 RawHtmlBlock<Trait> html;
6246
6247 parseFormattedTextLinksImages(block,
6248 std::static_pointer_cast<Block<Trait>>(p),
6249 po.m_doc,
6250 po.m_linksToParse,
6251 po.m_workingPath,
6252 po.m_fileName,
6253 po.m_collectRefLinks,
6254 true,
6255 html,
6256 true);
6257
6258 if (!p->isEmpty()) {
6259 std::shared_ptr<Image<Trait>> img;
6260
6261 if (p->items().size() == 1 && p->items().at(0)->type() == ItemType::Paragraph) {
6262 const auto ip = std::static_pointer_cast<Paragraph<Trait>>(p->items().at(0));
6263
6264 for (auto it = ip->items().cbegin(), last = ip->items().cend(); it != last; ++it) {
6265 switch ((*it)->type()) {
6266 case ItemType::Link:
6267 return {};
6268
6269 case ItemType::Image: {
6270 img = std::static_pointer_cast<Image<Trait>>(*it);
6271 } break;
6272
6273 default:
6274 break;
6275 }
6276 }
6277
6278 if (img.get()) {
6279 link->setImg(img);
6280 }
6281
6282 link->setP(ip);
6283 }
6284 }
6285
6286 if (html.m_html.get()) {
6287 link->p()->appendItem(html.m_html);
6288 }
6289
6290 link->setText(toSingleLine(text));
6291 link->setStartColumn(po.m_fr.m_data.at(startLine).first.virginPos(startPos));
6292 link->setStartLine(po.m_fr.m_data.at(startLine).second.m_lineNumber);
6293 link->setEndColumn(po.m_fr.m_data.at(lastLine).first.virginPos(lastPos - 1));
6294 link->setEndLine(po.m_fr.m_data.at(lastLine).second.m_lineNumber);
6295
6296 initLastItemWithOpts<Trait>(po, link);
6297
6298 po.m_lastText = nullptr;
6299
6300 return link;
6301}
6302
6303template<class Trait>
6304inline bool
6305Parser<Trait>::createShortcutLink(const typename MdBlock<Trait>::Data &text,
6306 TextParsingOpts<Trait> &po,
6307 long long int startLine,
6308 long long int startPos,
6309 long long int lastLineForText,
6310 long long int lastPosForText,
6311 typename Delims::const_iterator lastIt,
6312 const typename MdBlock<Trait>::Data &linkText,
6313 bool doNotCreateTextOnFail,
6314 const WithPosition &textPos,
6315 const WithPosition &linkTextPos)
6316{
6317 const auto u = Trait::latin1ToString("#") + toSingleLine(text).toCaseFolded().toUpper();
6318 const auto url = u + Trait::latin1ToString("/") + (po.m_workingPath.isEmpty() ?
6319 typename Trait::String() : po.m_workingPath + Trait::latin1ToString("/")) + po.m_fileName;
6320
6321 po.m_wasRefLink = false;
6322 po.m_firstInParagraph = false;
6323
6324 if (po.m_doc->labeledLinks().find(url) != po.m_doc->labeledLinks().cend()) {
6325 if (!po.m_collectRefLinks) {
6326 const auto isLinkTextEmpty = toSingleLine(linkText).isEmpty();
6327
6328 const auto link = makeLink(u,
6329 removeBackslashes<Trait>(isLinkTextEmpty ? text : linkText),
6330 po,
6331 doNotCreateTextOnFail,
6332 startLine,
6333 startPos,
6334 lastIt->m_line,
6335 lastIt->m_pos + lastIt->m_len,
6336 (isLinkTextEmpty ? textPos : linkTextPos),
6337 textPos);
6338
6339 if (link.get()) {
6340 po.m_linksToParse.push_back(url);
6341 po.m_parent->appendItem(link);
6342
6343 po.m_line = lastIt->m_line;
6344 po.m_pos = lastIt->m_pos + lastIt->m_len;
6345 } else {
6346 if (!po.m_collectRefLinks && !doNotCreateTextOnFail) {
6347 makeText(lastLineForText, lastPosForText, po);
6348 }
6349
6350 return false;
6351 }
6352 }
6353
6354 return true;
6355 } else if (!po.m_collectRefLinks && !doNotCreateTextOnFail) {
6356 makeText(lastLineForText, lastPosForText, po);
6357 }
6358
6359 return false;
6360}
6361
6362template<class Trait>
6363inline std::shared_ptr<Image<Trait>>
6364Parser<Trait>::makeImage(const typename Trait::String &url,
6365 const typename MdBlock<Trait>::Data &text,
6366 TextParsingOpts<Trait> &po,
6367 bool doNotCreateTextOnFail,
6368 long long int startLine,
6369 long long int startPos,
6370 long long int lastLine,
6371 long long int lastPos,
6372 const WithPosition &textPos,
6373 const WithPosition &urlPos)
6374{
6375 MD_UNUSED(doNotCreateTextOnFail)
6376
6377 std::shared_ptr<Image<Trait>> img(new Image<Trait>);
6378
6379 typename Trait::String u = (url.startsWith(Trait::latin1ToString("#")) ? url :
6380 removeBackslashes<typename Trait::String, Trait>(replaceEntity<Trait>(url)));
6381
6382 if (Trait::fileExists(u)) {
6383 img->setUrl(u);
6384 } else if (Trait::fileExists(u, po.m_workingPath)) {
6385 img->setUrl(po.m_workingPath + Trait::latin1ToString("/") + u);
6386 } else {
6387 img->setUrl(u);
6388 }
6389
6390 MdBlock<Trait> block = {text, 0};
6391
6392 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
6393
6394 RawHtmlBlock<Trait> html;
6395
6396 parseFormattedTextLinksImages(block,
6397 std::static_pointer_cast<Block<Trait>>(p),
6398 po.m_doc,
6399 po.m_linksToParse,
6400 po.m_workingPath,
6401 po.m_fileName,
6402 po.m_collectRefLinks,
6403 true,
6404 html,
6405 true);
6406
6407 if (!p->isEmpty()) {
6408 if (p->items().size() == 1 && p->items().at(0)->type() == ItemType::Paragraph) {
6409 img->setP(std::static_pointer_cast<Paragraph<Trait>>(p->items().at(0)));
6410 }
6411 }
6412
6413 img->setText(toSingleLine(removeBackslashes<Trait>(text)));
6414 img->setStartColumn(po.m_fr.m_data.at(startLine).first.virginPos(startPos));
6415 img->setStartLine(po.m_fr.m_data.at(startLine).second.m_lineNumber);
6416 img->setEndColumn(po.m_fr.m_data.at(lastLine).first.virginPos(lastPos - 1));
6417 img->setEndLine(po.m_fr.m_data.at(lastLine).second.m_lineNumber);
6418 img->setTextPos(textPos);
6419 img->setUrlPos(urlPos);
6420
6421 initLastItemWithOpts<Trait>(po, img);
6422
6423 po.m_lastText = nullptr;
6424
6425 return img;
6426}
6427
6428template<class Trait>
6429inline bool
6430Parser<Trait>::createShortcutImage(const typename MdBlock<Trait>::Data &text,
6431 TextParsingOpts<Trait> &po,
6432 long long int startLine,
6433 long long int startPos,
6434 long long int lastLineForText,
6435 long long int lastPosForText,
6436 typename Delims::const_iterator lastIt,
6437 const typename MdBlock<Trait>::Data &linkText,
6438 bool doNotCreateTextOnFail,
6439 const WithPosition &textPos,
6440 const WithPosition &linkTextPos)
6441{
6442 const auto url = Trait::latin1ToString("#") + toSingleLine(text).toCaseFolded().toUpper() +
6443 Trait::latin1ToString("/") + (po.m_workingPath.isEmpty() ? typename Trait::String() :
6444 po.m_workingPath + Trait::latin1ToString("/")) + po.m_fileName;
6445
6446 po.m_wasRefLink = false;
6447 po.m_firstInParagraph = false;
6448
6449 const auto iit = po.m_doc->labeledLinks().find(url);
6450
6451 if (iit != po.m_doc->labeledLinks().cend()) {
6452 if (!po.m_collectRefLinks) {
6453 const auto isLinkTextEmpty = toSingleLine(linkText).isEmpty();
6454
6455 const auto img = makeImage(iit->second->url(),
6456 (isLinkTextEmpty ? text : linkText),
6457 po,
6458 doNotCreateTextOnFail,
6459 startLine,
6460 startPos,
6461 lastIt->m_line,
6462 lastIt->m_pos + lastIt->m_len,
6463 (isLinkTextEmpty ? textPos : linkTextPos),
6464 textPos);
6465
6466 po.m_parent->appendItem(img);
6467
6468 po.m_line = lastIt->m_line;
6469 po.m_pos = lastIt->m_pos + lastIt->m_len;
6470 }
6471
6472 return true;
6473 } else if (!po.m_collectRefLinks && !doNotCreateTextOnFail) {
6474 makeText(lastLineForText, lastPosForText, po);
6475 }
6476
6477 return false;
6478}
6479
6480//! Skip space in the block up to 1 new line.
6481template<class Trait>
6482inline void
6483skipSpacesUpTo1Line(long long int &line,
6484 long long int &pos,
6485 const typename MdBlock<Trait>::Data &fr)
6486{
6487 pos = skipSpaces<Trait>(pos, fr.at(line).first.asString());
6488
6489 if (pos == fr.at(line).first.length() && line + 1 < (long long int)fr.size()) {
6490 ++line;
6491 pos = skipSpaces<Trait>(0, fr.at(line).first.asString());
6492 }
6493}
6494
6495//! Read link's destination.
6496template<class Trait>
6497inline std::tuple<long long int, long long int, bool, typename Trait::String, long long int>
6498readLinkDestination(long long int line,
6499 long long int pos,
6500 const TextParsingOpts<Trait> &po,
6501 WithPosition *urlPos = nullptr)
6502{
6503 skipSpacesUpTo1Line<Trait>(line, pos, po.m_fr.m_data);
6504
6505 const auto destLine = line;
6506 const auto &s = po.m_fr.m_data.at(line).first.asString();
6507 bool backslash = false;
6508
6509 if (pos < s.length() && line <= po.m_lastTextLine) {
6510 if (s[pos] == Trait::latin1ToChar('<')) {
6511 ++pos;
6512
6513 if (urlPos) {
6514 urlPos->setStartColumn(po.m_fr.m_data[line].first.virginPos(pos));
6515 urlPos->setStartLine(po.m_fr.m_data[line].second.m_lineNumber);
6516 }
6517
6518 const auto start = pos;
6519
6520 while (pos < s.size()) {
6521 bool now = false;
6522
6523 if (s[pos] == Trait::latin1ToChar('\\') && !backslash) {
6524 backslash = true;
6525 now = true;
6526 } else if (!backslash && s[pos] == Trait::latin1ToChar('<')) {
6527 return {line, pos, false, {}, destLine};
6528 } else if (!backslash && s[pos] == Trait::latin1ToChar('>')) {
6529 break;
6530 }
6531
6532 if (!now) {
6533 backslash = false;
6534 }
6535
6536 ++pos;
6537 }
6538
6539 if (pos < s.size() && s[pos] == Trait::latin1ToChar('>')) {
6540 if (urlPos) {
6541 urlPos->setEndColumn(po.m_fr.m_data[line].first.virginPos(pos - 1));
6542 urlPos->setEndLine(po.m_fr.m_data[line].second.m_lineNumber);
6543 }
6544
6545 ++pos;
6546
6547 return {line, pos, true, s.sliced(start, pos - start - 1), destLine};
6548 } else {
6549 return {line, pos, false, {}, destLine};
6550 }
6551 } else {
6552 long long int pc = 0;
6553
6554 const auto start = pos;
6555
6556 if (urlPos) {
6557 urlPos->setStartColumn(po.m_fr.m_data[line].first.virginPos(pos));
6558 urlPos->setStartLine(po.m_fr.m_data[line].second.m_lineNumber);
6559 }
6560
6561 while (pos < s.size()) {
6562 bool now = false;
6563
6564 if (s[pos] == Trait::latin1ToChar('\\') && !backslash) {
6565 backslash = true;
6566 now = true;
6567 } else if (!backslash && s[pos] == Trait::latin1ToChar(' ')) {
6568 if (!pc) {
6569 if (urlPos) {
6570 urlPos->setEndColumn(po.m_fr.m_data[line].first.virginPos(pos - 1));
6571 urlPos->setEndLine(po.m_fr.m_data[line].second.m_lineNumber);
6572 }
6573
6574 return {line, pos, true, s.sliced(start, pos - start), destLine};
6575 } else {
6576 return {line, pos, false, {}, destLine};
6577 }
6578 } else if (!backslash && s[pos] == Trait::latin1ToChar('(')) {
6579 ++pc;
6580 } else if (!backslash && s[pos] == Trait::latin1ToChar(')')) {
6581 if (!pc) {
6582 if (urlPos) {
6583 urlPos->setEndColumn(po.m_fr.m_data[line].first.virginPos(pos - 1));
6584 urlPos->setEndLine(po.m_fr.m_data[line].second.m_lineNumber);
6585 }
6586
6587 return {line, pos, true, s.sliced(start, pos - start), destLine};
6588 } else {
6589 --pc;
6590 }
6591 }
6592
6593 if (!now) {
6594 backslash = false;
6595 }
6596
6597 ++pos;
6598 }
6599
6600 if (urlPos) {
6601 urlPos->setEndColumn(po.m_fr.m_data[line].first.virginPos(pos - 1));
6602 urlPos->setEndLine(po.m_fr.m_data[line].second.m_lineNumber);
6603 }
6604
6605 return {line, pos, true, s.sliced(start, pos - start), destLine};
6606 }
6607 } else {
6608 return {line, pos, false, {}, destLine};
6609 }
6610}
6611
6612//! Read link's title.
6613template<class Trait>
6614inline std::tuple<long long int, long long int, bool, typename Trait::String, long long int>
6615readLinkTitle(long long int line,
6616 long long int pos,
6617 const TextParsingOpts<Trait> &po)
6618{
6619 const auto space = (pos < po.m_fr.m_data.at(line).first.length() ?
6620 po.m_fr.m_data.at(line).first[pos].isSpace() : true);
6621
6622 const auto firstLine = line;
6623
6624 skipSpacesUpTo1Line<Trait>(line, pos, po.m_fr.m_data);
6625
6626 if (pos >= po.m_fr.m_data.at(line).first.length()) {
6627 return {line, pos, true, {}, firstLine};
6628 }
6629
6630 const auto sc = po.m_fr.m_data.at(line).first[pos];
6631
6632 if (sc != Trait::latin1ToChar('"') && sc != Trait::latin1ToChar('\'') &&
6633 sc != Trait::latin1ToChar('(') && sc != Trait::latin1ToChar(')')) {
6634 return {line, pos, (firstLine != line && line <= po.m_lastTextLine), {}, firstLine};
6635 } else if (!space && sc != Trait::latin1ToChar(')')) {
6636 return {line, pos, false, {}, firstLine};
6637 }
6638
6639 if (sc == Trait::latin1ToChar(')')) {
6640 return {line, pos, line <= po.m_lastTextLine, {}, firstLine};
6641 }
6642
6643 const auto startLine = line;
6644
6645 bool backslash = false;
6646
6647 ++pos;
6648
6649 skipSpacesUpTo1Line<Trait>(line, pos, po.m_fr.m_data);
6650
6651 typename Trait::String title;
6652
6653 while (line < (long long int)po.m_fr.m_data.size() && pos < po.m_fr.m_data.at(line).first.length()) {
6654 bool now = false;
6655
6656 if (po.m_fr.m_data.at(line).first[pos] == Trait::latin1ToChar('\\') && !backslash) {
6657 backslash = true;
6658 now = true;
6659 } else if (sc == Trait::latin1ToChar('(') &&
6660 po.m_fr.m_data.at(line).first[pos] == Trait::latin1ToChar(')') && !backslash) {
6661 ++pos;
6662 return {line, pos, line <= po.m_lastTextLine, title, startLine};
6663 } else if (sc == Trait::latin1ToChar('(') &&
6664 po.m_fr.m_data.at(line).first[pos] == Trait::latin1ToChar('(') && !backslash) {
6665 return {line, pos, false, {}, startLine};
6666 } else if (sc != Trait::latin1ToChar('(') && po.m_fr.m_data.at(line).first[pos] == sc && !backslash) {
6667 ++pos;
6668 return {line, pos, line <= po.m_lastTextLine, title, startLine};
6669 } else {
6670 title.push_back(po.m_fr.m_data.at(line).first[pos]);
6671 }
6672
6673 if (!now) {
6674 backslash = false;
6675 }
6676
6677 ++pos;
6678
6679 if (pos == po.m_fr.m_data.at(line).first.length()) {
6680 skipSpacesUpTo1Line<Trait>(line, pos, po.m_fr.m_data);
6681 }
6682 }
6683
6684 return {line, pos, false, {}, startLine};
6685}
6686
6687template<class Trait>
6688inline std::tuple<typename Trait::String, typename Trait::String, typename Parser<Trait>::Delims::const_iterator, bool>
6689Parser<Trait>::checkForInlineLink(typename Delims::const_iterator it,
6690 typename Delims::const_iterator last,
6691 TextParsingOpts<Trait> &po,
6692 WithPosition *urlPos)
6693{
6694 long long int p = it->m_pos + it->m_len;
6695 long long int l = it->m_line;
6696 bool ok = false;
6697 typename Trait::String dest, title;
6698 long long int destStartLine = 0;
6699
6700 std::tie(l, p, ok, dest, destStartLine) = readLinkDestination<Trait>(l, p, po, urlPos);
6701
6702 if (!ok) {
6703 return {{}, {}, it, false};
6704 }
6705
6706 long long int s = 0;
6707
6708 std::tie(l, p, ok, title, s) = readLinkTitle<Trait>(l, p, po);
6709
6710 skipSpacesUpTo1Line<Trait>(l, p, po.m_fr.m_data);
6711
6712 if (!ok || (l >= (long long int)po.m_fr.m_data.size() || p >= po.m_fr.m_data.at(l).first.length() ||
6713 po.m_fr.m_data.at(l).first[p] != Trait::latin1ToChar(')'))) {
6714 return {{}, {}, it, false};
6715 }
6716
6717 for (; it != last; ++it) {
6718 if (it->m_line == l && it->m_pos == p) {
6719 return {dest, title, it, true};
6720 }
6721 }
6722
6723 return {{}, {}, it, false};
6724}
6725
6726template<class Trait>
6727inline std::tuple<typename Trait::String, typename Trait::String, typename Parser<Trait>::Delims::const_iterator, bool>
6728Parser<Trait>::checkForRefLink(typename Delims::const_iterator it,
6729 typename Delims::const_iterator last,
6730 TextParsingOpts<Trait> &po,
6731 WithPosition *urlPos)
6732{
6733 long long int p = it->m_pos + it->m_len + 1;
6734 long long int l = it->m_line;
6735 bool ok = false;
6736 typename Trait::String dest, title;
6737 long long int destStartLine = 0;
6738
6739 std::tie(l, p, ok, dest, destStartLine) = readLinkDestination<Trait>(l, p, po, urlPos);
6740
6741 if (!ok) {
6742 return {{}, {}, it, false};
6743 }
6744
6745 long long int titleStartLine = 0;
6746
6747 std::tie(l, p, ok, title, titleStartLine) = readLinkTitle<Trait>(l, p, po);
6748
6749 if (!ok) {
6750 return {{}, {}, it, false};
6751 }
6752
6753 if (!title.isEmpty()) {
6754 p = skipSpaces<Trait>(p, po.m_fr.m_data.at(l).first.asString());
6755
6756 if (titleStartLine == destStartLine && p < po.m_fr.m_data.at(l).first.length()) {
6757 return {{}, {}, it, false};
6758 } else if (titleStartLine != destStartLine && p < po.m_fr.m_data.at(l).first.length()) {
6759 l = destStartLine;
6760 p = po.m_fr.m_data.at(l).first.length();
6761 title.clear();
6762 }
6763 }
6764
6765 for (; it != last; ++it) {
6766 if (it->m_line > l || (it->m_line == l && it->m_pos >= p)) {
6767 break;
6768 }
6769 }
6770
6771 po.m_line = l;
6772 po.m_pos = p;
6773
6774 return {dest, title, std::prev(it), true};
6775}
6776
6777template<class Trait>
6778inline typename Parser<Trait>::Delims::const_iterator
6779Parser<Trait>::checkForImage(typename Delims::const_iterator it,
6780 typename Delims::const_iterator last,
6781 TextParsingOpts<Trait> &po)
6782{
6783 const auto start = it;
6784
6785 typename MdBlock<Trait>::Data text;
6786
6787 po.m_wasRefLink = false;
6788 po.m_firstInParagraph = false;
6789
6790 WithPosition textPos;
6791 std::tie(text, it) = checkForLinkText(it, last, po, &textPos);
6792
6793 if (it != start) {
6794 if (it->m_pos + it->m_len < po.m_fr.m_data.at(it->m_line).first.length()) {
6795 // Inline -> (
6796 if (po.m_fr.m_data.at(it->m_line).first[it->m_pos + it->m_len] == Trait::latin1ToChar('(')) {
6797 typename Trait::String url, title;
6798 typename Delims::const_iterator iit;
6799 bool ok;
6800
6801 WithPosition urlPos;
6802 std::tie(url, title, iit, ok) = checkForInlineLink(std::next(it), last, po, &urlPos);
6803
6804 if (ok) {
6805 if (!po.m_collectRefLinks) {
6806 po.m_parent->appendItem(
6807 makeImage(url, text, po, false, start->m_line, start->m_pos,
6808 iit->m_line, iit->m_pos + iit->m_len, textPos, urlPos));
6809 }
6810
6811 po.m_line = iit->m_line;
6812 po.m_pos = iit->m_pos + iit->m_len;
6813
6814 return iit;
6815 } else if (createShortcutImage(text, po, start->m_line, start->m_pos, start->m_line,
6816 start->m_pos + start->m_len, it, {}, false, textPos, {})) {
6817 return it;
6818 }
6819 }
6820 // Reference -> [
6821 else if (po.m_fr.m_data.at(it->m_line).first[it->m_pos + it->m_len] == Trait::latin1ToChar('[')) {
6822 typename MdBlock<Trait>::Data label;
6823 typename Delims::const_iterator lit;
6824
6825 WithPosition labelPos;
6826 std::tie(label, lit) = checkForLinkLabel(std::next(it), last, po, &labelPos);
6827
6828 if (lit != std::next(it)) {
6829 const auto isLabelEmpty = toSingleLine(label).isEmpty();
6830
6831 if (!isLabelEmpty
6832 && createShortcutImage(label,
6833 po,
6834 start->m_line,
6835 start->m_pos,
6836 start->m_line,
6837 start->m_pos + start->m_len,
6838 lit,
6839 text,
6840 true,
6841 labelPos,
6842 textPos)) {
6843 return lit;
6844 } else if (isLabelEmpty
6845 && createShortcutImage(text,
6846 po,
6847 start->m_line,
6848 start->m_pos,
6849 start->m_line,
6850 start->m_pos + start->m_len,
6851 lit,
6852 {},
6853 false,
6854 textPos,
6855 {})) {
6856 return lit;
6857 }
6858 } else if (createShortcutImage(text, po, start->m_line, start->m_pos, start->m_line,
6859 start->m_pos + start->m_len, it, {}, false, textPos, {})) {
6860 return it;
6861 }
6862 } else {
6863 return checkShortcut(start, last, po, &Parser<Trait>::createShortcutImage);
6864 }
6865 } else {
6866 return checkShortcut(start, last, po, &Parser<Trait>::createShortcutImage);
6867 }
6868 }
6869
6870 return start;
6871}
6872
//! Handle the `[` delimiter at \p it. Tried in order: footnote reference (`[^id]`),
//! reference link definition (`[label]:`), inline link (`[text](url)`), reference link
//! (`[text][label]`); everything else is passed to checkShortcut() with createShortcutLink().
//! \param it Delimiter to start from.
//! \param last End of the delimiters.
//! \param po Text parsing options. m_wasRefLink and m_firstInParagraph are reset here,
//! m_line / m_pos are moved behind the footnote reference or the inline link when one is made.
//! \return \p it as it was passed in if nothing was recognized, otherwise the iterator
//! produced by the branch that matched.
6873template<class Trait>
6874inline typename Parser<Trait>::Delims::const_iterator
6875Parser<Trait>::checkForLink(typename Delims::const_iterator it,
6876 typename Delims::const_iterator last,
6877 TextParsingOpts<Trait> &po)
6878{
6879 const auto start = it;
6880
6881 typename MdBlock<Trait>::Data text;
6882
// Remember the flags and reset them. The saved values are needed below to decide
// whether a reference link definition is allowed at this place.
6883 const auto wasRefLink = po.m_wasRefLink;
6884 const auto firstInParagraph = po.m_firstInParagraph;
6885 po.m_wasRefLink = false;
6886 po.m_firstInParagraph = false;
6887
// Index of the first non-space character in the current line (po.m_line).
6888 const auto ns = skipSpaces<Trait>(0, po.m_fr.m_data.at(po.m_line).first.asString());
6889
6890 WithPosition textPos;
6891 std::tie(text, it) = checkForLinkText(it, last, po, &textPos);
6892
// Something is done only if checkForLinkText() moved the iterator.
6893 if (it != start) {
6894 // Footnote reference.
// Text is a single line, longer than one character, starts with `^`,
// and both delimiters are on the same line.
6895 if (text.front().first.asString().startsWith(Trait::latin1ToString("^")) &&
6896 text.front().first.asString().length() > 1 && text.size() == 1 &&
6897 start->m_line == it->m_line) {
// The item is created only when m_collectRefLinks is off, position is moved in any case.
6898 if (!po.m_collectRefLinks) {
// Identifier has the form "#<UPPERCASED TEXT>/<working path>/<file name>",
// working path with its separator is omitted when empty.
6899 std::shared_ptr<FootnoteRef<Trait>> fnr(new FootnoteRef<Trait>(
6900 Trait::latin1ToString("#") + toSingleLine(text).toCaseFolded().toUpper() +
6901 Trait::latin1ToString("/") + (po.m_workingPath.isEmpty() ? typename Trait::String() :
6902 po.m_workingPath + Trait::latin1ToString("/")) + po.m_fileName));
6903 fnr->setStartColumn(po.m_fr.m_data.at(start->m_line).first.virginPos(start->m_pos));
6904 fnr->setStartLine(po.m_fr.m_data.at(start->m_line).second.m_lineNumber);
6905 fnr->setEndColumn(po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len - 1));
6906 fnr->setEndLine(po.m_fr.m_data.at(it->m_line).second.m_lineNumber);
6907 fnr->setIdPos(textPos);
6908
// Text of the item: "[" + lines of the text joined with "\n" + "]".
6909 typename Trait::String fnrText = Trait::latin1ToString("[");
6910 bool firstFnrText = true;
6911
6912 for (const auto &t : text) {
6913 if (!firstFnrText) {
6914 fnrText.push_back(Trait::latin1ToString("\n"));
6915 }
6916
6917 firstFnrText = false;
6918
6919 fnrText.push_back(t.first.asString());
6920 }
6921
6922 fnrText.push_back(Trait::latin1ToString("]"));
6923 fnr->setText(fnrText);
6924 po.m_parent->appendItem(fnr);
6925
6926 initLastItemWithOpts<Trait>(po, fnr);
6927 }
6928
6929 po.m_line = it->m_line;
6930 po.m_pos = it->m_pos + it->m_len;
6931
6932 return it;
// There is at least one character after the closing delimiter; it selects the branch.
6933 } else if (it->m_pos + it->m_len < po.m_fr.m_data.at(it->m_line).first.length()) {
6934 // Reference definition -> :
6935 if (po.m_fr.m_data.at(it->m_line).first[it->m_pos + it->m_len] == Trait::latin1ToChar(':')) {
6936 // Reference definitions allowed only at start of paragraph.
// Additionally the `[` must be the first non-space character and be indented by less than 4.
6937 if ((po.m_line == 0 || wasRefLink || firstInParagraph) && ns < 4 && start->m_pos == ns) {
6938 typename Trait::String url, title;
6939 typename Delims::const_iterator iit;
6940 bool ok;
6941
6942 WithPosition labelPos;
6943
// The text is read again, this time as a link label.
6944 std::tie(text, it) = checkForLinkLabel(start, last, po, &labelPos);
6945
6946 if (it != start && !toSingleLine(text).simplified().isEmpty()) {
6947 WithPosition urlPos;
6948 std::tie(url, title, iit, ok) = checkForRefLink(it, last, po, &urlPos);
6949
6950 if (ok) {
// Same identifier scheme as for footnote references above.
6951 const auto label = Trait::latin1ToString("#") +
6952 toSingleLine(text).toCaseFolded().toUpper() +
6953 Trait::latin1ToString("/") +
6954 (po.m_workingPath.isEmpty() ? typename Trait::String() :
6955 po.m_workingPath + Trait::latin1ToString("/")) + po.m_fileName;
6956
6957 std::shared_ptr<Link<Trait>> link(new Link<Trait>);
6958 link->setStartColumn(po.m_fr.m_data.at(start->m_line).first.virginPos(
6959 start->m_pos));
6960 link->setStartLine(po.m_fr.m_data.at(start->m_line).second.m_lineNumber);
6961
// End of the definition is the position right before po.m_line / po.m_pos
// (presumably left behind the definition by checkForRefLink() - TODO confirm).
6962 const auto endPos = prevPosition(po.m_fr,
6963 po.m_fr.m_data.at(po.m_line).first.virginPos(po.m_pos),
6964 po.m_fr.m_data.at(po.m_line).second.m_lineNumber);
6965
6966 link->setEndColumn(endPos.first);
6967 link->setEndLine(endPos.second);
6968
6969 link->setTextPos(labelPos);
6970 link->setUrlPos(urlPos);
6971
6972 url = removeBackslashes<typename Trait::String, Trait>(
6973 replaceEntity<Trait>(url));
6974
// An URL that names an existing file (as is, or relative to the working path)
// is replaced with the absolute file path.
6975 if (!url.isEmpty()) {
6976 if (Trait::fileExists(url)) {
6977 url = Trait::absoluteFilePath(url);
6978 } else if (Trait::fileExists(url, po.m_workingPath)) {
6979 url = Trait::absoluteFilePath(
6980 (po.m_workingPath.isEmpty() ? typename Trait::String() :
6981 po.m_workingPath + Trait::latin1ToString("/")) + url);
6982 }
6983 }
6984
6985 link->setUrl(url);
6986
6987 po.m_wasRefLink = true;
6988
// An already registered label is not overwritten, the first definition stays.
6989 if (po.m_doc->labeledLinks().find(label) == po.m_doc->labeledLinks().cend()) {
6990 po.m_doc->insertLabeledLink(label, link);
6991 }
6992
6993 return iit;
6994 } else {
6995 return checkShortcut(start, last, po, &Parser<Trait>::createShortcutLink);
6996 }
6997 } else {
6998 return start;
6999 }
7000 } else {
7001 return checkShortcut(start, last, po, &Parser<Trait>::createShortcutLink);
7002 }
7003 }
7004 // Inline -> (
7005 else if (po.m_fr.m_data.at(it->m_line).first[it->m_pos + it->m_len] == Trait::latin1ToChar('(')) {
7006 typename Trait::String url, title;
7007 typename Delims::const_iterator iit;
7008 bool ok;
7009
7010 WithPosition urlPos;
7011 std::tie(url, title, iit, ok) = checkForInlineLink(std::next(it), last, po, &urlPos);
7012
7013 if (ok) {
7014 const auto link = makeLink(url,
7015 removeBackslashes<Trait>(text),
7016 po,
7017 false,
7018 start->m_line,
7019 start->m_pos,
7020 iit->m_line,
7021 iit->m_pos + iit->m_len,
7022 textPos,
7023 urlPos);
7024
// makeLink() may return a null pointer, in that case nothing is consumed.
7025 if (link.get()) {
7026 if (!po.m_collectRefLinks) {
7027 po.m_parent->appendItem(link);
7028 }
7029
7030 po.m_line = iit->m_line;
7031 po.m_pos = iit->m_pos + iit->m_len;
7032
7033 return iit;
7034 } else {
7035 return start;
7036 }
// Not an inline link: try the text alone as a shortcut link. If this fails too,
// control falls through to the final "return start".
7037 } else if (createShortcutLink(text, po, start->m_line, start->m_pos, start->m_line,
7038 start->m_pos + start->m_len, it, {}, false, textPos, {})) {
7039 return it;
7040 }
7041 }
7042 // Reference -> [
7043 else if (po.m_fr.m_data.at(it->m_line).first[it->m_pos + it->m_len] == Trait::latin1ToChar('[')) {
7044 typename MdBlock<Trait>::Data label;
7045 typename Delims::const_iterator lit;
7046
7047 WithPosition labelPos;
7048 std::tie(label, lit) = checkForLinkLabel(std::next(it), last, po, &labelPos);
7049
7050 const auto isLabelEmpty = toSingleLine(label).isEmpty();
7051
// A label was read: with a non-empty label the label is looked up and the text is
// passed along; with an empty label (`[text][]`) the text itself is looked up.
7052 if (lit != std::next(it)) {
7053 if (!isLabelEmpty
7054 && createShortcutLink(label,
7055 po,
7056 start->m_line,
7057 start->m_pos,
7058 start->m_line,
7059 start->m_pos + start->m_len,
7060 lit,
7061 text,
7062 true,
7063 labelPos,
7064 textPos)) {
7065 return lit;
7066 } else if (isLabelEmpty
7067 && createShortcutLink(text,
7068 po,
7069 start->m_line,
7070 start->m_pos,
7071 start->m_line,
7072 start->m_pos + start->m_len,
7073 lit,
7074 {},
7075 false,
7076 textPos,
7077 {})) {
7078 return lit;
7079 }
// No label could be read: only the text is tried as a shortcut link.
7080 } else if (createShortcutLink(text, po, start->m_line, start->m_pos, start->m_line,
7081 start->m_pos + start->m_len, it, {}, false, textPos, {})) {
7082 return it;
7083 }
7084 } else {
7085 return checkShortcut(start, last, po, &Parser<Trait>::createShortcutLink);
7086 }
7087 } else {
// Closing delimiter is the last thing in the line.
7088 return checkShortcut(start, last, po, &Parser<Trait>::createShortcutLink);
7089 }
7090 }
7091
// Nothing was recognized, the caller gets the iterator it passed in.
7092 return start;
7093}
7094
7095//! \return Is the given style close previous corresponding style?
7096inline bool
7097isClosingStyle(const std::vector<std::pair<Style, long long int>> &styles,
7098 Style s)
7099{
7100 const auto it = std::find_if(styles.cbegin(), styles.cend(), [&](const auto &p) {
7101 return (p.first == s);
7102 });
7103
7104 return it != styles.cend();
7105}
7106
7107//! Close style.
7108inline void
7109closeStyle(std::vector<std::pair<Style, long long int>> &styles,
7110 Style s)
7111{
7112 const auto it = std::find_if(styles.crbegin(), styles.crend(), [&](const auto &p) {
7113 return (p.first == s);
7114 });
7115
7116 if (it != styles.crend()) {
7117 styles.erase(it.base() - 1);
7118 }
7119}
7120
7121//! Apply styles.
7122inline void
7123applyStyles(int &opts,
7124 const std::vector<std::pair<Style, long long int>> &styles)
7125{
7126 opts = 0;
7127
7128 for (const auto &s : styles) {
7129 switch (s.first) {
7130 case Style::Strikethrough:
7131 opts |= StrikethroughText;
7132 break;
7133
7134 case Style::Italic1:
7135 case Style::Italic2:
7136 opts |= ItalicText;
7137 break;
7138
7139 case Style::Bold1:
7140 case Style::Bold2:
7141 opts |= BoldText;
7142 break;
7143
7144 default:
7145 break;
7146 }
7147 }
7148}
7149
//! Append possible emphasis delimiter to every variant in \p vars.
//! \param len Length of the delimiter (callers pass negative values for closing ones).
//! \param type Type of the delimiter as integer.
//! \param leftAndRight Is the delimiter both left and right flanking?
inline void
appendPossibleDelimiter(std::vector<std::vector<std::pair<std::pair<long long int, bool>, int>>> &vars,
                        long long int len,
                        int type,
                        bool leftAndRight)
{
    std::for_each(vars.begin(), vars.end(), [&](auto &variant) {
        variant.emplace_back(std::make_pair(len, leftAndRight), type);
    });
}
7161
//! \return Longest sequence of emphasis with more openings at start.
//! Among the sequences of maximum size the first one with the biggest count of leading
//! opening (positive length) delimiters is returned. If no such sequence starts with
//! an opening delimiter, an empty sequence is returned.
inline std::vector<std::pair<std::pair<long long int, bool>, int>>
longestSequenceWithMoreOpeningsAtStart(const std::vector<std::vector<std::pair<std::pair<long long int, bool>, int>>> &vars)
{
    using Sequence = std::vector<std::pair<std::pair<long long int, bool>, int>>;

    size_t longest = 0;

    for (const auto &candidate : vars) {
        longest = std::max(longest, candidate.size());
    }

    const Sequence *best = nullptr;
    size_t bestOpenings = 0;

    for (const auto &candidate : vars) {
        if (candidate.size() != longest) {
            continue;
        }

        // Count of opening delimiters before the first non-opening one.
        const auto firstNotOpening = std::find_if(candidate.cbegin(), candidate.cend(), [](const auto &d) {
            return (d.first.first <= 0);
        });
        const auto openings = static_cast<size_t>(std::distance(candidate.cbegin(), firstNotOpening));

        // Strict comparison keeps the first of equally good candidates.
        if (openings > bestOpenings) {
            bestOpenings = openings;
            best = &candidate;
        }
    }

    return (best ? *best : Sequence());
}
7199
7200//! Make variants of emphasies.
7201inline void
7202collectDelimiterVariants(std::vector<std::vector<std::pair<std::pair<long long int, bool>, int>>> &vars,
7203 long long int itLength,
7204 int type,
7205 bool leftFlanking,
7206 bool rightFlanking)
7207{
7208 {
7209 auto vars1 = vars;
7210 auto vars2 = vars;
7211
7212 vars.clear();
7213
7214 if (leftFlanking) {
7215 appendPossibleDelimiter(vars1, itLength, type, leftFlanking && rightFlanking);
7216 std::copy(vars1.cbegin(), vars1.cend(), std::back_inserter(vars));
7217 }
7218
7219 if (rightFlanking) {
7220 appendPossibleDelimiter(vars2, -itLength, type, leftFlanking && rightFlanking);
7221 std::copy(vars2.cbegin(), vars2.cend(), std::back_inserter(vars));
7222 }
7223 }
7224}
7225
7226template<class Trait>
7227inline void
7228Parser<Trait>::createStyles(std::vector<std::pair<Style, long long int>> &s,
7229 long long int l,
7230 typename Delimiter::DelimiterType t,
7231 long long int &count)
7232{
7233 if (t != Delimiter::Strikethrough) {
7234 if (l % 2 == 1) {
7235 s.push_back({t == Delimiter::Emphasis1 ? Style::Italic1 : Style::Italic2, 1});
7236 ++count;
7237 }
7238
7239 if (l >= 2) {
7240 for (long long int i = 0; i < l / 2; ++i) {
7241 s.push_back({t == Delimiter::Emphasis1 ? Style::Bold1 : Style::Bold2, 2});
7242 count += 2;
7243 }
7244 }
7245 } else {
7246 s.push_back({Style::Strikethrough, l});
7247 ++count;
7248 }
7249}
7250
7251template<class Trait>
7252inline bool
7253Parser<Trait>::isSequence(typename Delims::const_iterator it,
7254 long long int itLine,
7255 long long int itPos,
7256 typename Delimiter::DelimiterType t)
7257{
7258 return (itLine == it->m_line && itPos + it->m_len == it->m_pos && it->m_type == t);
7259}
7260
7261template<class Trait>
7262inline typename Parser<Trait>::Delims::const_iterator
7263Parser<Trait>::readSequence(typename Delims::const_iterator it,
7264 typename Delims::const_iterator last,
7265 long long int &line,
7266 long long int &pos,
7267 long long int &len,
7268 typename Delims::const_iterator &current)
7269{
7270 line = it->m_line;
7271 pos = it->m_pos;
7272 len = it->m_len;
7273 current = it;
7274 const auto t = it->m_type;
7275
7276 it = std::next(it);
7277
7278 while (it != last && isSequence(it, line, pos, t)) {
7279 current = it;
7280
7281 pos += it->m_len;
7282 len += it->m_len;
7283
7284 ++it;
7285 }
7286
7287 return std::prev(it);
7288}
7289
7290template<class Trait>
7291inline int
7292Parser<Trait>::emphasisToInt(typename Delimiter::DelimiterType t)
7293{
7294 switch (t) {
7295 case Delimiter::Strikethrough:
7296 return 0;
7297
7298 case Delimiter::Emphasis1:
7299 return 1;
7300
7301 case Delimiter::Emphasis2:
7302 return 2;
7303
7304 default:
7305 return -1;
7306 }
7307}
7308
7309template<class Trait>
7310inline std::pair<bool, size_t>
7311Parser<Trait>::checkEmphasisSequence(const std::vector<std::pair<std::pair<long long int, bool>, int>> &s,
7312 size_t idx)
7313{
7314 static const auto strikeType = emphasisToInt(Delimiter::Strikethrough);
7315
7316 if (s[idx].second == strikeType) {
7317 if (s[idx].first.first > 0) {
7318 const auto len = s[idx].first.first;
7319
7320 const auto it = std::find_if(s.cbegin() + idx + 1, s.cend(), [len](const auto &p) {
7321 if (p.first.first == -len && p.second == strikeType) {
7322 return true;
7323 } else {
7324 return false;
7325 }
7326 });
7327
7328 if (it != s.cend()) {
7329 return {true, std::distance(s.cbegin(), it)};
7330 } else {
7331 return {false, 0};
7332 }
7333 } else {
7334 return {false, 0};
7335 }
7336 }
7337
7338 std::vector<std::pair<std::pair<long long int, bool>, int>> st;
7339
7340 size_t i = 0;
7341
7342 for (; i <= idx; ++i) {
7343 st.push_back(s[i]);
7344 }
7345
7346 for (; i < s.size(); ++i) {
7347 if (s[i].first.first < 0) {
7348 if (checkStack(st, s[i], idx)) {
7349 return {true, i};
7350 } else if (st.size() <= idx) {
7351 return {false, 0};
7352 }
7353 } else {
7354 st.push_back(s[i]);
7355 }
7356 }
7357
7358 return {false, 0};
7359}
7360
7361template<class Trait>
7362inline std::vector<std::pair<std::pair<long long int, bool>, int>>
7363Parser<Trait>::fixSequence(const std::vector<std::pair<std::pair<long long int, bool>, int>> &s)
7364{
7365 std::vector<std::pair<std::pair<long long int, bool>, int>> tmp;
7366 std::map<int, long long int> length;
7367
7368 for (const auto &p : s) {
7369 if (p.first.first < 0 && length[p.second] + p.first.first < 0) {
7370 tmp.push_back({{-length[p.second], p.first.second}, p.second});
7371
7372 length[p.second] = 0;
7373 } else {
7374 tmp.push_back(p);
7375
7376 length[p.second] += p.first.first;
7377 }
7378 }
7379
7380 return tmp;
7381}
7382
7383template<class Trait>
7384inline std::vector<std::vector<std::pair<std::pair<long long int, bool>, int>>>
7385Parser<Trait>::closedSequences(const std::vector<std::vector<std::pair<std::pair<long long int, bool>, int>>> &vars,
7386 size_t idx)
7387{
7388 std::vector<std::vector<std::pair<std::pair<long long int, bool>, int>>> tmp;
7389
7390 const auto longest = longestSequenceWithMoreOpeningsAtStart(vars);
7391
7392 for (const auto &v : vars) {
7393 if (longest.size() == v.size()) {
7394 bool closed = false;
7395 std::tie(closed, std::ignore) = checkEmphasisSequence(v, idx);
7396
7397 if (closed) {
7398 tmp.push_back(fixSequence(v));
7399 }
7400 }
7401 }
7402
7403 return tmp;
7404}
7405
7406template<class Trait>
7407inline std::vector<std::pair<Style, long long int>>
7408Parser<Trait>::createStyles(const std::vector<std::pair<std::pair<long long int, bool>, int>> &s,
7409 size_t i,
7410 typename Delimiter::DelimiterType t,
7411 long long int &count)
7412{
7413 std::vector<std::pair<Style, long long int>> styles;
7414
7415 const size_t idx = i;
7416 long long int len = s[i].first.first;
7417
7418 size_t closeIdx = 0;
7419 std::tie(std::ignore, closeIdx) = checkEmphasisSequence(s, i);
7420
7421 for (i = closeIdx;; --i) {
7422 if (s[i].second == s[idx].second && s[i].first.first < 0) {
7423 auto l = std::abs(s[i].first.first);
7424
7425 createStyles(styles, std::min(l, len), t, count);
7426
7427 len -= std::min(l, len);
7428
7429 if (!len) {
7430 break;
7431 }
7432 }
7433
7434 if (i == 0) {
7435 break;
7436 }
7437 }
7438
7439 return styles;
7440}
7441
//! \return Should the whole opening run at index \p idx of \p s be skipped?
//! It is so when the run is both left and right flanking, and for the first following
//! closing delimiter of the same type the sum of lengths is a multiple of 3 while
//! the lengths are not both multiples of 3.
template<class Trait>
inline bool
isSkipAllEmphasis(const std::vector<std::pair<std::pair<long long int, bool>, int>> &s,
                  size_t idx)
{
    const auto &opening = s[idx];

    if (!opening.first.second) {
        return false;
    }

    const auto closing = std::find_if(s.cbegin() + idx + 1, s.cend(), [&opening](const auto &d) {
        return (d.second == opening.second && d.first.first < 0);
    });

    if (closing == s.cend()) {
        return false;
    }

    const auto openLen = opening.first.first;
    // Negative, as this is a closing delimiter.
    const auto closeLen = closing->first.first;

    const bool sumIsMultipleOf3 = ((openLen - closeLen) % 3 == 0);
    const bool bothAreMultiplesOf3 = (openLen % 3 == 0 && closeLen % 3 == 0);

    return (sumIsMultipleOf3 && !bothAreMultiplesOf3);
}
7458
//! Look ahead from the opening emphasis / strikethrough delimiter \p it and find out
//! whether the style it opens is closed later in the text.
//! Delimiters are only scanned here: m_line, m_pos and m_collectRefLinks of \p po are
//! saved at start and restored after the scan.
//! \param it Opening delimiter.
//! \param last End of the delimiters.
//! \param po Text parsing options.
//! \return Tuple of: is style closed, styles to open ({Style::Unknown, 0} if not closed),
//! length (of the opening run, or of the single delimiter \p it when not closed and
//! isSkipAllEmphasis() is false), count (filled by createStyles() if closed, 1 otherwise).
7459template<class Trait>
7460inline std::tuple<bool, std::vector<std::pair<Style, long long int>>, long long int, long long int>
7461Parser<Trait>::isStyleClosed(typename Delims::const_iterator it,
7462 typename Delims::const_iterator last,
7463 TextParsingOpts<Trait> &po)
7464{
7465 const auto open = it;
7466 auto current = it;
7467
// Every element of "vars" is one possible interpretation of the delimiters:
// {{length, is left and right flanking}, type}. Positive length is used for opening
// delimiters, negative for closing ones.
7468 std::vector<std::vector<std::pair<std::pair<long long int, bool>, int>>> vars, closed;
7469 vars.push_back({});
7470
7471 long long int itLine = open->m_line, itPos = open->m_pos, itLength = open->m_len;
7472
// State of "po" to restore after the look-ahead.
7473 const long long int line = po.m_line, pos = po.m_pos;
7474 const bool collectRefLinks = po.m_collectRefLinks;
7475
// With this flag set checkForLink() does not append items to the parent
// (presumably the same holds for the other check*() helpers called below - TODO confirm).
7476 po.m_collectRefLinks = true;
7477
7478 bool first = true;
7479
// Seed the first variant with the already opened styles: strikethrough (type 0) first...
7480 std::for_each(po.m_styles.cbegin(), po.m_styles.cend(), [&vars](const auto &p) {
7481 if (p.first == Style::Strikethrough) {
7482 vars.front().push_back({{p.second, false}, 0});
7483 }
7484 });
7485
7486 {
// ...then type 1: count of Italic1 styles and doubled count of Bold1 styles...
7487 {
7488 const auto c1 = std::count_if(po.m_styles.cbegin(), po.m_styles.cend(), [&](const auto &p) {
7489 return (p.first == Style::Italic1);
7490 });
7491
7492 if (c1) {
7493 vars.front().push_back({{c1, false}, 1});
7494 }
7495
7496 const auto c2 = std::count_if(po.m_styles.cbegin(),
7497 po.m_styles.cend(),
7498 [&](const auto &p) {
7499 return (p.first == Style::Bold1);
7500 }) * 2;
7501
7502 if (c2) {
7503 vars.front().push_back({{c2, false}, 1});
7504 }
7505 }
7506
// ...and the same for type 2 (Italic2 / Bold2).
7507 {
7508 const auto c1 = std::count_if(po.m_styles.cbegin(), po.m_styles.cend(), [&](const auto &p) {
7509 return (p.first == Style::Italic2);
7510 });
7511
7512 if (c1) {
7513 vars.front().push_back({{c1, false}, 2});
7514 }
7515
7516 const auto c2 = std::count_if(po.m_styles.cbegin(),
7517 po.m_styles.cend(),
7518 [&](const auto &p) {
7519 return (p.first == Style::Bold2);
7520 }) * 2;
7521
7522 if (c2) {
7523 vars.front().push_back({{c2, false}, 2});
7524 }
7525 }
7526 }
7527
// Index at which the run opened by "open" is going to be stored in the first variant.
7528 const auto idx = vars.front().size();
7529
// Scan delimiters up to the last text line. Links, images, autolinks / HTML and inline
// code are stepped over with their check*() helpers.
7530 for (; it != last; ++it) {
7531 if (it->m_line <= po.m_lastTextLine) {
7532 switch (it->m_type) {
7533 case Delimiter::SquareBracketsOpen:
7534 it = checkForLink(it, last, po);
7535 break;
7536
7537 case Delimiter::ImageOpen:
7538 it = checkForImage(it, last, po);
7539 break;
7540
7541 case Delimiter::Less:
7542 it = checkForAutolinkHtml(it, last, po, false);
7543 break;
7544
7545 case Delimiter::Strikethrough:
7546 case Delimiter::Emphasis1:
7547 case Delimiter::Emphasis2: {
// Adjacent delimiters of the same type are handled as one run of length itLength.
7548 it = readSequence(it, last, itLine, itPos, itLength, current);
7549
// The very first run is the one that is checked: it is stored as opening.
// Every following run multiplies the variants according to its flanking flags.
7550 if (first) {
7551 vars.front().push_back({{itLength, it->m_leftFlanking && it->m_rightFlanking},
7552 emphasisToInt(open->m_type)});
7553 first = false;
7554 } else {
7555 collectDelimiterVariants(vars, itLength, emphasisToInt(it->m_type),
7556 it->m_leftFlanking, it->m_rightFlanking);
7557 }
7558 } break;
7559
7560 case Delimiter::InlineCode:
7561 it = checkForInlineCode(it, last, po);
7562 break;
7563
7564 default:
7565 break;
7566 }
7567 } else {
7568 break;
7569 }
7570 }
7571
// Look-ahead is done, restore the state of "po".
7572 po.m_line = line;
7573 po.m_pos = pos;
7574 po.m_collectRefLinks = collectRefLinks;
7575
7576 closed = closedSequences(vars, idx);
7577
7578 if (!closed.empty()) {
7579 long long int itCount = 0;
7580
// NOTE(review): the order in which the elements of this braced list are evaluated
// matters, as createStyles() writes itCount that is returned by value as the last element.
7581 return {true, createStyles(longestSequenceWithMoreOpeningsAtStart(closed), idx,
7582 open->m_type, itCount), vars.front().at(idx).first.first, itCount};
7583 } else {
// Not closed: length is the whole opening run if isSkipAllEmphasis() says so,
// otherwise only the length of the first delimiter.
7584 return {false, {{Style::Unknown, 0}}, isSkipAllEmphasis<Trait>(vars.front(), idx) ?
7585 vars.front().at(idx).first.first : open->m_len, 1};
7586 }
7587}
7588
7589template<class Trait>
7590inline typename Parser<Trait>::Delims::const_iterator
7591Parser<Trait>::incrementIterator(typename Delims::const_iterator it,
7592 typename Delims::const_iterator last,
7593 long long int count)
7594{
7595 const auto len = std::distance(it, last);
7596
7597 if (count < len) {
7598 return it + count;
7599 } else {
7600 return it + (len - 1);
7601 }
7602}
7603
7604//! Append close style.
7605template<class Trait>
7606inline void
7608 const StyleDelim &s)
7609{
7610 if (po.m_lastItemWithStyle) {
7611 po.m_lastItemWithStyle->closeStyles().push_back(s);
7612 }
7613}
7614
//! Process the emphasis / strikethrough delimiter at \p it.
//! A right flanking delimiter is first tried as a closing one for the already opened
//! styles. If nothing was closed, a left flanking delimiter is tried as an opening one
//! with the help of isStyleClosed().
//! \param first Begin of the delimiters, the backward scan of the run stops there.
//! \param it Current delimiter.
//! \param last End of the delimiters.
//! \param po Text parsing options: styles, options, position and flags are updated.
//! \return Iterator to the last delimiter processed by this call
//! (\p it moved by incrementIterator()).
7615template<class Trait>
7616inline typename Parser<Trait>::Delims::const_iterator
7617Parser<Trait>::checkForStyle(typename Delims::const_iterator first,
7618 typename Delims::const_iterator it,
7619 typename Delims::const_iterator last,
7620 TextParsingOpts<Trait> &po)
7621{
7622 long long int count = 1;
7623
7624 po.m_wasRefLink = false;
7625 po.m_firstInParagraph = false;
7626
// Part 1: closing of opened styles.
7627 if (it->m_rightFlanking) {
7628 long long int line = it->m_line, pos = it->m_pos + it->m_len, ppos = it->m_pos;
7629 const auto t = it->m_type;
7630 long long int len = it->m_len;
7631
// Measure the whole run this delimiter belongs to: adjacent delimiters of the same
// type on the same line after it ("count" is the count of delimiters, "len" is
// the summary length)...
7632 for (auto j = std::next(it); j != last; ++j) {
7633 if (j->m_line == line && pos == j->m_pos && j->m_type == t) {
7634 len += j->m_len;
7635 pos = j->m_pos + j->m_len;
7636 ++count;
7637 } else {
7638 break;
7639 }
7640 }
7641
// ...and before it, not going further back than "first".
7642 if (it != first) {
7643 for (auto j = std::prev(it);; --j) {
7644 if (j->m_line == line && ppos - j->m_len == j->m_pos && j->m_type == t) {
7645 len += j->m_len;
7646 ppos = j->m_pos;
7647 ++count;
7648 } else {
7649 break;
7650 }
7651
7652 if (j == first) {
7653 break;
7654 }
7655 }
7656 }
7657
7658 long long int opened = 0;
7659
// Take the value stored with the most recently opened style that corresponds to
// the type of the delimiter.
// NOTE(review): "it" and "last" declared in this loop shadow the function parameters.
7660 for (auto it = po.m_styles.crbegin(), last = po.m_styles.crend(); it != last; ++it) {
7661 bool doBreak = false;
7662
7663 switch (t) {
7664 case Delimiter::Emphasis1: {
7665 if (it->first == Style::Italic1 || it->first == Style::Bold1) {
7666 opened = it->second;
7667 doBreak = true;
7668 }
7669 } break;
7670
7671 case Delimiter::Emphasis2: {
7672 if (it->first == Style::Italic2 || it->first == Style::Bold2) {
7673 opened = it->second;
7674 doBreak = true;
7675 }
7676 } break;
7677
7678 case Delimiter::Strikethrough: {
7679 if (it->first == Style::Strikethrough) {
7680 opened = it->second;
7681 doBreak = true;
7682 }
7683 } break;
7684
7685 default:
7686 break;
7687 }
7688
7689 if (doBreak)
7690 break;
7691 }
7692
// For a delimiter that is left flanking too: is "opened + len" a multiple of 3?
// (Looks like the CommonMark "multiple of 3" rule for emphasis - TODO confirm.)
7693 const bool sumMult3 = (it->m_leftFlanking ? ((opened + len) % 3 == 0) : false);
7694
// Close only if something is opened, and the multiple-of-3 check does not forbid it.
7695 if (count && opened && (!sumMult3 || (count % 3 == 0 && opened % 3 == 0))) {
// Not more can be closed than it was opened.
7696 if (count > opened) {
7697 count = opened;
7698 }
7699
// Position and line of the delimiter in the original (virgin) text.
// These "pos" and "line" shadow the variables of the enclosing block.
7700 auto pos = po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos);
7701 const auto line = po.m_fr.m_data.at(it->m_line).second.m_lineNumber;
7702
7703 if (it->m_type == Delimiter::Strikethrough) {
7704 const auto len = it->m_len;
7705
// One strikethrough style is closed per counted delimiter.
7706 for (auto i = 0; i < count; ++i) {
7707 closeStyle(po.m_styles, Style::Strikethrough);
7708 appendCloseStyle(po, {StrikethroughText, pos, line, pos + len - 1, line});
7709 pos += len;
7710 }
7711 } else {
// Odd count closes one italic style (one character)...
7712 if (count % 2 == 1) {
7713 const auto st = (it->m_type == Delimiter::Emphasis1 ? Style::Italic1 : Style::Italic2);
7714
7715 closeStyle(po.m_styles, st);
7716 appendCloseStyle(po, {ItalicText, pos, line, pos, line});
7717 ++pos;
7718 }
7719
// ...and every pair closes one bold style (two characters).
7720 if (count >= 2) {
7721 const auto st = (it->m_type == Delimiter::Emphasis1 ? Style::Bold1 : Style::Bold2);
7722
7723 for (auto i = 0; i < count / 2; ++i) {
7724 closeStyle(po.m_styles, st);
7725 appendCloseStyle(po, {BoldText, pos, line, pos + 1, line});
7726 pos += 2;
7727 }
7728 }
7729 }
7730
7731 applyStyles(po.m_opts, po.m_styles);
7732
// Current position is moved behind the last consumed delimiter.
7733 const auto j = incrementIterator(it, last, count - 1);
7734
7735 po.m_pos = j->m_pos + j->m_len;
7736 po.m_line = j->m_line;
7737
7738 return j;
7739 }
7740 }
7741
// Part 2: nothing was closed, try to open.
7742 count = 1;
7743
7744 if (it->m_leftFlanking) {
7745 switch (it->m_type) {
7746 case Delimiter::Strikethrough:
7747 case Delimiter::Emphasis1:
7748 case Delimiter::Emphasis2: {
7749 bool closed = false;
7750 std::vector<std::pair<Style, long long int>> styles;
7751 long long int len = 0;
7752
7753 std::tie(closed, styles, len, count) = isStyleClosed(it, last, po);
7754
7755 if (closed) {
7756 auto pos = po.m_fr.m_data.at(it->m_line).first.virginPos(it->m_pos);
7757 const auto line = po.m_fr.m_data.at(it->m_line).second.m_lineNumber;
7758
// Every returned style is opened. Opening delimiters are recorded in
// m_openStyles only when m_collectRefLinks is off.
7759 for (const auto &p : styles) {
7760 po.m_styles.push_back({p.first, p.second});
7761
7762 if (!po.m_collectRefLinks) {
7763 po.m_openStyles.push_back({styleToTextOption(p.first), pos, line,
7764 pos + p.second - 1, line});
7765 }
7766
7767 pos += p.second;
7768 }
7769
7770 po.m_pos = it->m_pos + len;
7771 po.m_line = it->m_line;
7772
7773 applyStyles(po.m_opts, po.m_styles);
// Style is not closed: the delimiters (length reported by isStyleClosed()) become text.
7774 } else if (!po.m_collectRefLinks) {
7775 makeText(it->m_line, it->m_pos + len, po);
7776 }
7777 } break;
7778
7779 default: {
7780 if (!po.m_collectRefLinks) {
7781 makeText(it->m_line, it->m_pos + it->m_len, po);
7782 }
7783 } break;
7784 }
7785 }
7786
// isStyleClosed() may have reported zero count, at least one delimiter is consumed.
7787 if (!count) {
7788 count = 1;
7789 }
7790
7791 resetHtmlTag(po.m_html);
7792
7793 return incrementIterator(it, last, count - 1);
7794}
7795
7796//! Concatenate texts in block.
//! Items in the range [it, last) are treated as Text items and merged into one new Text:
//! texts are joined in order, open styles and close styles of all items are collected
//! in order. Options and start position come from the first item, end position from
//! the last one. The range is expected to be not empty, as the first item is dereferenced.
//! \return New Text item.
7797template<class Trait>
7798inline std::shared_ptr<Text<Trait>>
// NOTE(review): the lines with the function name and the parameters ("it" and "last",
// as the body shows) are absent from this listing; call sites in this file use
// concatenateText<Trait>(start, it). Restore the declarator from upstream.
7801{
7802 std::shared_ptr<Text<Trait>> t(new Text<Trait>);
7803 t->setOpts(std::static_pointer_cast<Text<Trait>>(*it)->opts());
7804 t->setStartColumn((*it)->startColumn());
7805 t->setStartLine((*it)->startLine());
7806
// Close styles are gathered apart and assigned at the end.
7807 typename ItemWithOpts<Trait>::Styles close;
7808
7809 typename Trait::String data;
7810
7811 for (; it != last; ++it) {
7812 const auto tt = std::static_pointer_cast<Text<Trait>>(*it);
7813
7814 data.push_back(tt->text());
7815
7816 if (!tt->openStyles().empty()) {
7817 std::copy(tt->openStyles().cbegin(), tt->openStyles().cend(),
7818 std::back_inserter(t->openStyles()));
7819 }
7820
7821 if (!tt->closeStyles().empty()) {
7822 std::copy(tt->closeStyles().cbegin(), tt->closeStyles().cend(),
7823 std::back_inserter(close));
7824 }
7825 }
7826
// Step back to the last item of the range to take the end position from it.
7827 it = std::prev(it);
7828
7829 t->setText(data);
7830 t->setEndColumn((*it)->endColumn());
7831 t->setEndLine((*it)->endLine());
7832 t->closeStyles() = close;
7833
7834 return t;
7835}
7836
7837//! \return Is optimization type a semi one.
7838inline bool
7840{
7841 switch (t) {
7842 case OptimizeParagraphType::Semi:
7843 case OptimizeParagraphType::SemiWithoutRawData:
7844 return true;
7845
7846 default:
7847 return false;
7848 }
7849}
7850
7851//! \return Is optimization type without raw data optimization?
7852inline bool
7854{
7855 switch (t) {
7856 case OptimizeParagraphType::FullWithoutRawData:
7857 case OptimizeParagraphType::SemiWithoutRawData:
7858 return true;
7859
7860 default:
7861 return false;
7862 }
7863}
7864
7865//! Optimize Paragraph.
//! A new paragraph with the position of \p p is built, where runs of consecutive Text
//! items are merged with concatenateText(). Items of other types are appended as is.
//! A run is interrupted when options differ, when the next text does not start on the line
//! where the previous one ended, and, for semi optimization, at items that have close
//! styles (after them) or open styles (before them).
//! Unless the optimization type is a "without raw data" one, auxiliary text data in
//! \p po is concatenated in the same way with po.concatenateAuxText().
//! \return The new paragraph, it is assigned to \p p as well.
7866template<class Trait>
7867inline std::shared_ptr<Paragraph<Trait>>
// NOTE(review): the lines with the function name and the leading parameters ("p" and "po",
// as the body shows) are absent from this listing; the call site in this file uses
// optimizeParagraph<Trait>(p, po, type). Restore the declarator from upstream.
7870 OptimizeParagraphType type = OptimizeParagraphType::Full)
7871{
7872 std::shared_ptr<Paragraph<Trait>> np(new Paragraph<Trait>);
7873 np->setStartColumn(p->startColumn());
7874 np->setStartLine(p->startLine());
7875 np->setEndColumn(p->endColumn());
7876 np->setEndLine(p->endLine());
7877
// "start" is the first item of the current run of texts, cend() means "no run".
// "opts" and "line" describe the run, auxStart / auxIt are indexes for concatenateAuxText().
7878 int opts = TextWithoutFormat;
7879 auto start = p->items().cend();
7880 long long int line = -1;
7881 long long int auxStart = 0, auxIt = 0;
7882 bool finished = false;
7883
7884 for (auto it = p->items().cbegin(), last = p->items().cend(); it != last; ++it) {
7885 if ((*it)->type() == ItemType::Text) {
7886 const auto t = std::static_pointer_cast<Text<Trait>>(*it);
7887
// No run at the moment, this text starts a new one.
7888 if (start == last) {
7889 start = it;
7890 opts = t->opts();
7891 line = t->endLine();
7892 finished = (isSemiOptimization(type) && !t->closeStyles().empty());
7893 } else {
// This text can't be joined to the run: flush the run and start a new one here.
7894 if (opts != t->opts() || t->startLine() != line || finished ||
7895 (!t->openStyles().empty() && isSemiOptimization(type))) {
7896 if (!isWithoutRawDataOptimization(type)) {
7897 po.concatenateAuxText(auxStart, auxIt);
// Arithmetically this is auxStart + 1.
7898 auxIt = auxIt - (auxIt - auxStart) + 1;
7899 auxStart = auxIt;
7900 }
7901
7902 np->appendItem(concatenateText<Trait>(start, it));
7903 start = it;
7904 opts = t->opts();
7905 line = t->endLine();
7906 }
7907
7908 finished = (isSemiOptimization(type) && !t->closeStyles().empty());
7909 }
7910
// NOTE(review): one line of the listing is absent right before the increment below -
// confirm against upstream whether the increment is conditional.
7912 ++auxIt;
7913 } else {
7914 finished = false;
7915
// Not a text: flush the current run, if any, and append the item itself.
7916 if (start != last) {
7917 if (!isWithoutRawDataOptimization(type)) {
7918 po.concatenateAuxText(auxStart, auxIt);
7919 auxIt = auxIt - (auxIt - auxStart) + 1;
7920 auxStart = auxIt;
7921 }
7922
7923 np->appendItem(concatenateText<Trait>(start, it));
7924 start = last;
7925 opts = TextWithoutFormat;
7926 line = (*it)->endLine();
7927 }
7928
7929 np->appendItem((*it));
7930 }
7931 }
7932
// Flush the run that lasts till the end of the paragraph.
7933 if (start != p->items().cend()) {
7934 np->appendItem(concatenateText<Trait>(start, p->items().cend()));
7935
7936 if (!isWithoutRawDataOptimization(type)) {
7937 po.concatenateAuxText(auxStart, po.m_rawTextData.size());
7938 }
7939 }
7940
7941 p = np;
7942
7943 return p;
7944}
7945
7946template<class Trait>
7947inline void
7948Parser<Trait>::parseTableInParagraph(TextParsingOpts<Trait> &po,
7949 std::shared_ptr<Paragraph<Trait>> parent,
7950 std::shared_ptr<Document<Trait>> doc,
7951 typename Trait::StringList &linksToParse,
7952 const typename Trait::String &workingPath,
7953 const typename Trait::String &fileName,
7954 bool collectRefLinks)
7955{
7956 MdBlock<Trait> fr;
7957 std::copy(po.m_fr.m_data.cbegin() + po.m_startTableLine, po.m_fr.m_data.cend(),
7958 std::back_inserter(fr.m_data));
7959 fr.m_emptyLineAfter = po.m_fr.m_emptyLineAfter;
7960
7961 parseTable(fr, parent, doc, linksToParse, workingPath, fileName, collectRefLinks,
7962 po.m_columnsCount);
7963
7964 po.m_line = po.m_fr.m_data.size() - fr.m_data.size();
7965 po.m_pos = 0;
7966
7967 if (!fr.m_data.empty()) {
7968 po.m_detected = TextParsingOpts<Trait>::Detected::Code;
7969 }
7970}
7971
//! Normalize position.
//! A non-zero position \p pos equal to \p length (i.e. right behind the end of the line)
//! is moved to the start of the next line, as long as \p line is less than \p linesCount.
//! Otherwise nothing is changed.
inline void
normalizePos(long long int &pos,
             long long int &line,
             long long int length,
             long long int linesCount)
{
    const bool behindLineEnd = (pos != 0 && pos == length);

    if (!behindLineEnd || line >= linesCount) {
        return;
    }

    pos = 0;
    ++line;
}
7984
//! Check whether a list or a blockquote starts right after HTML.
//! The check is done only if HTML is the detected state and there is HTML to look at:
//! po.m_tmpHtml, or RawHtml as the last item of the parent. Otherwise false is returned.
//! Side effects: po.m_tmpHtml is reset on every "false" path; the detection is cleared
//! (set to Detected::Nothing) when the check was done, gave "false", and the examined line
//! is not an empty line.
//! \return True if the line at the (normalized) current position starts a list or a blockquote.
7985template<class Trait>
7986inline bool
7987Parser<Trait>::isListOrQuoteAfterHtml(TextParsingOpts<Trait> &po)
7988{
7989 if (po.m_detected == TextParsingOpts<Trait>::Detected::HTML &&
7990 ((!po.m_parent->items().empty() &&
7991 po.m_parent->items().back()->type() == ItemType::RawHtml) || po.m_tmpHtml.get())) {
// Temporary HTML has priority over the last item of the parent.
7992 auto html = (po.m_tmpHtml.get() ? po.m_tmpHtml :
7993 std::static_pointer_cast<RawHtml<Trait>>(po.m_parent->items().back()));
7994
7995 bool dontClearDetection = false;
7996
7997 long long int line = po.m_line;
7998 long long int pos = po.m_pos;
7999
// Position at the very end of a line is moved to the start of the next line.
8000 normalizePos(pos, line, line < static_cast<long long int>(po.m_fr.m_data.size()) ?
8001 po.m_fr.m_data[line].first.length() : 0, po.m_fr.m_data.size());
8002
// Block constructs are looked for only at the start of an existing line.
8003 if (pos == 0) {
8004 if (line < static_cast<long long int>(po.m_fr.m_data.size())) {
8005 const auto type = whatIsTheLine(po.m_fr.m_data[line].first);
8006
8007 switch (type) {
8008 case Parser<Trait>::BlockType::List: {
8009 int num = 0;
8010
// Ordered list counts only if it starts with number 1, any other list always counts.
8011 if (isOrderedList<Trait>(po.m_fr.m_data[line].first.asString(), &num)) {
8012 if (num == 1)
8013 return true;
8014 } else {
8015 return true;
8016 }
8017 } break;
8018
8019 case Parser<Trait>::BlockType::Blockquote:
8020 return true;
8021
// List with empty first line counts only after HTML that is a free tag.
8022 case Parser<Trait>::BlockType::ListWithFirstEmptyLine: {
8023 if (UnprotectedDocsMethods<Trait>::isFreeTag(html)) {
8024 return true;
8025 }
8026 } break;
8027
// Empty line keeps the HTML detection.
8028 case Parser<Trait>::BlockType::EmptyLine:
8029 dontClearDetection = true;
8030 break;
8031
8032 default:
8033 break;
8034 }
8035 }
8036 }
8037
8038 if (!dontClearDetection) {
8039 po.m_detected = TextParsingOpts<Trait>::Detected::Nothing;
8040 }
8041 }
8042
8043 po.m_tmpHtml.reset();
8044
8045 return false;
8046}
8047
8048//! Make Paragraph.
//! New paragraph gets the items of the range [first, last). Start position is taken from
//! the item at "first", end position from the last appended item.
//! Note that "*first" is dereferenced even when the range is empty, so "first" must
//! point to an item.
//! \return New paragraph.
8049template<class Trait>
8050inline std::shared_ptr<Paragraph<Trait>>
// NOTE(review): the lines with the function name and the parameters ("first" and "last",
// as the body shows) are absent from this listing; call sites in this file use
// makeParagraph<Trait>(first, it). Restore the declarator from upstream.
8053{
8054 auto p = std::make_shared<Paragraph<Trait>>();
8055
8056 p->setStartColumn((*first)->startColumn());
8057 p->setStartLine((*first)->startLine());
8058
// End position is overwritten on every step, the last appended item wins.
8059 for (; first != last; ++first) {
8060 p->appendItem(*first);
8061 p->setEndColumn((*first)->endColumn());
8062 p->setEndLine((*first)->endLine());
8063 }
8064
8065 return p;
8066}
8067
8068//! Split Paragraph and free HTML.
//! Paragraph \p p is cut at every RawHtml item that is a free tag (isFreeTag()).
//! Unless \p collectRefLinks is set, items before such a tag are appended to "parent"
//! as an optimized paragraph (if it is not empty), followed by the tag itself.
//! \return Paragraph with the items that are left after the last free tag: \p p itself if
//! there was no free tag, an empty paragraph if the last item was a free tag.
//! po.m_rawTextData is trimmed from the front, so that as many entries are left as
//! there are Text items in the returned paragraph (cleared for the empty one).
8069template<class Trait>
8070inline std::shared_ptr<Paragraph<Trait>>
// NOTE(review): the line with the function name and the first parameter ("parent",
// as the body shows) is absent from this listing. Restore it from upstream.
8072 std::shared_ptr<Paragraph<Trait>> p,
// NOTE(review): the line with the next parameter ("po", as the body shows) is absent too.
8074 bool collectRefLinks,
8075 bool fullyOptimizeParagraphs = true)
8076{
// "first" is the start of the current part, cend() means that a new part starts at
// the next item.
8077 auto first = p->items().cbegin();
8078 auto it = first;
8079 auto last = p->items().cend();
8080
8081 for (; it != last; ++it) {
8082 if (first == last) {
8083 first = it;
8084 }
8085
8086 if ((*it)->type() == ItemType::RawHtml &&
8087 UnprotectedDocsMethods<Trait>::isFreeTag(std::static_pointer_cast<RawHtml<Trait>>(*it))) {
// This local "p" shadows the parameter, it holds the items before the tag.
8088 auto p = makeParagraph<Trait>(first, it);
8089
8090 if (!collectRefLinks) {
8091 if (!p->isEmpty()) {
8092 parent->appendItem(optimizeParagraph<Trait>(p, po,
8093 fullyOptimizeParagraphs ?
8094 OptimizeParagraphType::FullWithoutRawData :
8095 OptimizeParagraphType::SemiWithoutRawData));
8096 }
8097
8098 parent->appendItem(*it);
8099 }
8100
8101 first = last;
8102 }
8103 }
8104
// Something is left after the last free tag (or there was no free tag at all).
8105 if (first != last) {
8106 if (first != p->items().cbegin()) {
// Keep raw text data only for the Text items of the remaining part.
8107 const auto c = std::count_if(first, last, [](const auto &i) {
8108 return (i->type() == MD::ItemType::Text);
8109 });
8110 po.m_rawTextData.erase(po.m_rawTextData.cbegin(), po.m_rawTextData.cbegin() +
8111 (po.m_rawTextData.size() - c));
8112
8113 return makeParagraph<Trait>(first, last);
8114 } else {
8115 return p;
8116 }
8117 } else {
8118 po.m_rawTextData.clear();
8119
8120 return std::make_shared<Paragraph<Trait>>();
8121 }
8122}
8123
8124//! \return Last virgin position of the item.
//!
//! If the item has closing styles, the end column of the last closing style
//! is returned. Otherwise it is the end column of the item itself (Text,
//! Link, Image, FootnoteRef, RawHtml) or the end column of the closing
//! delimiter (Code, Math). For all other types of items -1 is returned.
//!
//! NOTE(review): listing line 8127 (function name and the `item` parameter)
//! is absent here; confirm against the original header.
8125template<class Trait>
8126inline long long int
8128{
8129 switch (item->type()) {
8130 case ItemType::Text:
8131 case ItemType::Link:
8132 case ItemType::Image:
8133 case ItemType::FootnoteRef:
8134 case ItemType::RawHtml:
8135 {
8136 auto i = static_cast<ItemWithOpts<Trait> *>(item);
8137
8138 if (!i->closeStyles().empty()) {
8139 return i->closeStyles().back().endColumn();
8140 } else {
8141 return i->endColumn();
8142 }
8143 }
 // Both branches above return, so this break is never reached.
8144 break;
8145
8146 case ItemType::Code:
8147 case ItemType::Math:
8148 {
8149 auto c = static_cast<Code<Trait> *>(item);
8150
8151 if (!c->closeStyles().empty()) {
8152 return c->closeStyles().back().endColumn();
8153 } else {
8154 return c->endDelim().endColumn();
8155 }
8156 }
 // Both branches above return, so this break is never reached.
8157 break;
8158
8159 default:
8160 return -1;
8161 }
8162}
8163
8164//! Make heading.
//!
//! Builds Heading of the given \p level from the paragraph \p p and appends
//! it to \p parent. Nothing is done when \p collectRefLinks is true.
//!
//! Steps:
//!  - trailing LineBreak is removed from the paragraph, its characters (if
//!    there are any besides spaces) are returned to the text;
//!  - if the last item is a Text without format, a header label is searched
//!    in it with findAndRemoveHeaderLabel() and cut out of the text;
//!  - the heading gets its position (\p lastColumn, \p lastLine as the end),
//!    delimiter \p delim and label; when there is no explicit label it is
//!    generated from the paragraph with paragraphToLabel();
//!  - the heading is registered in \p doc by the label.
//!
//! NOTE(review): p->items().back() is used without a check for emptiness, so
//! callers are presumably expected to pass a not empty paragraph.
//! NOTE(review): listing line 8177 (last parameter, presumably `po` that is
//! used in the body) is absent here; confirm against the original header.
8165template<class Trait>
8166inline void
8167makeHeading(std::shared_ptr<Block<Trait>> parent,
8168 std::shared_ptr<Document<Trait>> doc,
8169 std::shared_ptr<Paragraph<Trait>> p,
8170 long long int lastColumn,
8171 long long int lastLine,
8172 int level,
8173 const typename Trait::String &workingPath,
8174 const typename Trait::String &fileName,
8175 bool collectRefLinks,
8176 const WithPosition &delim,
8178{
8179 if (!collectRefLinks) {
 // Trailing line break is not kept in the heading.
8180 if (p->items().back()->type() == ItemType::LineBreak) {
8181 auto lb = std::static_pointer_cast<LineBreak<Trait>>(p->items().back());
 // True when the text of the line break has nothing but spaces.
8182 const auto lineBreakBySpaces = lb->text().simplified().isEmpty();
8183
 // Rebuild the paragraph without the last item (the line break).
8184 p = makeParagraph<Trait>(p->items().cbegin(), std::prev(p->items().cend()));
8185 const auto lineBreakPos = localPosFromVirgin(po.m_fr, lb->startColumn(), lb->startLine());
8186
8187 if (!p->isEmpty()) {
8188 if (p->items().back()->type() == ItemType::Text) {
8189 auto lt = std::static_pointer_cast<Text<Trait>>(p->items().back());
8190
 // Line break had visible characters: re-read the text from the
 // virgin line starting at the start column of the last text item.
8191 if (!lineBreakBySpaces) {
8192 auto text = po.m_fr.m_data.at(lineBreakPos.second).first.fullVirginString().sliced(
8193 lt->startColumn());
8194 po.m_rawTextData.back().m_str = text;
8195
 // Leading spaces are dropped unless the text item started with a space.
8196 if (!lt->text()[0].isSpace()) {
8197 const auto notSpacePos = skipSpaces<Trait>(0, text);
8198
8199 text.remove(0, notSpacePos);
8200 }
8201
8202 lt->setText(removeBackslashes<typename Trait::String, Trait>(replaceEntity<Trait>(text)));
8203 }
8204
 // The text item now covers the place of the line break too.
8205 lt->setEndColumn(lt->endColumn() + lb->text().length());
8206 } else {
 // Last item is not a text: the rest of the line after it
 // becomes a new text item.
8207 if (!lineBreakBySpaces) {
8208 const auto lastItemVirginPos = lastVirginPositionInParagraph<Trait>(p->items().back().get());
8209 const auto lastItemPos = localPosFromVirgin(po.m_fr, lastItemVirginPos, lineBreakPos.second);
8210 const auto endOfLine = po.m_fr.m_data.at(lineBreakPos.second).first.virginSubString(
8211 lastItemPos.first + 1);
8212 auto t = std::make_shared<Text<Trait>>();
8213 t->setText(endOfLine);
8214 t->setStartColumn(lastItemVirginPos + 1);
8215 t->setStartLine(lb->startLine());
8216 t->setEndColumn(lb->endColumn());
8217 t->setEndLine(lb->endLine());
8218
8219 p->appendItem(t);
8220
8221 const auto pos = localPosFromVirgin(po.m_fr, lb->startColumn(), lb->startLine());
8222
8223 po.m_rawTextData.push_back({lb->text(), pos.first, pos.second});
8224 }
8225 }
8226 }
8227 }
8228
 // first - label string, second - position of the label.
8229 std::pair<typename Trait::String, WithPosition> label;
8230
 // Label is searched only in the last item and only if it is a text without format.
8231 if (p->items().back()->type() == ItemType::Text) {
8232 auto t = std::static_pointer_cast<Text<Trait>>(p->items().back());
8233
8234 if (t->opts() == TextWithoutFormat) {
8235 auto text = po.m_rawTextData.back();
8236 typename Trait::InternalString tmp(text.m_str);
8237 label = findAndRemoveHeaderLabel<Trait>(tmp);
8238
8239 if (!label.first.isEmpty()) {
 // Drop the first and the last characters of the found label
 // (presumably enclosing braces - TODO confirm).
8240 label.first = label.first.sliced(1, label.first.length() - 2);
8241
 // Nothing but the label was in the text: remove the text item.
8242 if (tmp.asString().simplified().isEmpty()) {
8243 p->removeItemAt(p->items().size() - 1);
8244 po.m_rawTextData.pop_back();
8245
8246 if (!p->items().empty()) {
8247 const auto last = std::static_pointer_cast<WithPosition>(p->items().back());
8248 p->setEndColumn(last->endColumn());
8249 p->setEndLine(last->endLine());
8250 }
8251 } else {
8252 const auto notSpacePos = tmp.virginPos(skipSpaces<Trait>(0, tmp.asString()));
8253 const auto virginLine = t->endLine();
8254
 // There is a text before the label: the current text item is
 // cut to end right before the label.
8255 if (label.second.startColumn() > notSpacePos) {
8256 auto text = tmp.fullVirginString().sliced(0, label.second.startColumn());
8257 po.m_rawTextData.back().m_str = text;
8258
8259 if (!t->text()[0].isSpace()) {
8260 const auto notSpacePos = skipSpaces<Trait>(0, text);
8261
8262 text.remove(0, notSpacePos);
8263 }
8264
8265 t->setText(removeBackslashes<typename Trait::String, Trait>(replaceEntity<Trait>(text)));
8266 t->setEndColumn(label.second.startColumn() - 1);
8267
8268 const auto lastPos = t->endColumn();
8269 const auto pos = localPosFromVirgin(po.m_fr, label.second.endColumn() + 1, virginLine);
8270
 // Something follows the label: new text item is created
 // for it, from here t points to this new item.
8271 if (pos.first != -1) {
8272 t = std::make_shared<Text<Trait>>();
8273 t->setStartColumn(label.second.endColumn() + 1);
8274 t->setStartLine(virginLine);
8275 t->setEndColumn(lastPos);
8276 t->setEndLine(virginLine);
8277 p->appendItem(t);
8278
8279 po.m_rawTextData.push_back({po.m_fr.m_data[pos.second].first.asString().sliced(pos.first),
8280 pos.first, pos.second});
8281 }
8282 }
8283
 // Text after the label: set it to the last text item, or remove
 // this item if only spaces are there.
8284 const auto pos = localPosFromVirgin(po.m_fr, label.second.endColumn() + 1, virginLine);
8285
8286 if (pos.first != -1) {
8287 po.m_rawTextData.back() = {po.m_fr.m_data[pos.second].first.asString().sliced(pos.first),
8288 pos.first, pos.second};
8289
8290 auto text = po.m_rawTextData.back().m_str;
8291
8292 if (!text.simplified().isEmpty()) {
 // Leading spaces are removed only if this text is the single item.
8293 if (p->items().size() == 1) {
8294 const auto ns = skipSpaces<Trait>(0, text);
8295
8296 text.remove(0, ns);
8297 }
8298
8299 t->setStartColumn(label.second.endColumn() + 1);
8300 t->setText(removeBackslashes<typename Trait::String, Trait>(replaceEntity<Trait>(text)));
8301 } else {
8302 po.m_rawTextData.pop_back();
8303 p->removeItemAt(p->items().size() - 1);
8304 }
8305 }
8306
8307 p->setEndColumn(t->endColumn());
8308 }
8309 } else {
8310 label.first.clear();
8311 }
8312
8313 label.second.setStartLine(t->startLine());
8314 label.second.setEndLine(t->endLine());
8315 }
8316 }
8317
8318 std::shared_ptr<Heading<Trait>> h(new Heading<Trait>);
8319 h->setStartColumn(p->startColumn());
8320 h->setStartLine(p->startLine());
8321 h->setEndColumn(lastColumn);
8322 h->setEndLine(lastLine);
8323 h->setLevel(level);
8324
8325 if (!p->items().empty()) {
8326 h->setText(p);
8327 }
8328
8329 h->setDelims({delim});
8330
 // No explicit label: generate it from the text of the heading.
 // Otherwise remember where the explicit label was placed.
8331 if (label.first.isEmpty() && !p->items().empty()) {
8332 label.first = Trait::latin1ToString("#") + paragraphToLabel(p.get());
8333 } else {
8334 h->setLabelPos(label.second);
8335 }
8336
 // Full label is "<label>/<workingPath>/<fileName>" (working path part is
 // skipped when it is empty).
8337 if (!label.first.isEmpty()) {
8338 label.first += Trait::latin1ToString("/") + (!workingPath.isEmpty() ?
8339 workingPath + Trait::latin1ToString("/") : typename Trait::String()) + fileName;
8340
8341 h->setLabel(label.first);
8342
8343 doc->insertLabeledHeading(label.first, h);
8344 }
8345
8346 parent->appendItem(h);
8347 }
8348}
8349
8350//! \return Index of text item for the given index in raw text data.
8351template<class Trait>
8352inline long long int
8353textAtIdx(std::shared_ptr<Paragraph<Trait>> p,
8354 size_t idx)
8355{
8356 size_t i = 0;
8357
8358 for (auto it = p->items().cbegin(), last = p->items().cend(); it != last; ++it) {
8359 if ((*it)->type() == ItemType::Text) {
8360 if (i == idx) {
8361 return std::distance(p->items().cbegin(), it);
8362 }
8363
8364 ++i;
8365 }
8366 }
8367
8368 return -1;
8369}
8370
8371//! Process text plugins.
//!
//! Calls the function of every plugin from \p textPlugins with the paragraph,
//! the parsing options and the list of strings stored together with the
//! plugin. When \p inLink is true, plugins whose boolean flag is false are
//! skipped.
//!
//! NOTE(review): listing lines 8374-8375 (function name and the first
//! parameters - presumably `p` and `po`, which are used in the body) are
//! absent here; confirm against the original header.
8372template<class Trait>
8373inline void
8376 const TextPluginsMap<Trait> &textPlugins,
8377 bool inLink)
8378{
8379 for (const auto &plugin : textPlugins) {
 // The boolean part of the plugin's data tells whether it may work in links.
8380 if (inLink && !std::get<bool>(plugin.second)) {
8381 continue;
8382 }
8383
8384 std::get<TextPluginFunc<Trait>>(plugin.second)(p, po,
8385 std::get<typename Trait::StringList>(plugin.second));
8386 }
8387}
8388
8389//! Make horizontal line.
//!
//! Creates HorizontalLine item and appends it to \p parent. The item starts
//! at the first non-space character of the line and ends at the last
//! character of the line, both lines of the position are the number of this
//! line.
//!
//! NOTE(review): listing line 8392 (function name and the `line` parameter)
//! is absent here; confirm against the original header.
8390template<class Trait>
8391inline void
8393 std::shared_ptr<Block<Trait>> parent)
8394{
8395 std::shared_ptr<Item<Trait>> hr(new HorizontalLine<Trait>);
8396 hr->setStartColumn(line.first.virginPos(skipSpaces<Trait>(0, line.first.asString())));
8397 hr->setStartLine(line.second.m_lineNumber);
8398 hr->setEndColumn(line.first.virginPos(line.first.length() - 1));
8399 hr->setEndLine(line.second.m_lineNumber);
8400 parent->appendItem(hr);
8401}
8402
//! Parse lines of text \p fr with inline markup.
//!
//! Delimiters collected from the lines are processed one by one: links,
//! images, autolinks/HTML, styles, math and inline code are handled by the
//! corresponding checkFor*() methods; horizontal lines and H1/H2 underlines
//! finish the current paragraph (making a heading or a horizontal line).
//!
//! Paragraphs, headings and horizontal lines are appended to \p parent only
//! when \p collectRefLinks is false. If parsing was stopped because something
//! other than text was detected, the rest of the lines is parsed with
//! Parser::parse() as new blocks.
8403template<class Trait>
8404inline void
8405Parser<Trait>::parseFormattedTextLinksImages(MdBlock<Trait> &fr,
8406 std::shared_ptr<Block<Trait>> parent,
8407 std::shared_ptr<Document<Trait>> doc,
8408 typename Trait::StringList &linksToParse,
8409 const typename Trait::String &workingPath,
8410 const typename Trait::String &fileName,
8411 bool collectRefLinks,
8412 bool ignoreLineBreak,
8413 RawHtmlBlock<Trait> &html,
8414 bool inLink)
8415
8416{
8417 if (fr.m_data.empty()) {
8418 return;
8419 }
8420
 // p - paragraph that is being filled at the moment,
 // pt - receiver of the result of parseTableInParagraph().
8421 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
8422 p->setStartColumn(fr.m_data.at(0).first.virginPos(0));
8423 p->setStartLine(fr.m_data.at(0).second.m_lineNumber);
8424 std::shared_ptr<Paragraph<Trait>> pt(new Paragraph<Trait>);
8425
8426 const auto delims = collectDelimiters(fr.m_data);
8427
8428 TextParsingOpts<Trait> po = {fr, p, nullptr, doc, linksToParse, workingPath, fileName,
8429 collectRefLinks, ignoreLineBreak, html, m_textPlugins};
8430
8431 if (!delims.empty()) {
8432 for (auto it = delims.cbegin(), last = delims.cend(); it != last; ++it) {
 // HTML started earlier is not finished yet - go on with it.
8433 if (html.m_html.get() && html.m_continueHtml) {
8434 it = finishRawHtmlTag(it, last, po, false);
8435 } else {
8436 if (isListOrQuoteAfterHtml(po)) {
8437 break;
8438 }
8439
 // Look for a table only when the current line is after the last known text line.
8440 if (po.m_line > po.m_lastTextLine) {
8441 checkForTableInParagraph(po, fr.m_data.size() - 1);
8442 }
8443
 // Text before the delimiter (but not further than the last text
 // position) is made here. When reference links are collected no
 // text is created, only the current position is moved.
8444 if (po.shouldStopParsing() && po.m_lastTextLine < it->m_line) {
8445 break;
8446 } else if (!collectRefLinks) {
8447 makeText(po.m_lastTextLine < it->m_line ? po.m_lastTextLine : it->m_line,
8448 po.m_lastTextLine < it->m_line ? po.m_lastTextPos : it->m_pos, po);
8449 } else {
8450 const auto prevLine = po.m_line;
8451
8452 po.m_line = (po.m_lastTextLine < it->m_line ? po.m_lastTextLine : it->m_line);
8453 po.m_pos = (po.m_lastTextLine < it->m_line ? po.m_lastTextPos : it->m_pos);
8454
8455 if (po.m_line > prevLine) {
8456 po.m_firstInParagraph = false;
8457 } else if (po.m_pos > skipSpaces<Trait>(0, po.m_fr.m_data[po.m_line].first.asString())) {
8458 po.m_firstInParagraph = false;
8459 }
8460 }
8461
 // In the inline cases below the checkFor*() method returns the
 // delimiter to go on from, the end of the paragraph is moved to it.
8462 switch (it->m_type) {
8463 case Delimiter::SquareBracketsOpen: {
8464 it = checkForLink(it, last, po);
8465 p->setEndColumn(fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len - 1));
8466 p->setEndLine(fr.m_data.at(it->m_line).second.m_lineNumber);
8467 } break;
8468
8469 case Delimiter::ImageOpen: {
8470 it = checkForImage(it, last, po);
8471 p->setEndColumn(fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len - 1));
8472 p->setEndLine(fr.m_data.at(it->m_line).second.m_lineNumber);
8473 } break;
8474
8475 case Delimiter::Less: {
8476 it = checkForAutolinkHtml(it, last, po, true);
8477
 // End of the paragraph is not moved while there is a pending HTML block.
8478 if (!html.m_html.get()) {
8479 p->setEndColumn(fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len - 1));
8480 p->setEndLine(fr.m_data.at(it->m_line).second.m_lineNumber);
8481 }
8482 } break;
8483
8484 case Delimiter::Strikethrough:
8485 case Delimiter::Emphasis1:
8486 case Delimiter::Emphasis2: {
 // Styles are not processed when reference links are collected.
8487 if (!collectRefLinks) {
8488 it = checkForStyle(delims.cbegin(), it, last, po);
8489 p->setEndColumn(fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len - 1));
8490 p->setEndLine(fr.m_data.at(it->m_line).second.m_lineNumber);
8491 }
8492 } break;
8493
8494 case Delimiter::Math: {
8495 it = checkForMath(it, last, po);
8496 p->setEndColumn(fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len - 1));
8497 p->setEndLine(fr.m_data.at(it->m_line).second.m_lineNumber);
8498 } break;
8499
8500 case Delimiter::InlineCode: {
 // Backslashed delimiter doesn't start inline code.
8501 if (!it->m_backslashed) {
8502 it = checkForInlineCode(it, last, po);
8503 p->setEndColumn(fr.m_data.at(it->m_line).first.virginPos(it->m_pos + it->m_len - 1));
8504 p->setEndLine(fr.m_data.at(it->m_line).second.m_lineNumber);
8505 }
8506 } break;
8507
 // The line is either a horizontal line or an underline of a
 // heading of level 2 (if isH2() says so and there is a not empty
 // paragraph before it).
8508 case Delimiter::HorizontalLine: {
8509 po.m_wasRefLink = false;
8510 po.m_firstInParagraph = false;
8511
8512 const auto pos = skipSpaces<Trait>(0, po.m_fr.m_data[it->m_line].first.asString());
8513 const auto withoutSpaces = po.m_fr.m_data[it->m_line].first.asString().sliced(pos);
8514
8515 auto h2 = isH2<Trait>(withoutSpaces);
8516
8517 if (!p->isEmpty()) {
8518 optimizeParagraph<Trait>(p, po, OptimizeParagraphType::Semi);
8519
8520 checkForTextPlugins<Trait>(p, po, m_textPlugins, inLink);
8521
 // The paragraph ends at the end of the previous line.
8522 if (it->m_line - 1 >= 0) {
8523 p->setEndColumn(fr.m_data.at(it->m_line - 1).first.virginPos(
8524 fr.m_data.at(it->m_line - 1).first.length() - 1));
8525 p->setEndLine(fr.m_data.at(it->m_line - 1).second.m_lineNumber);
8526 }
8527
8528 p = splitParagraphsAndFreeHtml(parent, p, po, collectRefLinks, m_fullyOptimizeParagraphs);
8529
8530 if (!p->isEmpty()) {
8531 if (!collectRefLinks) {
 // Not a heading underline, or the paragraph is just a
 // single line break: add it as an ordinary paragraph.
8532 if (!h2 || (p->items().size() == 1 &&
8533 p->items().front()->type() == ItemType::LineBreak)) {
8534 parent->appendItem(p);
8535
8536 h2 = false;
8537 } else {
8538 makeHeading(parent,
8539 doc,
8540 optimizeParagraph<Trait>(p, po, defaultParagraphOptimization()),
8541 fr.m_data[it->m_line].first.virginPos(it->m_pos + it->m_len - 1),
8542 fr.m_data[it->m_line].second.m_lineNumber,
8543 2,
8544 workingPath,
8545 fileName,
8546 collectRefLinks,
8547 {po.m_fr.m_data[it->m_line].first.virginPos(pos),
8548 fr.m_data[it->m_line].second.m_lineNumber,
8549 po.m_fr.m_data[it->m_line].first.virginPos(
8550 lastNonSpacePos<Trait>(po.m_fr.m_data[it->m_line].first.asString())),
8551 fr.m_data[it->m_line].second.m_lineNumber},
8552 po);
8553
8554 po.m_checkLineOnNewType = true;
8555 }
8556 }
8557 } else {
8558 h2 = false;
8559 }
8560 } else {
8561 h2 = false;
8562 }
8563
 // Start a fresh paragraph for the text after this line.
8564 p.reset(new Paragraph<Trait>);
8565 po.m_rawTextData.clear();
8566
8567 if (it->m_line + 1 < static_cast<long long int>(fr.m_data.size())) {
8568 p->setStartColumn(fr.m_data.at(it->m_line + 1).first.virginPos(0));
8569 p->setStartLine(fr.m_data.at(it->m_line + 1).second.m_lineNumber);
8570 }
8571
8572 po.m_parent = p;
8573 po.m_line = it->m_line;
8574 po.m_pos = it->m_pos + it->m_len;
8575
 // No heading was made from this line - it is a horizontal line.
8576 if (!h2 && !collectRefLinks) {
8577 makeHorLine<Trait>(fr.m_data[it->m_line], parent);
8578 }
8579 } break;
8580
 // Underline of a heading: the paragraph gathered so far becomes
 // a heading of level 1 or 2.
8581 case Delimiter::H1:
8582 case Delimiter::H2: {
8583 po.m_wasRefLink = false;
8584 po.m_firstInParagraph = false;
8585
8586 optimizeParagraph<Trait>(p, po, OptimizeParagraphType::Semi);
8587
8588 checkForTextPlugins<Trait>(p, po, m_textPlugins, inLink);
8589
8590 if (it->m_line - 1 >= 0) {
8591 p->setEndColumn(fr.m_data.at(it->m_line - 1).first.virginPos(
8592 fr.m_data.at(it->m_line - 1).first.length() - 1));
8593 p->setEndLine(fr.m_data.at(it->m_line - 1).second.m_lineNumber);
8594 }
8595
8596 p = splitParagraphsAndFreeHtml(parent, p, po, collectRefLinks,
8597 m_fullyOptimizeParagraphs);
8598
 // Heading is made only from a not empty paragraph that is not
 // just a single line break.
8599 if (!p->isEmpty() && !((p->items().size() == 1 &&
8600 p->items().front()->type() == ItemType::LineBreak))) {
8601 makeHeading(parent,
8602 doc,
8603 optimizeParagraph<Trait>(p, po, defaultParagraphOptimization()),
8604 fr.m_data[it->m_line].first.virginPos(it->m_pos + it->m_len - 1),
8605 fr.m_data[it->m_line].second.m_lineNumber,
8606 it->m_type == Delimiter::H1 ? 1 : 2,
8607 workingPath,
8608 fileName,
8609 collectRefLinks,
8610 {po.m_fr.m_data[it->m_line].first.virginPos(skipSpaces<Trait>(
8611 0, po.m_fr.m_data[it->m_line].first.asString())),
8612 fr.m_data[it->m_line].second.m_lineNumber,
8613 po.m_fr.m_data[it->m_line].first.virginPos(lastNonSpacePos<Trait>(
8614 po.m_fr.m_data[it->m_line].first.asString())),
8615 fr.m_data[it->m_line].second.m_lineNumber},
8616 po);
8617
8618 po.m_checkLineOnNewType = true;
8619
8620 p.reset(new Paragraph<Trait>);
8621 po.m_rawTextData.clear();
8622
8623 if (it->m_line + 1 < static_cast<long long int>(fr.m_data.size())) {
8624 p->setStartColumn(fr.m_data.at(it->m_line + 1).first.virginPos(0));
8625 p->setStartLine(fr.m_data.at(it->m_line + 1).second.m_lineNumber);
8626 }
8627
8628 po.m_line = it->m_line;
8629 po.m_pos = it->m_pos + it->m_len;
 // No heading was made and the paragraph has no start position
 // yet: it starts at this delimiter.
8630 } else if (p->startColumn() == -1) {
8631 p->setStartColumn(fr.m_data.at(it->m_line).first.virginPos(it->m_pos));
8632 p->setStartLine(fr.m_data.at(it->m_line).second.m_lineNumber);
8633 }
8634
8635 po.m_parent = p;
8636 } break;
8637
 // Any other delimiter goes to the text.
8638 default: {
8639 if (!po.shouldStopParsing()) {
8640 po.m_wasRefLink = false;
8641 po.m_firstInParagraph = false;
8642
8643 if (!collectRefLinks) {
8644 makeText(it->m_line, it->m_pos + it->m_len, po);
8645 } else {
8646 po.m_line = it->m_line;
8647 po.m_pos = it->m_pos + it->m_len;
8648 }
8649 }
8650 } break;
8651 }
8652
8653 if (po.shouldStopParsing()) {
8654 break;
8655 }
8656
 // The flag is set above after a heading was made: if the next line is
 // a code indented by spaces, stop here with Code as detected type.
8657 if (po.m_checkLineOnNewType) {
8658 if (po.m_line + 1 < static_cast<long long int>(po.m_fr.m_data.size())) {
8659 const auto type = Parser<Trait>::whatIsTheLine(po.m_fr.m_data[po.m_line + 1].first);
8660
8661 if (type == Parser<Trait>::BlockType::CodeIndentedBySpaces) {
8662 po.m_detected = TextParsingOpts<Trait>::Detected::Code;
8663
8664 break;
8665 }
8666 }
8667
8668 po.m_checkLineOnNewType = false;
8669 }
8670 }
8671 }
8672 } else {
 // There are no delimiters, but started HTML still has to be finished.
8673 if (html.m_html.get() && html.m_continueHtml) {
8674 finishRawHtmlTag(delims.cend(), delims.cend(), po, false);
8675 }
8676 }
8677
 // Table was not looked for yet.
8678 if (po.m_lastTextLine == -1) {
8679 checkForTableInParagraph(po, po.m_fr.m_data.size() - 1);
8680 }
8681
8682 if (po.m_detected == TextParsingOpts<Trait>::Detected::Table) {
8683 if (!collectRefLinks) {
8684 makeText(po.m_lastTextLine, po.m_lastTextPos, po);
8685 }
8686
8687 parseTableInParagraph(po, pt, doc, linksToParse, workingPath, fileName, collectRefLinks);
8688 }
8689
 // While HTML is detected the remaining lines are added as text line by
 // line, until a list or a blockquote is met after HTML.
8690 while (po.m_detected == TextParsingOpts<Trait>::Detected::HTML &&
8691 po.m_line < static_cast<long long int>(po.m_fr.m_data.size())) {
8692 if (!isListOrQuoteAfterHtml(po)) {
8693 if (!collectRefLinks) {
8694 makeText(po.m_line, po.m_fr.m_data[po.m_line].first.length(), po);
8695 }
8696
8697 po.m_pos = 0;
8698 ++po.m_line;
8699 } else {
8700 break;
8701 }
8702 }
8703
 // Nothing special was detected - make text up to the end of the last line.
8704 if (po.m_detected == TextParsingOpts<Trait>::Detected::Nothing &&
8705 po.m_line <= static_cast<long long int>(po.m_fr.m_data.size() - 1)) {
8706 if (!collectRefLinks) {
8707 makeText(po.m_fr.m_data.size() - 1, po.m_fr.m_data.back().first.length(), po);
8708 }
8709 }
8710
 // Finish the last paragraph.
8711 if (!p->isEmpty()) {
8712 optimizeParagraph<Trait>(p, po, OptimizeParagraphType::Semi);
8713
8714 checkForTextPlugins<Trait>(p, po, m_textPlugins, inLink);
8715
8716 p = splitParagraphsAndFreeHtml(parent, p, po, collectRefLinks, m_fullyOptimizeParagraphs);
8717
8718 if (!p->isEmpty() && !collectRefLinks) {
8719 parent->appendItem(optimizeParagraph<Trait>(p, po, defaultParagraphOptimization()));
8720 }
8721
8722 po.m_rawTextData.clear();
8723 }
8724
 // Only the first item of pt is added to the parent.
8725 if (!pt->isEmpty() && !collectRefLinks) {
8726 parent->appendItem(pt->items().front());
8727 }
8728
8729 normalizePos(po.m_pos, po.m_line, po.m_line < static_cast<long long int>(po.m_fr.m_data.size()) ?
8730 po.m_fr.m_data[po.m_line].first.length() : 0, po.m_fr.m_data.size());
8731
 // Something other than text was detected and there are lines left:
 // parse them as new blocks.
8732 if (po.m_detected != TextParsingOpts<Trait>::Detected::Nothing &&
8733 po.m_line < static_cast<long long int>(po.m_fr.m_data.size())) {
8734 typename MdBlock<Trait>::Data tmp;
8735 std::copy(fr.m_data.cbegin() + po.m_line, fr.m_data.cend(), std::back_inserter(tmp));
8736
8737 StringListStream<Trait> stream(tmp);
8738
8739 Parser<Trait>::parse(stream, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
8740 }
8741}
8742
8743template<class Trait>
8744inline void
8745Parser<Trait>::parseFootnote(MdBlock<Trait> &fr,
8746 std::shared_ptr<Block<Trait>>,
8747 std::shared_ptr<Document<Trait>> doc,
8748 typename Trait::StringList &linksToParse,
8749 const typename Trait::String &workingPath,
8750 const typename Trait::String &fileName,
8751 bool collectRefLinks)
8752{
8753 {
8754 const auto it = (std::find_if(fr.m_data.rbegin(), fr.m_data.rend(), [](const auto &s) {
8755 return !s.first.isEmpty();
8756 })).base();
8757
8758 if (it != fr.m_data.end()) {
8759 fr.m_data.erase(it, fr.m_data.end());
8760 }
8761 }
8762
8763 if (!fr.m_data.empty()) {
8764 std::shared_ptr<Footnote<Trait>> f(new Footnote<Trait>);
8765 f->setStartColumn(fr.m_data.front().first.virginPos(0));
8766 f->setStartLine(fr.m_data.front().second.m_lineNumber);
8767 f->setEndColumn(fr.m_data.back().first.virginPos(fr.m_data.back().first.length() - 1));
8768 f->setEndLine(fr.m_data.back().second.m_lineNumber);
8769
8770 const auto delims = collectDelimiters(fr.m_data);
8771
8772 RawHtmlBlock<Trait> html;
8773
8774 TextParsingOpts<Trait> po = {fr, f, nullptr, doc, linksToParse, workingPath, fileName,
8775 collectRefLinks, false, html, m_textPlugins};
8776 po.m_lastTextLine = fr.m_data.size();
8777 po.m_lastTextPos = fr.m_data.back().first.length();
8778
8779 if (!delims.empty() && delims.cbegin()->m_type == Delimiter::SquareBracketsOpen &&
8780 !delims.cbegin()->m_isWordBefore) {
8781 typename MdBlock<Trait>::Data id;
8782 typename Delims::const_iterator it = delims.cend();
8783
8784 po.m_line = delims.cbegin()->m_line;
8785 po.m_pos = delims.cbegin()->m_pos;
8786
8787 std::tie(id, it) = checkForLinkText(delims.cbegin(), delims.cend(), po);
8788
8789 if (!toSingleLine(id).isEmpty() &&
8790 id.front().first.asString().startsWith(Trait::latin1ToString("^")) &&
8791 it != delims.cend() &&
8792 fr.m_data.at(it->m_line).first.length() > it->m_pos + 2 &&
8793 fr.m_data.at(it->m_line).first[it->m_pos + 1] == Trait::latin1ToChar(':') &&
8794 fr.m_data.at(it->m_line).first[it->m_pos + 2].isSpace()) {
8795 f->setIdPos({fr.m_data[delims.cbegin()->m_line].first.virginPos(delims.cbegin()->m_pos),
8796 fr.m_data[delims.cbegin()->m_line].second.m_lineNumber,
8797 fr.m_data.at(it->m_line).first.virginPos(it->m_pos + 1),
8798 fr.m_data.at(it->m_line).second.m_lineNumber});
8799
8800 {
8801 typename MdBlock<Trait>::Data tmp;
8802 std::copy(fr.m_data.cbegin() + it->m_line, fr.m_data.cend(),
8803 std::back_inserter(tmp));
8804 fr.m_data = tmp;
8805 }
8806
8807 fr.m_data.front().first = fr.m_data.front().first.sliced(it->m_pos + 3);
8808
8809 for (auto it = fr.m_data.begin(), last = fr.m_data.end(); it != last; ++it) {
8810 if (it->first.asString().startsWith(Trait::latin1ToString(" "))) {
8811 it->first = it->first.sliced(4);
8812 }
8813 }
8814
8815 StringListStream<Trait> stream(fr.m_data);
8816
8817 parse(stream, f, doc, linksToParse, workingPath, fileName, collectRefLinks);
8818
8819 if (!f->isEmpty()) {
8820 doc->insertFootnote(Trait::latin1ToString("#") + toSingleLine(id) +
8821 Trait::latin1ToString("/") + (!workingPath.isEmpty() ?
8822 workingPath + Trait::latin1ToString("/") : typename Trait::String()) + fileName,
8823 f);
8824 }
8825 }
8826 }
8827 }
8828}
8829
8830template<class Trait>
8831inline void
8832Parser<Trait>::parseBlockquote(MdBlock<Trait> &fr,
8833 std::shared_ptr<Block<Trait>> parent,
8834 std::shared_ptr<Document<Trait>> doc,
8835 typename Trait::StringList &linksToParse,
8836 const typename Trait::String &workingPath,
8837 const typename Trait::String &fileName,
8838 bool collectRefLinks,
8839 RawHtmlBlock<Trait> &)
8840{
8841 const long long int pos = fr.m_data.front().first.asString().indexOf(Trait::latin1ToChar('>'));
8842 long long int extra = 0;
8843
8844 if (pos > -1) {
8845 typename Blockquote<Trait>::Delims delims;
8846
8847 long long int i = 0, j = 0;
8848
8849 BlockType bt = BlockType::EmptyLine;
8850
8851 for (auto it = fr.m_data.begin(), last = fr.m_data.end(); it != last; ++it, ++i) {
8852 const auto ns = skipSpaces<Trait>(0, it->first.asString());
8853 const auto gt = (ns < it->first.length() ? (it->first[ns] == Trait::latin1ToChar('>') ? ns : -1) : -1);
8854
8855 if (gt > -1) {
8856 const auto dp = it->first.virginPos(gt);
8857 delims.push_back({dp, it->second.m_lineNumber, dp, it->second.m_lineNumber});
8858
8859 if (it == fr.m_data.begin()) {
8860 extra = gt + (it->first.length() > gt + 1 ?
8861 (it->first[gt + 1] == Trait::latin1ToChar(' ') ? 1 : 0) : 0) + 1;
8862 }
8863
8864 it->first = it->first.sliced(gt + (it->first.length() > gt + 1 ?
8865 (it->first[gt + 1] == Trait::latin1ToChar(' ') ? 1 : 0) : 0) + 1);
8866
8867 bt = whatIsTheLine(it->first);
8868 }
8869 // Process lazyness...
8870 else {
8871 if (ns < 4 && isHorizontalLine<Trait>(it->first.asString().sliced(ns))) {
8872 break;
8873 }
8874
8875 const auto tmpBt = whatIsTheLine(it->first);
8876
8877 if (isListType(tmpBt)) {
8878 break;
8879 }
8880
8881 if (bt == BlockType::Text) {
8882 if (isH1<Trait>(it->first.asString())) {
8883 const auto p = it->first.asString().indexOf(Trait::latin1ToChar('='));
8884
8885 it->first.insert(p, Trait::latin1ToChar('\\'));
8886
8887 continue;
8888 } else if (isH2<Trait>(it->first.asString())) {
8889 const auto p = it->first.asString().indexOf(Trait::latin1ToChar('-'));
8890
8891 it->first.insert(p, Trait::latin1ToChar('\\'));
8892
8893 continue;
8894 }
8895 }
8896
8897 if ((bt == BlockType::Text || bt == BlockType::Blockquote || bt == BlockType::List)
8898 && (tmpBt == BlockType::Text || tmpBt == BlockType::CodeIndentedBySpaces)) {
8899 continue;
8900 } else {
8901 break;
8902 }
8903 }
8904 }
8905
8906 typename MdBlock<Trait>::Data tmp;
8907
8908 for (; j < i; ++j) {
8909 tmp.push_back(fr.m_data.at(j));
8910 }
8911
8912 StringListStream<Trait> stream(tmp);
8913
8914 std::shared_ptr<Blockquote<Trait>> bq(new Blockquote<Trait>);
8915 bq->setStartColumn(fr.m_data.at(0).first.virginPos(0) - extra);
8916 bq->setStartLine(fr.m_data.at(0).second.m_lineNumber);
8917 bq->setEndColumn(fr.m_data.at(j - 1).first.virginPos(fr.m_data.at(j - 1).first.length() - 1));
8918 bq->setEndLine(fr.m_data.at(j - 1).second.m_lineNumber);
8919 bq->delims() = delims;
8920
8921 parse(stream, bq, doc, linksToParse, workingPath, fileName, collectRefLinks);
8922
8923 if (!collectRefLinks) {
8924 parent->appendItem(bq);
8925 }
8926
8927 if (i < (long long int)fr.m_data.size()) {
8928 tmp.clear();
8929
8930 std::copy(fr.m_data.cbegin() + i, fr.m_data.cend(), std::back_inserter(tmp));
8931
8932 StringListStream<Trait> stream(tmp);
8933
8934 parse(stream, parent, doc, linksToParse, workingPath, fileName, collectRefLinks);
8935 }
8936 }
8937}
8938
8939//! \return Is the given string a new list item.
8940template<class Trait>
8941inline bool
8942isListItemAndNotNested(const typename Trait::String &s,
8943 long long int indent)
8944{
8945 long long int p = skipSpaces<Trait>(0, s);
8946
8947 if (p >= indent || p == s.size()) {
8948 return false;
8949 }
8950
8951 bool space = false;
8952
8953 if (p + 1 >= s.size()) {
8954 space = true;
8955 } else {
8956 space = s[p + 1].isSpace();
8957 }
8958
8959 if (p < 4) {
8960 if (s[p] == Trait::latin1ToChar('*') && space) {
8961 return true;
8962 } else if (s[p] == Trait::latin1ToChar('-') && space) {
8963 return true;
8964 } else if (s[p] == Trait::latin1ToChar('+') && space) {
8965 return true;
8966 } else {
8967 return isOrderedList<Trait>(s);
8968 }
8969 } else
8970 return false;
8971}
8972
8973//! \return Indent.
8974template<class Trait>
8975inline std::pair<long long int, long long int>
8976calculateIndent(const typename Trait::String &s,
8977 long long int p)
8978{
8979 return {0, skipSpaces<Trait>(p, s)};
8980}
8981
8982//! \return List item data.
8983template<class Trait>
8984inline std::tuple<bool, long long int, typename Trait::Char, bool>
8985listItemData(const typename Trait::String &s,
8986 bool wasText)
8987{
8988 long long int p = skipSpaces<Trait>(0, s);
8989
8990 if (p == s.size()) {
8991 return {false, 0, typename Trait::Char(), false};
8992 }
8993
8994 bool space = false;
8995
8996 if (p + 1 >= s.size()) {
8997 space = true;
8998 } else {
8999 space = s[p + 1].isSpace();
9000 }
9001
9002 if (p < 4) {
9003 if (s[p] == Trait::latin1ToChar('*') && space) {
9004 return {true, p + 2, Trait::latin1ToChar('*'),
9005 p + 2 < s.size() ? !s.sliced(p + 2).isEmpty() : false};
9006 } else if (s[p] == Trait::latin1ToChar('-')) {
9007 if (isH2<Trait>(s) && wasText) {
9008 return {false, p + 2, Trait::latin1ToChar('-'), false};
9009 } else if (space) {
9010 return {true, p + 2, Trait::latin1ToChar('-'),
9011 p + 2 < s.size() ? !s.sliced(p + 2).isEmpty() : false};
9012 }
9013 } else if (s[p] == Trait::latin1ToChar('+') && space) {
9014 return {true, p + 2, Trait::latin1ToChar('+'),
9015 p + 2 < s.size() ? !s.sliced(p + 2).isEmpty() : false};
9016 } else {
9017 int d = 0, l = 0;
9018 typename Trait::Char c;
9019
9020 if (isOrderedList<Trait>(s, &d, &l, &c)) {
9021 return {true, p + l + 2, c,
9022 p + l + 2 < s.size() ? !s.sliced(p + l + 2).isEmpty() : false};
9023 } else {
9024 return {false, 0, typename Trait::Char(), false};
9025 }
9026 }
9027 }
9028
9029 return {false, 0, typename Trait::Char(), false};
9030}
9031
9032//! Set last position of the item.
9033template<class Trait>
9034inline void
9035setLastPos(std::shared_ptr<Item<Trait>> item,
9036 long long int pos,
9037 long long int line)
9038{
9039 item->setEndColumn(pos);
9040 item->setEndLine(line);
9041}
9042
9043//! Update last position of all parents.
//!
//! If the parent of the HTML differs from the top parent, the blocks
//! registered for this parent in html.m_toAdjustLastPos get the end position
//! (column and line) of the HTML item. Nothing is done if there is no record
//! for the parent.
//!
//! NOTE(review): listing line 9046 (function name and the `html` parameter)
//! is absent here; confirm against the original header.
9044template<class Trait>
9045inline void
9047{
9048 if (html.m_parent != html.m_topParent) {
9049 const auto it = html.m_toAdjustLastPos.find(html.m_parent);
9050
9051 if (it != html.m_toAdjustLastPos.end()) {
 // i.first is the block to adjust.
9052 for (auto &i : it->second) {
9053 i.first->setEndColumn(html.m_html->endColumn());
9054 i.first->setEndLine(html.m_html->endLine());
9055 }
9056 }
9057 }
9058}
9059
9060template<class Trait>
9061inline long long int
9062Parser<Trait>::parseList(MdBlock<Trait> &fr,
9063 std::shared_ptr<Block<Trait>> parent,
9064 std::shared_ptr<Document<Trait>> doc,
9065 typename Trait::StringList &linksToParse,
9066 const typename Trait::String &workingPath,
9067 const typename Trait::String &fileName,
9068 bool collectRefLinks,
9069 RawHtmlBlock<Trait> &html)
9070{
9071 bool resetTopParent = false;
9072 long long int line = -1;
9073
9074 if (!html.m_topParent) {
9075 html.m_topParent = parent;
9076 resetTopParent = true;
9077 }
9078
9079 const auto p = skipSpaces<Trait>(0, fr.m_data.front().first.asString());
9080
9081 if (p != fr.m_data.front().first.length()) {
9082 std::shared_ptr<List<Trait>> list(new List<Trait>);
9083
9084 typename MdBlock<Trait>::Data listItem;
9085 auto it = fr.m_data.begin();
9086 listItem.push_back(*it);
9087 list->setStartColumn(it->first.virginPos(p));
9088 list->setStartLine(it->second.m_lineNumber);
9089 ++it;
9090
9091 long long int indent = 0;
9092 typename Trait::Char marker;
9093
9094 std::tie(std::ignore, indent, marker, std::ignore) =
9095 listItemData<Trait>(listItem.front().first.asString(), false);
9096
9097 html.m_blocks.push_back({list, list->startColumn() + indent});
9098
9099 if (!collectRefLinks) {
9100 html.m_toAdjustLastPos.insert({list, html.m_blocks});
9101 }
9102
9103 bool updateIndent = false;
9104
9105 auto addListMakeNew = [&]() {
9106 if (!list->isEmpty() && !collectRefLinks) {
9107 parent->appendItem(list);
9108 }
9109
9110 html.m_blocks.pop_back();
9111
9112 list.reset(new List<Trait>);
9113
9114 html.m_blocks.push_back({list, indent});
9115
9116 if (!collectRefLinks) {
9117 html.m_toAdjustLastPos.insert({list, html.m_blocks});
9118 }
9119 };
9120
9121 auto processLastHtml = [&](std::shared_ptr<ListItem<Trait>> resItem) {
9122 if (html.m_html && resItem) {
9123 auto htmlParent = (resItem->startLine() == html.m_html->startLine() ||
9124 html.m_html->startColumn() >= resItem->startColumn() + indent ?
9125 resItem : html.findParent(html.m_html->startColumn()));
9126
9127 if (!htmlParent) {
9128 htmlParent = html.m_topParent;
9129 }
9130
9131 if (htmlParent != resItem) {
9132 addListMakeNew();
9133 }
9134
9135 if (!collectRefLinks) {
9136 htmlParent->appendItem(html.m_html);
9137 updateLastPosInList<Trait>(html);
9138 }
9139
9140 resetHtmlTag<Trait>(html);
9141 }
9142 };
9143
9144 auto processListItem = [&]() {
9145 MdBlock<Trait> block = {listItem, 0};
9146
9147 std::shared_ptr<ListItem<Trait>> resItem;
9148
9149 line = parseListItem(block, list, doc, linksToParse, workingPath, fileName,
9150 collectRefLinks, html, &resItem);
9151 listItem.clear();
9152
9153 processLastHtml(resItem);
9154 };
9155
9156 for (auto last = fr.m_data.end(); it != last; ++it) {
9157 if (updateIndent) {
9158 std::tie(std::ignore, indent, marker, std::ignore) =
9159 listItemData<Trait>(it->first.asString(), false);
9160
9161 if (!collectRefLinks) {
9162 html.m_blocks.back().second = indent;
9163 }
9164
9165 updateIndent = false;
9166 }
9167
9168 const auto ns = skipSpaces<Trait>(0, it->first.asString());
9169
9170 if (isH1<Trait>(it->first.asString().sliced(ns)) && ns < indent && !listItem.empty()) {
9171 const auto p = it->first.asString().indexOf(Trait::latin1ToChar('='));
9172
9173 it->first.insert(p, Trait::latin1ToChar('\\'));
9174 } else if (isHorizontalLine<Trait>(it->first.asString().sliced(ns)) &&
9175 ns < indent && !listItem.empty()) {
9176 updateIndent = true;
9177
9178 processListItem();
9179
9180 if (!list->isEmpty()) {
9181 addListMakeNew();
9182 }
9183
9184 if (!collectRefLinks) {
9185 makeHorLine<Trait>(*it, parent);
9186 }
9187
9188 continue;
9189 } else if (isListItemAndNotNested<Trait>(it->first.asString(), indent) &&
9190 !listItem.empty()) {
9191 typename Trait::Char tmpMarker;
9192 std::tie(std::ignore, indent, tmpMarker, std::ignore) =
9193 listItemData<Trait>(it->first.asString(), false);
9194
9195 processListItem();
9196
9197 if (tmpMarker != marker) {
9198 if (!list->isEmpty()) {
9199 addListMakeNew();
9200 }
9201
9202 marker = tmpMarker;
9203 }
9204 }
9205
9206 if (line > 0) {
9207 break;
9208 }
9209
9210 listItem.push_back(*it);
9211
9212 if (list->startColumn() == -1) {
9213 list->setStartColumn(
9214 it->first.virginPos(std::min(it->first.length() ?
9215 it->first.length() - 1 : 0, skipSpaces<Trait>(0, it->first.asString()))));
9216 list->setStartLine(it->second.m_lineNumber);
9217
9218 if (!collectRefLinks) {
9219 html.m_blocks.back().second += list->startColumn();
9220 }
9221 }
9222 }
9223
9224 if (!listItem.empty()) {
9225 MdBlock<Trait> block = {listItem, 0};
9226 line = parseListItem(block, list, doc, linksToParse, workingPath, fileName,
9227 collectRefLinks, html);
9228 }
9229
9230 if (!list->isEmpty() && !collectRefLinks) {
9231 parent->appendItem(list);
9232 }
9233
9234 html.m_blocks.pop_back();
9235 }
9236
9237 if (resetTopParent) {
9238 html.m_topParent.reset();
9239 }
9240
9241 return line;
9242}
9243
9244template<class Trait>
9245inline long long int
9246Parser<Trait>::parseListItem(MdBlock<Trait> &fr,
9247 std::shared_ptr<Block<Trait>> parent,
9248 std::shared_ptr<Document<Trait>> doc,
9249 typename Trait::StringList &linksToParse,
9250 const typename Trait::String &workingPath,
9251 const typename Trait::String &fileName,
9252 bool collectRefLinks,
9253 RawHtmlBlock<Trait> &html,
9254 std::shared_ptr<ListItem<Trait>> *resItem)
9255{
9256 {
9257 const auto it = (std::find_if(fr.m_data.rbegin(), fr.m_data.rend(), [](const auto &s) {
9258 return !s.first.isEmpty();
9259 })).base();
9260
9261 if (it != fr.m_data.end()) {
9262 fr.m_data.erase(it, fr.m_data.end());
9263 }
9264 }
9265
9266 const auto p = skipSpaces<Trait>(0, fr.m_data.front().first.asString());
9267
9268 std::shared_ptr<ListItem<Trait>> item(new ListItem<Trait>);
9269
9270 item->setStartColumn(fr.m_data.front().first.virginPos(p));
9271 item->setStartLine(fr.m_data.front().second.m_lineNumber);
9272
9273 int i = 0, len = 0;
9274
9275 if (isOrderedList<Trait>(fr.m_data.front().first.asString(), &i, &len)) {
9276 item->setListType(ListItem<Trait>::Ordered);
9277 item->setStartNumber(i);
9278 item->setDelim({item->startColumn(), item->startLine(), item->startColumn() + len, item->startLine()});
9279 } else {
9280 item->setListType(ListItem<Trait>::Unordered);
9281 item->setDelim({item->startColumn(), item->startLine(), item->startColumn(), item->startLine()});
9282 }
9283
9284 if (item->listType() == ListItem<Trait>::Ordered) {
9285 item->setOrderedListPreState(i == 1 ? ListItem<Trait>::Start : ListItem<Trait>::Continue);
9286 }
9287
9288 typename MdBlock<Trait>::Data data;
9289
9290 auto it = fr.m_data.begin();
9291 ++it;
9292
9293 int pos = 1;
9294
9295 long long int indent = 0;
9296 bool wasText = false;
9297
9298 std::tie(std::ignore, indent, std::ignore, wasText) =
9299 listItemData<Trait>(fr.m_data.front().first.asString(), wasText);
9300
9301 html.m_blocks.push_back({item, item->startColumn() + indent});
9302
9303 if (!collectRefLinks) {
9304 html.m_toAdjustLastPos.insert({item, html.m_blocks});
9305 }
9306
9307 const auto firstNonSpacePos = calculateIndent<Trait>(
9308 fr.m_data.front().first.asString(), indent).second;
9309
9310 if (firstNonSpacePos - indent < 4) {
9311 indent = firstNonSpacePos;
9312 }
9313
9314 if (indent < fr.m_data.front().first.length()) {
9315 data.push_back({fr.m_data.front().first.right(fr.m_data.front().first.length() - indent),
9316 fr.m_data.front().second});
9317 }
9318
9319 bool taskList = false;
9320 bool checked = false;
9321
9322 if (!data.empty()) {
9323 auto p = skipSpaces<Trait>(0, data.front().first.asString());
9324
9325 if (p < data.front().first.length()) {
9326 if (data.front().first[p] == Trait::latin1ToChar('[')) {
9327 const auto startTaskDelimPos = data.front().first.virginPos(p);
9328
9329 ++p;
9330
9331 if (p < data.front().first.length()) {
9332 if (data.front().first[p] == Trait::latin1ToChar(' ') ||
9333 data.front().first[p].toLower() == Trait::latin1ToChar('x')) {
9334 if (data.front().first[p].toLower() == Trait::latin1ToChar('x')) {
9335 checked = true;
9336 }
9337
9338 ++p;
9339
9340 if (p < data.front().first.length()) {
9341 if (data.front().first[p] == Trait::latin1ToChar(']')) {
9342 item->setTaskDelim({startTaskDelimPos, item->startLine(), data.front().first.virginPos(p), item->startLine()});
9343
9344 taskList = true;
9345
9346 data[0].first = data[0].first.sliced(p + 1);
9347 }
9348 }
9349 }
9350 }
9351 }
9352 }
9353 }
9354
9355 if (taskList) {
9356 item->setTaskList();
9357 item->setChecked(checked);
9358 }
9359
9360 bool fensedCode = false;
9361 typename Trait::String startOfCode;
9362 bool wasEmptyLine = false;
9363
9364 std::vector<std::pair<RawHtmlBlock<Trait>, long long int>> htmlToAdd;
9365 long long int line = -1;
9366
9367 auto parseStream = [&] (StringListStream<Trait> &stream)
9368 {
9369 const auto tmpHtml = html;
9370 html = parse(stream, item, doc, linksToParse, workingPath, fileName, collectRefLinks, false, true);
9371 html.m_topParent = tmpHtml.m_topParent;
9372 html.m_blocks = tmpHtml.m_blocks;
9373 html.m_toAdjustLastPos = tmpHtml.m_toAdjustLastPos;
9374 };
9375
9376 for (auto last = fr.m_data.end(); it != last; ++it, ++pos) {
9377 if (!fensedCode) {
9378 fensedCode = isCodeFences<Trait>(it->first.asString().startsWith(
9379 typename Trait::String(indent, Trait::latin1ToChar(' '))) ?
9380 it->first.asString().sliced(indent) : it->first.asString());
9381
9382 if (fensedCode) {
9383 startOfCode = startSequence<Trait>(it->first.asString());
9384 }
9385 } else if (fensedCode &&
9386 isCodeFences<Trait>(it->first.asString().startsWith(
9387 typename Trait::String(indent, Trait::latin1ToChar(' '))) ?
9388 it->first.asString().sliced(indent) : it->first.asString(),
9389 true) && startSequence<Trait>(it->first.asString()).contains(startOfCode)) {
9390 fensedCode = false;
9391 }
9392
9393 if (!fensedCode) {
9394 long long int newIndent = 0;
9395 bool ok = false;
9396
9397 std::tie(ok, newIndent, std::ignore, wasText) = listItemData<Trait>(
9398 it->first.asString().startsWith(typename Trait::String(indent, Trait::latin1ToChar(' '))) ?
9399 it->first.asString().sliced(indent) : it->first.asString(),
9400 wasText);
9401
9402 if (ok) {
9403 StringListStream<Trait> stream(data);
9404
9405 parseStream(stream);
9406
9407 data.clear();
9408
9409 if (html.m_html.get()) {
9410 html.m_parent = html.findParent(html.m_html->startColumn());
9411
9412 if (!html.m_parent) {
9413 html.m_parent = html.m_topParent;
9414 }
9415
9416 if (html.m_continueHtml) {
9417 MdBlock<Trait> tmp;
9418 tmp.m_emptyLineAfter = fr.m_emptyLineAfter;
9419 std::copy(it, last, std::back_inserter(tmp.m_data));
9420
9421 parseText(tmp, html.m_parent, doc, linksToParse, workingPath, fileName,
9422 collectRefLinks, html);
9423
9424 break;
9425 }
9426
9427 htmlToAdd.push_back({html, html.m_parent->items().size()});
9428 updateLastPosInList<Trait>(html);
9429 resetHtmlTag<Trait>(html);
9430 }
9431
9432 if (!htmlToAdd.empty() && htmlToAdd.back().first.m_parent == html.m_topParent) {
9433 line = it->second.m_lineNumber;
9434
9435 break;
9436 } else {
9437 typename MdBlock<Trait>::Data nestedList;
9438 nestedList.push_back(*it);
9439 ++it;
9440
9441 wasEmptyLine = false;
9442
9443 for (; it != last; ++it) {
9444 const auto ns = skipSpaces<Trait>(0, it->first.asString());
9445 std::tie(ok, std::ignore, std::ignore, wasText) =
9446 listItemData<Trait>((ns >= indent ? it->first.asString().sliced(indent) :
9447 it->first.asString()), wasText);
9448
9449 if (ok) {
9450 wasEmptyLine = false;
9451 }
9452
9453 if (ok || ns >= indent + newIndent || ns == it->first.length() || !wasEmptyLine) {
9454 nestedList.push_back(*it);
9455 } else {
9456 break;
9457 }
9458
9459 wasEmptyLine = (ns == it->first.length());
9460
9461 wasText = (wasEmptyLine ? false : wasText);
9462 }
9463
9464 for (auto it = nestedList.begin(), last = nestedList.end(); it != last; ++it) {
9465 it->first = it->first.sliced(std::min(skipSpaces<Trait>(
9466 0, it->first.asString()), indent));
9467 }
9468
9469 while (!nestedList.empty() &&
9470 nestedList.back().first.asString().isEmpty()) {
9471 nestedList.pop_back();
9472 }
9473
9474 MdBlock<Trait> block = {nestedList, 0};
9475
9476 line = parseList(block, item, doc, linksToParse, workingPath, fileName,
9477 collectRefLinks, html);
9478
9479 if (line >= 0) {
9480 break;
9481 }
9482
9483 for (; it != last; ++it) {
9484 if (it->first.asString().startsWith(typename Trait::String(
9485 indent, Trait::latin1ToChar(' ')))) {
9486 it->first = it->first.sliced(indent);
9487 }
9488
9489 data.push_back(*it);
9490 }
9491
9492 break;
9493 }
9494 } else {
9495 if (it->first.asString().startsWith(typename Trait::String(
9496 indent, Trait::latin1ToChar(' ')))) {
9497 it->first = it->first.sliced(indent);
9498 }
9499
9500 data.push_back(*it);
9501
9502 wasEmptyLine = (skipSpaces<Trait>(0, it->first.asString()) == it->first.length());
9503
9504 wasText = !wasEmptyLine;
9505 }
9506 } else {
9507 if (it->first.asString().startsWith(typename Trait::String(
9508 indent, Trait::latin1ToChar(' ')))) {
9509 it->first = it->first.sliced(indent);
9510 }
9511
9512 data.push_back(*it);
9513 }
9514 }
9515
9516 if (!data.empty()) {
9517 StringListStream<Trait> stream(data);
9518
9519 parseStream(stream);
9520
9521 if (html.m_html) {
9522 html.m_parent = html.findParent(html.m_html->startColumn());
9523
9524 if (!html.m_parent) {
9525 html.m_parent = html.m_topParent;
9526 }
9527 }
9528 }
9529
9530 if (!collectRefLinks) {
9531 parent->appendItem(item);
9532
9533 long long int i = 0;
9534
9535 for (auto &h : htmlToAdd) {
9536 if (h.first.m_parent != h.first.m_topParent) {
9537 h.first.m_parent->insertItem(h.second + i, h.first.m_html);
9538
9539 ++i;
9540
9541 updateLastPosInList(h.first);
9542 } else {
9543 html = h.first;
9544
9545 break;
9546 }
9547 }
9548
9549 long long int htmlStartColumn = -1;
9550 long long int htmlStartLine = -1;
9551
9552 if (html.m_html) {
9553 std::tie(htmlStartColumn, htmlStartLine) =
9554 localPosFromVirgin<Trait>(fr, html.m_html->startColumn(), html.m_html->startLine());
9555 }
9556
9557 long long int localLine = (html.m_html ? htmlStartLine : fr.m_data.size() - 1);
9558
9559 if (html.m_html) {
9560 if (skipSpaces<Trait>(0, fr.m_data[localLine].first.asString()) >= htmlStartColumn) {
9561 --localLine;
9562 }
9563 }
9564
9565 const auto lastLine = fr.m_data[localLine].second.m_lineNumber;
9566
9567 const auto lastColumn = fr.m_data[localLine].first.virginPos(
9568 fr.m_data[localLine].first.length() ? fr.m_data[localLine].first.length() - 1 : 0);
9569
9570 item->setEndColumn(lastColumn);
9571 item->setEndLine(lastLine);
9572 parent->setEndColumn(lastColumn);
9573 parent->setEndLine(lastLine);
9574 }
9575
9576 if (resItem) {
9577 *resItem = item;
9578 }
9579
9580 html.m_blocks.pop_back();
9581
9582 return line;
9583}
9584
9585template<class Trait>
9586inline void
9587Parser<Trait>::parseCode(MdBlock<Trait> &fr,
9588 std::shared_ptr<Block<Trait>> parent,
9589 bool collectRefLinks)
9590{
9591 if (!collectRefLinks) {
9592 const auto indent = skipSpaces<Trait>(0, fr.m_data.front().first.asString());
9593
9594 if (indent != fr.m_data.front().first.length()) {
9595 WithPosition startDelim, endDelim, syntaxPos;
9596 typename Trait::String syntax;
9597 isStartOfCode<Trait>(fr.m_data.front().first.asString(), &syntax, &startDelim, &syntaxPos);
9598 syntax = replaceEntity<Trait>(syntax);
9599 startDelim.setStartLine(fr.m_data.front().second.m_lineNumber);
9600 startDelim.setEndLine(startDelim.startLine());
9601 startDelim.setStartColumn(fr.m_data.front().first.virginPos(startDelim.startColumn()));
9602 startDelim.setEndColumn(fr.m_data.front().first.virginPos(startDelim.endColumn()));
9603
9604 if (syntaxPos.startColumn() != -1) {
9605 syntaxPos.setStartLine(startDelim.startLine());
9606 syntaxPos.setEndLine(startDelim.startLine());
9607 syntaxPos.setStartColumn(fr.m_data.front().first.virginPos(syntaxPos.startColumn()));
9608 syntaxPos.setEndColumn(fr.m_data.front().first.virginPos(syntaxPos.endColumn()));
9609 }
9610
9611 const long long int startPos = fr.m_data.front().first.virginPos(indent);
9612 const long long int emptyColumn = fr.m_data.front().first.virginPos(fr.m_data.front().first.length());
9613 const long long int startLine = fr.m_data.front().second.m_lineNumber;
9614 const long long int endPos = fr.m_data.back().first.virginPos(fr.m_data.back().first.length() - 1);
9615 const long long int endLine = fr.m_data.back().second.m_lineNumber;
9616
9617 fr.m_data.erase(fr.m_data.cbegin());
9618
9619 {
9620 const auto it = std::prev(fr.m_data.cend());
9621
9622 if (it->second.m_lineNumber > -1) {
9623 endDelim.setStartColumn(it->first.virginPos(skipSpaces<Trait>(0, it->first.asString())));
9624 endDelim.setStartLine(it->second.m_lineNumber);
9625 endDelim.setEndLine(endDelim.startLine());
9626 endDelim.setEndColumn(it->first.virginPos(it->first.length() - 1));
9627 }
9628
9629 fr.m_data.erase(it);
9630 }
9631
9632 if (syntax.toLower() == Trait::latin1ToString("math")) {
9633 typename Trait::String math;
9634 bool first = true;
9635
9636 for (const auto &l : std::as_const(fr.m_data)) {
9637 if (!first) {
9638 math.push_back(Trait::latin1ToChar('\n'));
9639 }
9640
9641 math.push_back(l.first.virginSubString());
9642
9643 first = false;
9644 }
9645
9646 if (!collectRefLinks) {
9647 std::shared_ptr<Paragraph<Trait>> p(new Paragraph<Trait>);
9648 p->setStartColumn(startPos);
9649 p->setStartLine(startLine);
9650 p->setEndColumn(endPos);
9651 p->setEndLine(endLine);
9652
9653 std::shared_ptr<Math<Trait>> m(new Math<Trait>);
9654
9655 if (!fr.m_data.empty()) {
9656 m->setStartColumn(fr.m_data.front().first.virginPos(0));
9657 m->setStartLine(fr.m_data.front().second.m_lineNumber);
9658 m->setEndColumn(fr.m_data.back().first.virginPos(fr.m_data.back().first.length() - 1));
9659 m->setEndLine(fr.m_data.back().second.m_lineNumber);
9660 } else {
9661 m->setStartColumn(emptyColumn);
9662 m->setStartLine(startLine);
9663 m->setEndColumn(emptyColumn);
9664 m->setEndLine(startLine);
9665 }
9666
9667 m->setInline(false);
9668 m->setExpr(math);
9669 m->setStartDelim(startDelim);
9670 m->setEndDelim(endDelim);
9671 m->setSyntaxPos(syntaxPos);
9672 m->setFensedCode(true);
9673 p->appendItem(m);
9674
9675 parent->appendItem(p);
9676 }
9677 } else {
9678 parseCodeIndentedBySpaces(fr, parent, collectRefLinks, indent, syntax, emptyColumn,
9679 startLine, true, startDelim, endDelim, syntaxPos);
9680 }
9681 }
9682 }
9683}
9684
9685template<class Trait>
9686inline void
9687Parser<Trait>::parseCodeIndentedBySpaces(MdBlock<Trait> &fr,
9688 std::shared_ptr<Block<Trait>> parent,
9689 bool collectRefLinks,
9690 int indent,
9691 const typename Trait::String &syntax,
9692 long long int emptyColumn,
9693 long long int startLine,
9694 bool fensedCode,
9695 const WithPosition &startDelim,
9696 const WithPosition &endDelim,
9697 const WithPosition &syntaxPos)
9698{
9699 if (!collectRefLinks) {
9700 typename Trait::String code;
9701 long long int startPos = 0;
9702 bool first = true;
9703
9704 for (const auto &l : std::as_const(fr.m_data)) {
9705 const auto ns = skipSpaces<Trait>(0, l.first.asString());
9706 if (first) {
9707 startPos = ns;
9708 }
9709 first = false;
9710
9711 code.push_back((indent > 0 ? l.first.virginSubString(ns < indent ? ns : indent) +
9712 typename Trait::String(Trait::latin1ToChar('\n')) :
9713 typename Trait::String(l.first.virginSubString()) +
9714 typename Trait::String(Trait::latin1ToChar('\n'))));
9715 }
9716
9717 if (!code.isEmpty()) {
9718 code.remove(code.length() - 1, 1);
9719 }
9720
9721 std::shared_ptr<Code<Trait>> codeItem(new Code<Trait>(code, fensedCode, false));
9722 codeItem->setSyntax(syntax);
9723 codeItem->setStartDelim(startDelim);
9724 codeItem->setEndDelim(endDelim);
9725 codeItem->setSyntaxPos(syntaxPos);
9726
9727 if (!fr.m_data.empty()) {
9728 codeItem->setStartColumn(fr.m_data.front().first.virginPos(startPos));
9729 codeItem->setStartLine(fr.m_data.front().second.m_lineNumber);
9730 codeItem->setEndColumn(fr.m_data.back().first.virginPos(fr.m_data.back().first.length() - 1));
9731 codeItem->setEndLine(fr.m_data.back().second.m_lineNumber);
9732 } else {
9733 codeItem->setStartColumn(emptyColumn);
9734 codeItem->setStartLine(startLine);
9735 codeItem->setEndColumn(emptyColumn);
9736 codeItem->setEndLine(startLine);
9737 }
9738
9739 if (fensedCode) {
9740 parent->appendItem(codeItem);
9741 } else if (!parent->items().empty() && parent->items().back()->type() == ItemType::Code) {
9742 auto c = std::static_pointer_cast<Code<Trait>>(parent->items().back());
9743
9744 if (!c->isFensedCode()) {
9745 auto line = c->endLine();
9746 auto text = c->text();
9747
9748 for (; line < codeItem->startLine(); ++line) {
9749 text.push_back(Trait::latin1ToString("\n"));
9750 }
9751
9752 text.push_back(codeItem->text());
9753 c->setText(text);
9754 c->setEndColumn(codeItem->endColumn());
9755 c->setEndLine(codeItem->endLine());
9756 } else {
9757 parent->appendItem(codeItem);
9758 }
9759 } else {
9760 parent->appendItem(codeItem);
9761 }
9762 }
9763}
9764
9765} /* namespace MD */
9766
9767#endif // MD4QT_MD_PARSER_HPP_INCLUDED
Abstract block (storage of child items).
Definition doc.h:603
const Items & items() const
Definition doc.h:629
Blockquote.
Definition doc.h:836
Code.
Definition doc.h:1269
Document.
Definition doc.h:1774
Footnote.
Definition doc.h:1727
Heading.
Definition doc.h:710
Horizontal line.
Definition doc.h:364
Image.
Definition doc.h:1183
Base class for items that can have style options.
Definition doc.h:259
void setOpts(int o)
Set style options.
Definition doc.h:287
const Styles & closeStyles() const
Definition doc.h:305
const Styles & openStyles() const
Definition doc.h:293
int opts() const
Definition doc.h:281
typename Trait::template Vector< StyleDelim > Styles
Type of list of emphasis.
Definition doc.h:278
Base class for item in Markdown document.
Definition doc.h:177
virtual ItemType type() const =0
Line break.
Definition doc.h:570
List.
Definition doc.h:1039
Paragraph.
Definition doc.h:679
Markdown parser.
Definition parser.h:1422
void removeTextPlugin(int id)
Remove text plugin.
Definition parser.h:1483
friend struct PrivateAccess
Used in tests.
Definition parser.h:2075
~Parser()=default
void addTextPlugin(int id, TextPluginFunc< Trait > plugin, bool processInLinks, const typename Trait::StringList &userData)
Add text plugin.
Definition parser.h:1468
std::shared_ptr< Document< Trait > > parse(const typename Trait::String &fileName, bool recursive=true, const typename Trait::StringList &ext={Trait::latin1ToString("md"), Trait::latin1ToString("markdown")}, bool fullyOptimizeParagraphs=true)
Definition parser.h:2091
Raw HTML.
Definition doc.h:440
Wrapper for typename Trait::StringList to be behaved like a stream.
Definition parser.h:272
Trait::InternalString lineAt(long long int pos)
Definition parser.h:292
bool atEnd() const
Definition parser.h:280
long long int size() const
Definition parser.h:296
StringListStream(typename MdBlock< Trait >::Data &stream)
Definition parser.h:274
Trait::InternalString readLine()
Definition parser.h:284
long long int currentLineNumber() const
Definition parser.h:288
Emphasis in the Markdown document.
Definition doc.h:216
Table.
Definition doc.h:1564
Alignment
Alignment.
Definition doc.h:1610
@ AlignCenter
Center.
Definition doc.h:1616
@ AlignLeft
Left.
Definition doc.h:1612
@ AlignRight
Right.
Definition doc.h:1614
Text item in Paragraph.
Definition doc.h:513
Base for any thing with start and end position.
Definition doc.h:76
void setEndColumn(long long int c)
Set end column.
Definition doc.h:137
long long int startColumn() const
Definition doc.h:101
void setStartColumn(long long int c)
Set start column.
Definition doc.h:125
long long int startLine() const
Definition doc.h:107
long long int endColumn() const
Definition doc.h:113
long long int endLine() const
Definition doc.h:119
Q_SCRIPTABLE Q_NOREPLY void start()
QAction * end(const QObject *recvr, const char *slot, QObject *parent)
KIOCORE_EXPORT CopyJob * link(const QList< QUrl > &src, const QUrl &destDir, JobFlags flags=DefaultFlags)
QString path(const QString &relativePath)
VehicleSection::Type type(QStringView coachNumber, QStringView coachClassification)
KIOCORE_EXPORT QStringList list(const QString &fileClass)
KGuiItem open()
KGuiItem quit()
KGuiItem back(BidiMode useBidi=IgnoreRTL)
QString label(StandardShortcut id)
Definition algo.h:17
bool isSkipAllEmphasis(const std::vector< std::pair< std::pair< long long int, bool >, int > > &s, size_t idx)
Definition parser.h:7444
TextOption
Text option.
Definition doc.h:199
@ ItalicText
Italic text.
Definition doc.h:205
@ StrikethroughText
Strikethrough.
Definition doc.h:207
@ TextWithoutFormat
No format.
Definition doc.h:201
@ BoldText
Bold text.
Definition doc.h:203
void closeStyle(std::vector< std::pair< Style, long long int > > &styles, Style s)
Close style.
Definition parser.h:7109
bool isOrderedList(const typename Trait::String &s, int *num=nullptr, int *len=nullptr, typename Trait::Char *delim=nullptr, bool *isFirstLineEmpty=nullptr)
Definition parser.h:141
Trait::String paragraphToLabel(Paragraph< Trait > *p)
Convert Paragraph to label.
Definition parser.h:3564
std::pair< long long int, long long int > prevPosition(const MdBlock< Trait > &fr, long long int pos, long long int line)
Definition parser.h:4029
std::pair< long long int, long long int > nextPosition(const MdBlock< Trait > &fr, long long int pos, long long int line)
Definition parser.h:4052
bool checkForEndHtmlComments(const typename Trait::String &line, long long int pos)
Definition parser.h:2318
bool isH1(const typename Trait::String &s)
Definition parser.h:4013
bool isEmail(const typename Trait::String &url)
Definition parser.h:1119
bool isLineBreak(const typename Trait::String &s)
Definition parser.h:4319
TextOption styleToTextOption(Style s)
Definition parser.h:893
std::shared_ptr< Text< Trait > > concatenateText(typename Block< Trait >::Items::const_iterator it, typename Block< Trait >::Items::const_iterator last)
Concatenate texts in block.
Definition parser.h:7799
bool isH(const typename Trait::String &s, const typename Trait::Char &c)
Definition parser.h:3980
std::tuple< bool, long long int, typename Trait::Char, bool > listItemData(const typename Trait::String &s, bool wasText)
Definition parser.h:8985
bool isClosingStyle(const std::vector< std::pair< Style, long long int > > &styles, Style s)
Definition parser.h:7097
bool isSemiOptimization(OptimizeParagraphType t)
Definition parser.h:7839
long long int skipSpaces(long long int i, const typename Trait::String &line)
Skip spaces in line from position i.
Definition parser.h:71
Trait::InternalString prepareTableData(typename Trait::InternalString s)
Prepare data in table cell for parsing.
Definition parser.h:3772
void makeTextObject(const typename Trait::String &text, TextParsingOpts< Trait > &po, long long int startPos, long long int startLine, long long int endPos, long long int endLine, bool doRemoveSpacesAtEnd=false)
Make text item.
Definition parser.h:4358
static const Trait::String s_canBeEscaped
Characters that can be escaped.
Definition parser.h:484
void collectDelimiterVariants(std::vector< std::vector< std::pair< std::pair< long long int, bool >, int > > > &vars, long long int itLength, int type, bool leftFlanking, bool rightFlanking)
Make variants of emphasies.
Definition parser.h:7202
int isTableHeader(const typename Trait::String &s)
Definition parser.h:3448
void initLastItemWithOpts(TextParsingOpts< Trait > &po, std::shared_ptr< ItemWithOpts< Trait > > item)
Initialize item with style information and set it as last item.
Definition parser.h:4347
std::tuple< long long int, long long int, bool, typename Trait::String, long long int > readLinkDestination(long long int line, long long int pos, const TextParsingOpts< Trait > &po, WithPosition *urlPos=nullptr)
Read link's destination.
Definition parser.h:6498
Trait::StringList splitString(const typename Trait::String &str, const typename Trait::Char &ch)
Split string.
void removeSpacesAtEnd(String &s)
Remove spaces at the end of string s.
Definition parser.h:86
bool isListItemAndNotNested(const typename Trait::String &s, long long int indent)
Definition parser.h:8942
std::pair< bool, bool > readUnquotedHtmlAttrValue(long long int &l, long long int &p, const typename MdBlock< Trait >::Data &fr)
Read HTML attribute value.
Definition parser.h:4606
void resetHtmlTag(RawHtmlBlock< Trait > &html)
Reset pre-stored HTML.
Definition parser.h:228
static const char * s_startComment
Starting HTML comment string.
Definition parser.h:47
long long int processGitHubAutolinkExtension(std::shared_ptr< Paragraph< Trait > > p, TextParsingOpts< Trait > &po, long long int idx)
Process GitHub autolinks for the text with index idx.
Definition parser.h:1268
void setLastPos(std::shared_ptr< Item< Trait > > item, long long int pos, long long int line)
Set last position of the item.
Definition parser.h:9035
std::shared_ptr< Paragraph< Trait > > optimizeParagraph(std::shared_ptr< Paragraph< Trait > > &p, TextParsingOpts< Trait > &po, OptimizeParagraphType type=OptimizeParagraphType::Full)
Optimize Paragraph.
Definition parser.h:7868
void appendPossibleDelimiter(std::vector< std::vector< std::pair< std::pair< long long int, bool >, int > > > &vars, long long int len, int type, bool leftAndRight)
Append possible emphasis.
Definition parser.h:7152
WithPosition findAndRemoveClosingSequence(typename Trait::InternalString &s)
Find and remove closing sequence of "#" in heading.
Definition parser.h:3619
std::shared_ptr< Paragraph< Trait > > splitParagraphsAndFreeHtml(std::shared_ptr< Block< Trait > > parent, std::shared_ptr< Paragraph< Trait > > p, TextParsingOpts< Trait > &po, bool collectRefLinks, bool fullyOptimizeParagraphs=true)
Split Paragraph and free HTML.
Definition parser.h:8071
bool isFootnote(const typename Trait::String &s)
Definition parser.h:352
void githubAutolinkPlugin(std::shared_ptr< Paragraph< Trait > > p, TextParsingOpts< Trait > &po, const typename Trait::StringList &)
GitHub autolinks plugin.
Definition parser.h:1400
void replaceTabs(typename Trait::InternalString &s)
Replace tabs with spaces (just for internal simpler use).
Definition parser.h:2448
bool isCodeFences(const typename Trait::String &s, bool closing=false)
Definition parser.h:392
long long int lineBreakLength(const typename Trait::String &s)
Definition parser.h:4327
bool indentInList(const std::vector< long long int > *indents, long long int indent, bool codeIndentedBySpaces)
Definition parser.h:51
std::pair< long long int, long long int > localPosFromVirgin(const MdBlock< Trait > &fr, long long int virginColumn, long long int virginLine)
Definition parser.h:1082
OptimizeParagraphType
Type of the paragraph's optimization.
Definition parser.h:845
@ Semi
Semi optimization, optimization won't concatenate text items if style delimiters will be in the middl...
@ SemiWithoutRawData
Semi optimization, but raw text data won't be concatenated (will be untouched).
@ Full
Full optimization.
@ FullWithoutRawData
Full optimization, but raw text data won't be concatenated (will be untouched).
std::shared_ptr< Paragraph< Trait > > makeParagraph(typename Block< Trait >::Items::const_iterator first, typename Block< Trait >::Items::const_iterator last)
Make Paragraph.
Definition parser.h:8051
bool isH2(const typename Trait::String &s)
Definition parser.h:4021
Trait::String readEscapedSequence(long long int i, const typename Trait::String &str, long long int *endPos=nullptr)
Skip escaped sequence of characters till first space.
Definition parser.h:447
std::function< void(std::shared_ptr< Paragraph< Trait > >, TextParsingOpts< Trait > &, const typename Trait::StringList &)> TextPluginFunc
Functor type for text plugin.
Definition parser.h:921
void normalizePos(long long int &pos, long long int &line, long long int length, long long int linesCount)
Normalize position.
Definition parser.h:7974
std::pair< bool, bool > readHtmlAttrValue(long long int &l, long long int &p, const typename MdBlock< Trait >::Data &fr)
Read HTML attribute value.
Definition parser.h:4630
std::pair< long long int, long long int > calculateIndent(const typename Trait::String &s, long long int p)
Definition parser.h:8976
void makeHorLine(const typename MdBlock< Trait >::Line &line, std::shared_ptr< Block< Trait > > parent)
Make horizontal line.
Definition parser.h:8392
void makeText(long long int lastLine, long long int lastPos, TextParsingOpts< Trait > &po)
Make text item.
Definition parser.h:4475
void applyStyles(int &opts, const std::vector< std::pair< Style, long long int > > &styles)
Apply styles.
Definition parser.h:7123
std::tuple< bool, long long int, long long int, bool, typename Trait::String > isHtmlTag(long long int line, long long int pos, TextParsingOpts< Trait > &po, int rule)
Definition parser.h:4859
bool isStartOfCode(const typename Trait::String &str, typename Trait::String *syntax=nullptr, WithPosition *delim=nullptr, WithPosition *syntaxPos=nullptr)
Definition parser.h:518
Trait::String stringToLabel(const typename Trait::String &s)
Convert string to label.
Definition parser.h:3543
int isTableAlignment(const typename Trait::String &s)
Definition parser.h:701
bool isColumnAlignment(const typename Trait::String &s)
Definition parser.h:628
void makeHeading(std::shared_ptr< Block< Trait > > parent, std::shared_ptr< Document< Trait > > doc, std::shared_ptr< Paragraph< Trait > > p, long long int lastColumn, long long int lastLine, int level, const typename Trait::String &workingPath, const typename Trait::String &fileName, bool collectRefLinks, const WithPosition &delim, TextParsingOpts< Trait > &po)
Make heading.
Definition parser.h:8167
void checkForTableInParagraph(TextParsingOpts< Trait > &po, long long int lastLine)
Check for table in paragraph.
Definition parser.h:4445
std::tuple< long long int, long long int, bool, typename Trait::String, long long int > readLinkTitle(long long int line, long long int pos, const TextParsingOpts< Trait > &po)
Read link's title.
Definition parser.h:6615
bool isOnlyHtmlTagsAfterOrClosedRule1(long long int line, long long int pos, TextParsingOpts< Trait > &po, int rule)
Definition parser.h:4787
void checkForTextPlugins(std::shared_ptr< Paragraph< Trait > > p, TextParsingOpts< Trait > &po, const TextPluginsMap< Trait > &textPlugins, bool inLink)
Process text plugins.
Definition parser.h:8374
@ Bold2
"__"
@ Unknown
Unknown.
@ Strikethrough
"~"
@ Bold1
"**"
@ Italic1
"*"
@ Italic2
"_"
Trait::String virginSubstr(const MdBlock< Trait > &fr, const WithPosition &virginPos)
Definition parser.h:1015
std::map< int, std::tuple< TextPluginFunc< Trait >, bool, typename Trait::StringList > > TextPluginsMap
Type of the map of text plugins.
Definition parser.h:931
void eatRawHtml(long long int line, long long int pos, long long int toLine, long long int toPos, TextParsingOpts< Trait > &po, bool finish, int htmlRule, bool onLine, bool continueEating=false)
Read HTML data.
Definition parser.h:5065
void skipSpacesInHtml(long long int &l, long long int &p, const typename MdBlock< Trait >::Data &fr)
Skip spaces.
Definition parser.h:4587
@ Table
Table.
@ PageBreak
Page break.
@ Anchor
Anchor.
@ ListItem
List item.
@ Image
Image.
@ TableRow
Table row.
@ TableCell
Table cell.
@ Paragraph
Paragraph.
std::vector< std::pair< std::pair< long long int, bool >, int > > longestSequenceWithMoreOpeningsAtStart(const std::vector< std::vector< std::pair< std::pair< long long int, bool >, int > > > &vars)
Definition parser.h:7164
void appendCloseStyle(TextParsingOpts< Trait > &po, const StyleDelim &s)
Append close style.
Definition parser.h:7607
void makeTextObjectWithLineBreak(const typename Trait::String &text, TextParsingOpts< Trait > &po, long long int startPos, long long int startLine, long long int endPos, long long int endLine)
Make text item with line break.
Definition parser.h:4420
std::pair< typename Trait::InternalStringList, std::vector< long long int > > splitTableRow(const typename Trait::InternalString &s)
Split table's row on cells.
Definition parser.h:3782
bool isSetextHeadingBetween(const TextParsingOpts< Trait > &po, long long int startLine, long long int endLine)
Definition parser.h:4840
long long int textAtIdx(std::shared_ptr< Paragraph< Trait > > p, size_t idx)
Definition parser.h:8353
long long int listLevel(const std::vector< long long int > &indents, long long int pos)
Definition parser.h:3222
long long int posOfListItem(const typename Trait::String &s, bool ordered)
Definition parser.h:3180
bool isGitHubAutolink(const typename Trait::String &url)
bool isHorizontalLine(const typename Trait::String &s)
Definition parser.h:585
bool isValidUrl(const typename Trait::String &url)
Trait::String startSequence(const typename Trait::String &line)
Definition parser.h:118
bool checkStack(std::vector< std::pair< std::pair< long long int, bool >, int > > &s, const std::pair< std::pair< long long int, bool >, int > &v, size_t idx)
Definition parser.h:307
TextPlugin
ID of text plugin.
Definition parser.h:862
@ UnknownPluginID
Unknown plugin.
Definition parser.h:864
@ UserDefinedPluginID
First user defined plugin ID.
Definition parser.h:868
@ GitHubAutoLinkPluginID
GitHub's autolinks plugin.
Definition parser.h:866
long long int lastNonSpacePos(const typename Trait::String &line)
Definition parser.h:104
long long int lastVirginPositionInParagraph(Item< Trait > *item)
Definition parser.h:8127
void skipSpacesUpTo1Line(long long int &line, long long int &pos, const typename MdBlock< Trait >::Data &fr)
Skip spaces in the block up to 1 new line.
Definition parser.h:6483
bool isHtmlComment(const typename Trait::String &s)
Definition parser.h:717
void resolveLinks(typename Trait::StringList &linksToParse, std::shared_ptr< Document< Trait > > doc)
Resolve links in the document.
Definition parser.h:3080
bool isWithoutRawDataOptimization(OptimizeParagraphType t)
Definition parser.h:7853
Trait::String replaceEntity(const typename Trait::String &s)
Replace entities in the string with corresponding character.
Definition parser.h:750
static const std::map< typename Trait::String, const char16_t * > s_entityMap
String removeBackslashes(const String &s)
Remove backslashes from the string.
Definition parser.h:490
Trait::String removeLineBreak(const typename Trait::String &s)
Remove line break from the end of string.
Definition parser.h:4335
void updateLastPosInList(const RawHtmlBlock< Trait > &html)
Update last position of all parents.
Definition parser.h:9046
std::pair< typename Trait::String, WithPosition > findAndRemoveHeaderLabel(typename Trait::InternalString &s)
Find and remove heading label.
Definition parser.h:3513
std::pair< bool, bool > readHtmlAttr(long long int &l, long long int &p, const typename MdBlock< Trait >::Data &fr, bool checkForSpace)
Read HTML attribute.
Definition parser.h:4687
void checkForHtmlComments(const typename Trait::InternalString &line, StringListStream< Trait > &stream, MdLineData::CommentDataMap &res)
Collect information about HTML comments.
Definition parser.h:2333
FeedPtr parse(const DocumentSource &src, const QString &formatHint=QString())
bool isSpace(char32_t ucs4)
char16_t & unicode()
bool isEmpty() const
void push_back(parameter_type value)
void clear()
QString first(qsizetype n) const
qsizetype indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
bool isEmpty() const
qsizetype length() const
void push_back(QChar ch)
QString & remove(QChar ch, Qt::CaseSensitivity cs)
QString sliced(qsizetype pos) const
QStringList split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const
QString toCaseFolded() const
QString toUpper() const
const QChar * unicode() const
bool contains(QLatin1StringView str, Qt::CaseSensitivity cs) const
qsizetype size() const
SkipEmptyParts
Internal structure for block of text in Markdown.
Definition parser.h:256
Data m_data
Definition parser.h:260
typename Trait::template Vector< Line > Data
Definition parser.h:258
std::pair< typename Trait::InternalString, MdLineData > Line
Definition parser.h:257
long long int m_emptyLinesBefore
Definition parser.h:261
bool m_emptyLineAfter
Definition parser.h:262
Internal structure for auxiliary information about a line in Markdown.
Definition parser.h:242
long long int m_lineNumber
Definition parser.h:243
std::pair< char, bool > CommentData
Definition parser.h:244
std::map< long long int, CommentData > CommentDataMap
Definition parser.h:245
CommentDataMap m_htmlCommentData
Definition parser.h:247
Internal structure for pre-storing HTML.
Definition parser.h:202
int m_htmlBlockType
Definition parser.h:209
std::unordered_map< std::shared_ptr< Block< Trait > >, SequenceOfBlock > m_toAdjustLastPos
Definition parser.h:208
SequenceOfBlock m_blocks
Definition parser.h:207
std::vector< std::pair< std::shared_ptr< Block< Trait > >, long long int > > SequenceOfBlock
Definition parser.h:206
std::shared_ptr< RawHtml< Trait > > m_html
Definition parser.h:203
std::shared_ptr< Block< Trait > > findParent(long long int indent) const
Definition parser.h:214
bool m_continueHtml
Definition parser.h:210
std::shared_ptr< Block< Trait > > m_topParent
Definition parser.h:205
std::shared_ptr< Block< Trait > > m_parent
Definition parser.h:204
Internal structure for auxiliary options for parser.
Definition parser.h:941
bool shouldStopParsing() const
Definition parser.h:983
RawHtmlBlock< Trait > & m_html
Definition parser.h:951
std::shared_ptr< Document< Trait > > m_doc
Definition parser.h:945
long long int m_pos
Definition parser.h:997
bool m_checkLineOnNewType
Definition parser.h:955
ItemWithOpts< Trait >::Styles m_openStyles
Definition parser.h:1004
void concatenateAuxText(long long int start, long long int end)
Definition parser.h:967
Trait::StringList & m_linksToParse
Definition parser.h:946
bool m_firstInParagraph
Definition parser.h:956
long long int m_lastTextPos
Definition parser.h:1000
long long int m_line
Definition parser.h:996
Trait::String m_fileName
Definition parser.h:948
long long int m_startTableLine
Definition parser.h:998
std::shared_ptr< ItemWithOpts< Trait > > m_lastItemWithStyle
Definition parser.h:1005
std::shared_ptr< Block< Trait > > m_parent
Definition parser.h:943
std::shared_ptr< RawHtml< Trait > > m_tmpHtml
Definition parser.h:944
std::shared_ptr< Text< Trait > > m_lastText
Definition parser.h:953
const TextPluginsMap< Trait > & m_textPlugins
Definition parser.h:952
std::vector< std::pair< Style, long long int > > m_styles
Definition parser.h:1003
MdBlock< Trait > & m_fr
Definition parser.h:942
Trait::String m_workingPath
Definition parser.h:947
Detected m_detected
Definition parser.h:980
std::vector< TextData > m_rawTextData
Definition parser.h:964
long long int m_lastTextLine
Definition parser.h:999
static bool isFreeTag(std::shared_ptr< RawHtml< Trait > > html)
Definition parser.h:4089
static void setFreeTag(std::shared_ptr< RawHtml< Trait > > html, bool on)
Definition parser.h:4095
#define MD_DISABLE_COPY(Class)
Macro for disabling copy.
Definition utils.h:17
#define MD_UNUSED(x)
Avoid "unused parameter" warnings.
Definition utils.h:26
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Wed Nov 6 2024 12:12:28 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.