6#ifndef MD4QT_MD_TRAITS_HPP_INCLUDED
7#define MD4QT_MD_TRAITS_HPP_INCLUDED
9#ifdef MD4QT_ICU_STL_SUPPORT
22#include <unicode/uchar.h>
23#include <unicode/unistr.h>
26#include <uriparser/Uri.h>
30#ifdef MD4QT_QT_SUPPORT
32#ifndef MD4QT_ICU_STL_SUPPORT
54template<
class String,
class Char,
class Latin1Char>
92 if (pos + len > m_str.length() || len < 0) {
93 len = m_str.length() - pos;
97 return (m_str.isEmpty() ? m_virginStr : String());
101 String startStr, endStr;
103 if (m_virginStr[virginStartPos] == Latin1Char(
'\t')) {
104 const auto spaces = countOfSpacesForTab(virginStartPos);
106 for (
long long int i = 1; i < spaces; ++i) {
107 if (
virginPos(pos + i) != virginStartPos) {
108 startStr = String(i, Latin1Char(
' '));
115 auto virginEndPos =
virginPos(pos + len - 1,
true);
117 if (m_virginStr[virginEndPos] == Latin1Char(
'\t')) {
118 const auto spaces = countOfSpacesForTab(virginEndPos);
120 for (
long long int i = 1; i < spaces; ++i) {
121 if (
virginPos(pos + len - 1 - i) != virginEndPos) {
122 endStr = String(i, Latin1Char(
' '));
129 return startStr + m_virginStr.sliced(virginStartPos, virginEndPos - virginStartPos + 1) + endStr;
139 bool end =
false)
const
141 for (
auto it = m_changedPos.crbegin(), last = m_changedPos.crend(); it != last; ++it) {
142 pos = virginPosImpl(pos, *it, end);
150 return m_str[position];
156 const auto len = m_str.length();
158 m_str.remove(pos, size);
159 m_str.insert(pos, with);
161 if (with.length() != size) {
162 m_changedPos.push_back({{0, len}, {}});
163 m_changedPos.back().second.push_back({pos, size, with.size()});
174 const auto len = m_str.length();
176 for (
long long int i = 0; i < m_str.size();) {
177 long long int p = m_str.indexOf(what, i);
180 tmp.push_back(m_str.sliced(i, p - i));
185 if (what.size() != with.size()) {
187 m_changedPos.push_back({{0, len}, {}});
191 m_changedPos.back().second.push_back({p, what.size(), with.size()});
194 tmp.push_back(m_str.sliced(i));
200 std::swap(m_str, tmp);
208 const auto len = m_str.length();
210 m_str.remove(pos, size);
212 m_changedPos.push_back({{0, len}, {}});
213 m_changedPos.back().second.push_back({pos, size, 0});
221 return m_str.isEmpty();
227 return m_str.length();
237 const auto len = m_str.length();
240 result.m_str.clear();
244 long long int spaces = 0;
249 while (i <
length() && m_str[i].isSpace()) {
257 result.m_changedPos.push_back({{0, len}, {}});
261 if (i - tmp > 1 || first) {
262 result.m_changedPos.back().second.push_back({tmp, i - tmp, (first ? 0 : 1)});
268 while (i !=
length() && !m_str[i].isSpace()) {
269 result.m_str.push_back(m_str[i]);
277 result.m_str.push_back(Latin1Char(
' '));
280 if (!result.
isEmpty() && result.m_str[result.
length() - 1] == Latin1Char(
' ')) {
281 result.m_str.remove(result.
length() - 1, 1);
284 result.m_changedPos.back().second.back().m_len = 0;
285 }
else if (spaces == 1) {
286 result.m_changedPos.back().second.push_back({m_str.length() - spaces, spaces, 0});
296 std::vector<InternalStringT> result;
297 const auto len = m_str.length();
300 for (
long long int i = 0; i < m_str.length(); ++i) {
303 is.m_changedPos.push_back({{i, len}, {}});
305 result.push_back(is);
311 long long int pos = 0;
312 long long int fpos = 0;
314 while ((fpos = m_str.indexOf(sep.
asString(), pos)) != -1 && fpos <
length()) {
315 if (fpos - pos > 0) {
317 is.m_str = m_str.sliced(pos, fpos - pos);
318 is.m_changedPos.push_back({{pos, len}, {}});
320 result.push_back(is);
323 pos = fpos + sep.
length();
326 if (pos < m_str.length()) {
328 is.m_str = m_str.sliced(pos, m_str.length() - pos);
329 is.m_changedPos.push_back({{pos, len}, {}});
331 result.push_back(is);
341 const auto oldLen = m_str.length();
342 tmp.m_str = tmp.m_str.sliced(pos, (len == -1 ? tmp.m_str.length() - pos : len));
343 tmp.m_changedPos.push_back({{pos, oldLen}, {}});
344 if (len != -1 && len <
length() - pos) {
345 tmp.m_changedPos.back().second.push_back({pos + len,
length() - pos - len, 0});
355 const auto len = m_str.length();
356 tmp.m_str = tmp.m_str.right(n);
357 tmp.m_changedPos.push_back({{
length() - n, len}, {}});
365 return insert(pos, String(1, ch));
371 const auto len = m_str.length();
372 const auto ilen = s.length();
374 m_str.insert(pos, s);
376 m_changedPos.push_back({{0, len}, {}});
377 m_changedPos.back().second.push_back({pos, 1, ilen + 1});
390 long long int m_pos = -1;
391 long long int m_oldLen = -1;
392 long long int m_len = -1;
396 struct LengthAndStartPos {
397 long long int m_firstPos = 0;
398 long long int m_length = 0;
402 std::vector<std::pair<LengthAndStartPos, std::vector<ChangedPos>>> m_changedPos;
405 long long int virginPosImpl(
long long int pos,
406 const std::pair<LengthAndStartPos, std::vector<ChangedPos>> &changed,
409 for (
const auto &c : changed.second) {
410 const auto startPos = c.m_pos;
411 const auto endPos = startPos + c.m_len - 1;
413 if (pos >= startPos && pos <= endPos) {
414 const auto oldEndPos = startPos + c.m_oldLen - 1;
416 if (pos > oldEndPos || end) {
417 return oldEndPos + changed.first.m_firstPos;
419 return pos + changed.first.m_firstPos;
421 }
else if (pos > endPos) {
422 pos += c.m_oldLen - c.m_len;
428 pos += changed.first.m_firstPos;
430 return (pos > changed.first.m_length ? changed.first.m_length : pos);
433 long long int countOfSpacesForTab(
long long int virginPos)
const
437 for (
const auto &v : std::as_const(m_changedPos)) {
438 p += v.first.m_firstPos;
444 for (
const auto &c : std::as_const(v.second)) {
457#ifdef MD4QT_ICU_STL_SUPPORT
477 operator UChar32()
const
484 bool unicodeSpace =
false;
486 const auto type = u_charType(m_ch);
489 case U_SPACE_SEPARATOR:
490 case U_LINE_SEPARATOR:
491 case U_PARAGRAPH_SEPARATOR:
499 return m_ch == 0x20 || (m_ch <= 0x0D && m_ch >= 0x09) ||
500 (m_ch > 127 && (m_ch == 0x85 || m_ch == 0xA0 || unicodeSpace));
505 return (u_charType(m_ch) == U_DECIMAL_DIGIT_NUMBER);
520 const auto type = u_charType(m_ch);
523 case U_UPPERCASE_LETTER:
524 case U_LOWERCASE_LETTER:
525 case U_TITLECASE_LETTER:
526 case U_MODIFIER_LETTER:
542 const auto type = u_charType(m_ch);
545 case U_DASH_PUNCTUATION:
546 case U_START_PUNCTUATION:
547 case U_END_PUNCTUATION:
548 case U_CONNECTOR_PUNCTUATION:
549 case U_OTHER_PUNCTUATION:
550 case U_INITIAL_PUNCTUATION:
551 case U_FINAL_PUNCTUATION:
561 const auto type = u_charType(m_ch);
565 case U_CURRENCY_SYMBOL:
566 case U_MODIFIER_SYMBOL:
577 return icu::UnicodeString(1, m_ch, 1).
toLower().char32At(0);
582 return m_ch == other.m_ch;
587 return m_ch != other.m_ch;
637 : icu::
UnicodeString((int32_t)count, (UChar32)ch, (int32_t)count)
650 icu::UnicodeString::append((UChar32)ch);
655 icu::UnicodeString::append(str);
663 int toInt(
bool *ok =
nullptr,
int base = 10)
const
668 const auto result = std::stoi(tmp,
nullptr, base);
673 }
catch (
const std::invalid_argument &) {
677 }
catch (
const std::out_of_range &) {
688 return (icu::UnicodeString::indexOf((UChar32)ch) != -1);
693 return (icu::UnicodeString::indexOf(str) != -1);
706 while (i < length() &&
UnicodeChar(char32At(i)).isSpace()) {
710 while (i != length() && !
UnicodeChar(char32At(i)).isSpace()) {
722 if (!result.isEmpty() && result[result.
size() - 1] ==
UnicodeChar(
' ')) {
723 result.remove(result.
size() - 1, 1);
731 std::vector<UnicodeString> result;
736 while ((fpos = indexOf(ch, pos)) != -1 && fpos < length()) {
737 if (fpos - pos > 0) {
738 icu::UnicodeString tmp;
739 extract(pos, fpos - pos, tmp);
740 result.push_back(tmp);
746 if (pos < length()) {
747 icu::UnicodeString tmp;
748 extract(pos, length() - pos, tmp);
749 result.push_back(tmp);
755 std::vector<UnicodeString>
split(
char ch)
const
762 for (int32_t pos = 0; (pos = indexOf(before, pos)) != -1; pos += after.
size()) {
763 icu::UnicodeString::replace(pos, 1, after);
771 for (int32_t pos = 0; (pos = indexOf(before, pos)) != -1; pos += after.
size()) {
772 icu::UnicodeString::replace(pos, before.length(), after);
780 icu::UnicodeString tmp;
781 extract((int32_t)pos, (int32_t)(len == -1 ? length() - pos : len), tmp);
788 icu::UnicodeString tmp;
789 extract(length() - (int32_t)n, (int32_t)n, tmp);
796 icu::UnicodeString tmp = *
this;
804 icu::UnicodeString tmp = *
this;
812 icu::UnicodeString tmp = *
this;
820 icu::UnicodeString::remove();
836 std::string uriString;
837 uriStr.toUTF8String(uriString);
839 if (uriParseSingleUriA(&uri, uriString.c_str(), NULL) == URI_SUCCESS) {
841 m_relative = !(uri.scheme.first && uri.scheme.afterLast);
844 m_scheme = UnicodeString(std::string(uri.scheme.first,
845 uri.scheme.afterLast - uri.scheme.first).c_str());
848 if (uri.hostText.first && uri.hostText.afterLast) {
849 m_host = UnicodeString(std::string(uri.hostText.first,
850 uri.hostText.afterLast - uri.hostText.first).c_str());
853 uriFreeUriMembersA(&uri);
897 template<
class T,
class U>
898 using Map = std::map<T, U>;
919 if (u_charType(c) == U_SPACE_SEPARATOR) {
921 }
else if (c == 0x09 || c == 0x0A || c == 0x0C || c == 0x0D) {
956 (workingPath.isEmpty() ? fileName :
String(workingPath +
"/" + fileName)).toUTF8String(path);
960 const auto result = std::filesystem::exists(path, er);
962 return (er ?
false : result);
969 fileName.toUTF8String(path);
973 const auto result = std::filesystem::exists(path, er);
975 return (er ?
false : result);
982 path.toUTF8String(tmp);
984 auto p = std::filesystem::canonical(tmp, er).u8string();
986 std::replace(p.begin(), p.end(),
'\\',
'/');
988 return (er ?
"" : UnicodeString::fromUTF8(p));
1003 return where.lastIndexOf(what, 0, from + 1);
1010#ifdef MD4QT_QT_SUPPORT
1021 template<
class T,
class U>
1045 }
else if (c == 0x09 || c == 0x0A || c == 0x0C || c == 0x0D) {
1080 QString() : workingPath + latin1ToString(
"/")) + fileName);
Internal string, used to get the virgin (original) string from the transformed string.
Char operator[](long long int position) const
long long int virginPos(long long int pos, bool end=false) const
const String & asString() const
InternalStringT(const String &s)
long long int length() const
std::vector< InternalStringT > split(const InternalStringT &sep) const
Split string.
InternalStringT simplified() const
InternalStringT & insert(long long int pos, const String &s)
Insert string.
String virginSubString(long long int pos=0, long long int len=-1) const
InternalStringT & replace(const String &what, const String &with)
Replace string.
InternalStringT & remove(long long int pos, long long int size)
Remove sub-string.
InternalStringT sliced(long long int pos, long long int len=-1) const
InternalStringT & replaceOne(long long int pos, long long int size, const String &with)
Replace substring.
const String & fullVirginString() const
InternalStringT right(long long int n) const
InternalStringT & insert(long long int pos, Char ch)
Insert one character.
Wrapper for UChar32 to be used with MD::Parser.
UnicodeChar toLower() const
bool operator==(const UnicodeChar &other) const
bool isLetterOrNumber() const
bool operator!=(const UnicodeChar &other) const
Wrapper for icu::UnicodeString to be used with MD::Parser.
UnicodeString(const UnicodeChar &ch)
std::vector< UnicodeString > split(char ch) const
UnicodeString right(long long int n) const
void push_back(const UnicodeString &str)
UnicodeString toCaseFolded() const
void push_back(const UnicodeChar &ch)
bool contains(const UnicodeChar &ch) const
UnicodeString toUpper() const
UnicodeString sliced(long long int pos, long long int len=-1) const
UnicodeString(long long int count, char ch)
std::vector< UnicodeString > split(const UnicodeChar &ch) const
UnicodeString & replace(const UnicodeString &before, const UnicodeString &after)
bool contains(const UnicodeString &str) const
int toInt(bool *ok=nullptr, int base=10) const
UnicodeString(const icu::UnicodeString &str)
UnicodeString simplified() const
UnicodeString toLower() const
~UnicodeString() override=default
UnicodeString(const char16_t *str)
UnicodeString(const char *str)
UnicodeString & replace(const UnicodeChar &before, const UnicodeString &after)
UnicodeString(const std::string &str)
UnicodeChar operator[](long long int position) const
UrlUri(const UnicodeString &uriStr)
UnicodeString scheme() const
UnicodeString host() const
Category category(char32_t ucs4)
QString absoluteFilePath() const
bool exists() const
QString fromUtf16(const char16_t *unicode, qsizetype size)
QString fromUtf8(QByteArrayView str)
bool isEmpty() const
qsizetype lastIndexOf(QChar ch, Qt::CaseSensitivity cs) const
Trait to use this library with QString.
static bool isUnicodeWhitespace(const QChar &ch)
static String utf16ToString(const char16_t *u16)
Convert UTF-16 into trait's string.
static void appendUcs4(String &str, char32_t ch)
Add UCS4 to string.
static String absoluteFilePath(const String &path)
static long long int lastIndexOf(const String &where, const String &what, long long int from)
Search for last occurrence of string.
static String latin1ToString(const char *latin1)
Convert Latin1 into trait's string.
std::vector< InternalString > InternalStringList
static Char latin1ToChar(char latin1)
Convert Latin1 char into trait's char.
static bool fileExists(const String &fileName)
static bool fileExists(const String &fileName, const String &workingPath)
static String utf8ToString(const char *utf8)
Convert UTF8 into trait's string.
Trait to use this library with std::string.
static String latin1ToString(const char *latin1)
Convert Latin1 into trait's string.
std::vector< InternalString > InternalStringList
static long long int lastIndexOf(const String &where, const String &what, long long int from)
Search for last occurrence of string.
static String absoluteFilePath(const String &path)
static bool fileExists(const String &fileName)
static String utf16ToString(const char16_t *u16)
Convert UTF-16 into trait's string.
std::vector< String > StringList
static bool isUnicodeWhitespace(const UnicodeChar &ch)
static void appendUcs4(String &str, char32_t ch)
Add UCS4 to string.
static Char latin1ToChar(char latin1)
Convert Latin1 char into trait's char.
static String utf8ToString(const char *utf8)
Convert UTF8 into trait's string.
static bool fileExists(const String &fileName, const String &workingPath)