From dd3fcad4e1b26912ae90fb320387368a51abc7ae Mon Sep 17 00:00:00 2001 From: Aleksey Dobrunov Date: Thu, 16 Jan 2025 23:04:45 +0500 Subject: [PATCH] fix read non-latin string from hrc --- src/colorer/strings/icu/Encodings.cpp | 9 ++++++++- src/colorer/strings/icu/Encodings.h | 1 + src/colorer/strings/legacy/Encodings.cpp | 6 ++++++ src/colorer/strings/legacy/Encodings.h | 1 + src/colorer/xml/libxml2/LibXmlReader.cpp | 19 ++++++++++--------- src/colorer/xml/libxml2/LibXmlReader.h | 2 +- 6 files changed, 27 insertions(+), 11 deletions(-) diff --git a/src/colorer/strings/icu/Encodings.cpp b/src/colorer/strings/icu/Encodings.cpp index 340d5b7..b8f72c8 100644 --- a/src/colorer/strings/icu/Encodings.cpp +++ b/src/colorer/strings/icu/Encodings.cpp @@ -1,4 +1,5 @@ #include "colorer/strings/icu/Encodings.h" +#include #include "colorer/Exception.h" #include "unicode/ucnv.h" #include "unicode/ustring.h" @@ -24,7 +25,13 @@ uUnicodeString Encodings::toUnicodeString(char* data, int32_t len) uUnicodeString Encodings::fromUTF8(char* data, int32_t len) { - return std::make_unique(data , len , ENC_UTF8); + return std::make_unique(data, len, ENC_UTF8); +} + +uUnicodeString Encodings::fromUTF8(unsigned char* data) +{ + const auto c = reinterpret_cast(data); + return fromUTF8(c,strlen(c)); } int Encodings::toUTF8Bytes(UChar wc, byte* dest) diff --git a/src/colorer/strings/icu/Encodings.h b/src/colorer/strings/icu/Encodings.h index b79abc4..3f8590d 100644 --- a/src/colorer/strings/icu/Encodings.h +++ b/src/colorer/strings/icu/Encodings.h @@ -10,6 +10,7 @@ class Encodings static uUnicodeString toUnicodeString(char* data, int32_t len); static uUnicodeString fromUTF8(char* data, int32_t len); + static uUnicodeString fromUTF8(unsigned char* data); static int toUTF8Bytes(UChar, byte*); }; diff --git a/src/colorer/strings/legacy/Encodings.cpp b/src/colorer/strings/legacy/Encodings.cpp index 23d2bc1..a9a0046 100644 --- a/src/colorer/strings/legacy/Encodings.cpp +++ b/src/colorer/strings/legacy/Encodings.cpp @@ -177,4 +177,10 @@ uUnicodeString Encodings::fromUTF8(char* data, int32_t len) return std::make_unique(data, len, Encodings::ENC_UTF8); } +uUnicodeString Encodings::fromUTF8(unsigned char* data) +{ + const auto c = reinterpret_cast(data); + return fromUTF8(c,strlen(c)); +} + diff --git a/src/colorer/strings/legacy/Encodings.h b/src/colorer/strings/legacy/Encodings.h index ca9c0d9..50cd3cb 100644 --- a/src/colorer/strings/legacy/Encodings.h +++ b/src/colorer/strings/legacy/Encodings.h @@ -46,6 +46,7 @@ class Encodings static uUnicodeString toUnicodeString(char* data, int32_t len); static uUnicodeString fromUTF8(char* data, int32_t len); + static uUnicodeString fromUTF8(unsigned char* data); }; #endif diff --git a/src/colorer/xml/libxml2/LibXmlReader.cpp b/src/colorer/xml/libxml2/LibXmlReader.cpp index ee53b96..62fa729 100644 --- a/src/colorer/xml/libxml2/LibXmlReader.cpp +++ b/src/colorer/xml/libxml2/LibXmlReader.cpp @@ -56,8 +56,8 @@ bool LibXmlReader::populateNode(xmlNode* node, XMLNode& result) result.name = UnicodeString(reinterpret_cast(node->name)); const auto text_string = getElementText(node); - if (!text_string.isEmpty()) { - result.text = text_string; + if (text_string && !text_string->isEmpty()) { + result.text = UnicodeString(*text_string.get()); } getChildren(node, result); getAttributes(node, result.attributes); @@ -67,22 +67,22 @@ bool LibXmlReader::populateNode(xmlNode* node, XMLNode& result) return false; } -UnicodeString LibXmlReader::getElementText(const xmlNode* node) +uUnicodeString LibXmlReader::getElementText(const xmlNode* node) { for (const xmlNode* child = node->children; child != nullptr; child = child->next) { if (child->type == XML_CDATA_SECTION_NODE) { - return {reinterpret_cast(child->content)}; + return Encodings::fromUTF8(child->content); } if (child->type == XML_TEXT_NODE) { - auto temp_string = UnicodeString(reinterpret_cast(child->content)); - temp_string.trim(); - if (temp_string.isEmpty()) { + auto temp_string = Encodings::fromUTF8(child->content); + temp_string->trim(); + if (temp_string->isEmpty()) { continue; } return temp_string; } } - return {u""}; + return nullptr; } void LibXmlReader::getChildren(xmlNode* node, XMLNode& result) @@ -107,7 +107,8 @@ void LibXmlReader::getAttributes(const xmlNode* node, std::unordered_mapproperties; attr != nullptr; attr = attr->next) { const auto content = xmlNodeGetContent(attr->children); - data.try_emplace(reinterpret_cast(attr->name), reinterpret_cast(content)); + auto decoded_string = Encodings::fromUTF8(content); + data.try_emplace(reinterpret_cast(attr->name), *decoded_string.get()); xmlFree(content); } } diff --git a/src/colorer/xml/libxml2/LibXmlReader.h b/src/colorer/xml/libxml2/LibXmlReader.h index 21a83c8..160f1cc 100644 --- a/src/colorer/xml/libxml2/LibXmlReader.h +++ b/src/colorer/xml/libxml2/LibXmlReader.h @@ -30,7 +30,7 @@ class LibXmlReader static void getAttributes(const xmlNode* node, std::unordered_map& data); void getChildren(xmlNode* node, XMLNode& result); bool populateNode(xmlNode* node, XMLNode& result); - static UnicodeString getElementText(const xmlNode* node); + static uUnicodeString getElementText(const xmlNode* node); /* the name of the file that is being processed */ static uUnicodeString current_file;