// NOTE(review): this listing was rebuilt from a patch whose line breaks and
// <...> spans (include names, template arguments) had been stripped. The
// template arguments below are reconstructed from how the code uses them
// (char for ByteString, char32_t for String) - confirm against the original
// commit before splitting the two sections back into separate files.

// ============================================================================
// src/common/String.h
// ============================================================================
#ifndef TPT_STRING
#define TPT_STRING

#include <cstddef>
#include <stdexcept>
#include <string>
#include <utility>

class String;

// A string of raw bytes (char). Used for encoded text such as UTF-8 or ASCII.
class ByteString : public std::basic_string<char>
{
public:
	ByteString() = default;
	ByteString(size_type count, value_type ch): std::basic_string<char>(count, ch) {}
	ByteString(value_type const *ch, size_type count): std::basic_string<char>(ch, count) {}
	ByteString(value_type const *ch): std::basic_string<char>(ch) {}
	template<class It> ByteString(It first, It last): std::basic_string<char>(first, last) {}

	// The class adds no state of its own, so the compiler-generated copy and
	// move operations are exactly the base-class ones.
	ByteString(ByteString const &other) = default;
	ByteString(ByteString &&other) = default;
	ByteString &operator=(ByteString const &other) = default;
	ByteString &operator=(ByteString &&other) = default;

	// Thrown when a conversion between ByteString and String fails.
	class ConversionError : public std::runtime_error
	{
		// Builds a message listing, in hex, up to four bytes starting at `at`
		// (never reading at or past `upto`).
		static std::string formatError(value_type const *at, value_type const *upto);
	public:
		// Failure while decoding bytes: `at` is the first byte that could not
		// be converted, `upto` is the end of the input.
		ConversionError(value_type const *at, value_type const *upto): std::runtime_error(formatError(at, upto)) {}
		// Generic failure: `to` is true for the to-UTF-8 direction and false
		// for the from-UTF-8 direction.
		ConversionError(bool to): std::runtime_error(to ? "Could not convert to UTF-8" : "Could not convert from UTF-8") {}
	};

	// Decodes the bytes as UTF-8.
	// With ignoreError == true (the default) each byte that cannot be decoded
	// is copied through as a code point of the same (unsigned) value; with
	// ignoreError == false a ConversionError is thrown instead.
	String FromUtf8(bool ignoreError = true) const;
	// Widens every byte to one code point without any decoding.
	inline String FromAscii() const;
};

// A string of char32_t code points.
class String : public std::basic_string<char32_t>
{
public:
	String() = default;
	String(size_type count, value_type ch): std::basic_string<char32_t>(count, ch) {}
	String(value_type const *ch, size_type count): std::basic_string<char32_t>(ch, count) {}
	String(value_type const *ch): std::basic_string<char32_t>(ch) {}
	template<class It> String(It first, It last): std::basic_string<char32_t>(first, last) {}

	String(String const &other) = default;
	String(String &&other) = default;
	String &operator=(String const &other) = default;
	String &operator=(String &&other) = default;

	// Builds a String from a narrow character array such as a string literal.
	// A literal "abc" has type char const[4] and ends in '\0'; that
	// terminator is not part of the text, so it is dropped. An array that
	// does not end in '\0' is taken in full.
	template<std::size_t N> String(ByteString::value_type const (&ch)[N]):
		std::basic_string<char32_t>(ByteString(ch, ch[N - 1] == '\0' ? N - 1 : N).FromAscii())
	{
	}

	// Encodes the code points as UTF-8.
	// Throws ByteString::ConversionError if the converter reports an error.
	ByteString ToUtf8() const;
};

inline String ByteString::FromAscii() const
{
	String destination = String(size(), String::value_type());
	for(size_type i = 0; i < size(); i++)
	{
		// Go through unsigned char so that bytes >= 0x80 do not sign-extend
		// on platforms where plain char is signed.
		destination[i] = static_cast<String::value_type>(static_cast<unsigned char>((*this)[i]));
	}
	return destination;
}
#endif

// ============================================================================
// src/common/String.cpp
// ============================================================================
#include <codecvt>
#include <locale>
#include <sstream>
#include <type_traits>
#include <vector>

// The original file has `#include "String.h"` here. It is left out of this
// combined listing because the declarations appear directly above; restore it
// when this section lives in its own file.

namespace
{
	typedef std::codecvt_utf8<String::value_type> Utf8Converter;

	// Doubles `buffer` and re-bases `cursor` onto the new storage.
	// The offset is taken BEFORE resize(), because resize() may reallocate and
	// invalidate every pointer into the old storage.
	template<class Char>
	void growBuffer(std::vector<Char> &buffer, Char *&cursor)
	{
		std::size_t const used = static_cast<std::size_t>(cursor - buffer.data());
		// Doubling a zero-sized buffer would never make room, so grow to at
		// least one element.
		std::size_t const grown = buffer.empty() ? 1 : 2 * buffer.size();
		buffer.resize(grown);
		cursor = buffer.data() + used;
	}
}

std::string ByteString::ConversionError::formatError(ByteString::value_type const *at, ByteString::value_type const *upto)
{
	std::stringstream ss;
	ss << "Could not convert sequence to UTF-8:";
	for(int i = 0; i < 4 && at + i < upto; i++)
	{
		// Convert via the unsigned type so a byte prints as e.g. "ff" rather
		// than as a sign-extended value.
		ss << " " << std::hex << static_cast<unsigned int>(static_cast<std::make_unsigned<ByteString::value_type>::type>(at[i]));
	}
	return ss.str();
}

// Shared converter used by both directions. The constructor argument is the
// facet's `refs` count; a non-zero value means no locale will try to delete
// this object.
Utf8Converter convert(1);

String ByteString::FromUtf8(bool ignoreError) const
{
	std::vector<String::value_type> destination(size(), String::value_type());
	// Value-initialise: the conversion state must start zeroed.
	Utf8Converter::state_type state = Utf8Converter::state_type();

	ByteString::value_type const *const end = data() + size();
	ByteString::value_type const *from = data(), *from_next = from;
	String::value_type *to = destination.data(), *to_next = to;

	while(true)
	{
		Utf8Converter::result result = convert.in(state, from, end, from_next, to, destination.data() + destination.size(), to_next);
		from = from_next;
		to = to_next;
		if(result == std::codecvt_base::ok || result == std::codecvt_base::noconv)
		{
			// Build from iterators rather than data(): data() of an empty
			// vector may be a null pointer.
			return String(destination.begin(), destination.begin() + (to - destination.data()));
		}
		else if(result == std::codecvt_base::partial && to == destination.data() + destination.size())
		{
			// Output buffer is full: make room and resume where we stopped.
			growBuffer(destination, to);
		}
		else
		{
			// Either a hard error, or `partial` with room left in the output
			// buffer (the input stops in the middle of a sequence).
			if(!ignoreError)
				throw ConversionError(from, end);

			if(to == destination.data() + destination.size())
				growBuffer(destination, to);
			// Pass the offending byte through as a code point of the same
			// unsigned value, then carry on after it.
			*(to++) = static_cast<std::make_unsigned<ByteString::value_type>::type>(*(from++));
		}
	}
}

ByteString String::ToUtf8() const
{
	// Start with one byte per code point; the buffer is doubled whenever the
	// converter reports that it ran out of room.
	std::vector<ByteString::value_type> destination(size(), ByteString::value_type());
	// Value-initialise: the conversion state must start zeroed.
	Utf8Converter::state_type state = Utf8Converter::state_type();

	String::value_type const *const end = data() + size();
	String::value_type const *from = data(), *from_next = from;
	ByteString::value_type *to = destination.data(), *to_next = to;

	while(true)
	{
		Utf8Converter::result result = convert.out(state, from, end, from_next, to, destination.data() + destination.size(), to_next);
		from = from_next;
		to = to_next;
		if(result == std::codecvt_base::ok || result == std::codecvt_base::noconv)
		{
			// Build from iterators rather than data(): data() of an empty
			// vector may be a null pointer.
			return ByteString(destination.begin(), destination.begin() + (to - destination.data()));
		}
		else if(result == std::codecvt_base::error)
		{
			throw ByteString::ConversionError(true);
		}
		else if(result == std::codecvt_base::partial)
		{
			growBuffer(destination, to);
		}
	}
}