UTF8 conversion for XStringW

This commit is contained in:
jief 2020-02-24 16:07:24 +03:00
parent 4ab2c1179f
commit 40dbcf08a9
8 changed files with 285 additions and 38 deletions

View File

@ -51,8 +51,8 @@ class XObjArrayNC
virtual ~XObjArrayNC(); virtual ~XObjArrayNC();
protected: protected:
XObjArrayNC(const XObjArrayNC<TYPE> &anObjArrayNC) { throw "Intentionally not defined"; } XObjArrayNC(const XObjArrayNC<TYPE> &anObjArrayNC) { DebugLog(2, "Intentionally not defined"); CpuDeadLoop(); }
const XObjArrayNC<TYPE> &operator =(const XObjArrayNC<TYPE> &anObjArrayNC) { throw "Intentionally not defined"; } const XObjArrayNC<TYPE> &operator =(const XObjArrayNC<TYPE> &anObjArrayNC) { DebugLog(2, "Intentionally not defined"); CpuDeadLoop(); }
xsize _getLen() const { return _Len; } xsize _getLen() const { return _Len; }
public: public:

View File

@ -21,12 +21,8 @@
#include "XToolsCommon.h" #include "XToolsCommon.h"
#include "XStringW.h" #include "XStringW.h"
//extern "C" {
// #include <Library/MemoryAllocationLib.h>
// #include <Library/BaseMemoryLib.h>
//}
#include <Platform.h> #include <Platform.h>
//#include "refit/IO.h" #include "printf_lite.h"
UINTN XStringWGrowByDefault = 1024; UINTN XStringWGrowByDefault = 1024;
const XStringW NullXStringW; const XStringW NullXStringW;
@ -83,6 +79,14 @@ DBG("Constructor(const wchar_t aChar)\n");
StrnCpy(&aChar, 1); StrnCpy(&aChar, 1);
} }
// Constructs the string from a NUL-terminated UTF-8 (or plain ASCII) C string.
// The byte length of S is measured once and reused, both to compute how many
// wchar_t units are needed and to drive the conversion itself.
XStringW::XStringW(const char* S)
{
DBG("Constructor(const char* S)\n");
	const auto utf8Len = AsciiStrLen(S); // single scan of S instead of one per call below
	xsize newLen = StrLenInWChar(S, utf8Len);
	Init(newLen);
	utf8ToWChar(m_data, m_size+1, S, utf8Len); // m_size doesn't count the NULL terminator
}
//xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx //xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
// Destructor // Destructor
//xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx //xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
@ -220,8 +224,21 @@ XStringW XStringW::SubStringReplace(wchar_t c1, wchar_t c2)
return Result; return Result;
} }
void XStringW::vSPrintf(const wchar_t *format, VA_LIST va) static XStringW* sprintfBuf;
void transmitSprintf(const wchar_t* buf, size_t nbyte)
{ {
(*sprintfBuf).StrnCat(buf, nbyte);
}
void XStringW::vSPrintf(const char* format, VA_LIST va)
{
SetLength(0);
sprintfBuf = this;
vprintf_with_callback(format, va, transmitSprintf);
// This is an attempt to use _PPrint from IO.c. Problem is : you have to allocate the memory BEFORE calling it.
// POOL_PRINT spc; // POOL_PRINT spc;
// PRINT_STATE ps; // PRINT_STATE ps;
// //
@ -240,11 +257,11 @@ void XStringW::vSPrintf(const wchar_t *format, VA_LIST va)
// VA_END(ps.args); // VA_END(ps.args);
} }
void XStringW::SPrintf(const wchar_t *format, ...) void XStringW::SPrintf(const char* format, ...)
{ {
VA_LIST va; VA_LIST va;
VA_START (va, format); VA_START (va, format);
vSPrintf(format, va); vSPrintf(format, va);
VA_END(va); VA_END(va);
} }
@ -464,7 +481,7 @@ const XStringW &XStringW::operator +=(const wchar_t *S)
// Functions // Functions
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
XStringW SPrintf(const wchar_t *format, ...) XStringW SPrintf(const char* format, ...)
{ {
VA_LIST va; VA_LIST va;
XStringW str; XStringW str;

View File

@ -11,11 +11,7 @@
#include "XToolsCommon.h" #include "XToolsCommon.h"
#include <Platform.h> #include <Platform.h>
//#include "XConstStringW.h" #include "utf8Conversion.h"
//extern "C" {
// #include <Library/BaseLib.h>
//}
#define LPATH_SEPARATOR L'\\' #define LPATH_SEPARATOR L'\\'
@ -37,6 +33,8 @@ public:
XStringW(const wchar_t* S, UINTN count); XStringW(const wchar_t* S, UINTN count);
XStringW(const wchar_t); XStringW(const wchar_t);
XStringW(const char*);
~XStringW(); ~XStringW();
protected: protected:
@ -89,8 +87,8 @@ public:
void Insert(UINTN pos, const XStringW& Str); void Insert(UINTN pos, const XStringW& Str);
void vSPrintf(const wchar_t *format, VA_LIST va); void vSPrintf(const char* format, VA_LIST va);
void SPrintf(const wchar_t *format, ...); void SPrintf(const char* format, ...) __attribute__ ((__format__ (__printf__, 2, 3)));
const XStringW &operator =(const XStringW &aString); const XStringW &operator =(const XStringW &aString);
const XStringW &operator =(const wchar_t* S); const XStringW &operator =(const wchar_t* S);
@ -101,7 +99,7 @@ public:
const XStringW &operator += (wchar_t); const XStringW &operator += (wchar_t);
XStringW SubString(UINTN pos, UINTN count) const; XStringW SubString(UINTN pos, UINTN count) const;
UINTN IdxOf(wchar_t c, UINTN Pos = 0) const; UINTN IdxOf(wchar_t c, UINTN Pos = 0) const;
UINTN IdxOf(const XStringW& S, UINTN Pos = 0) const; UINTN IdxOf(const XStringW& S, UINTN Pos = 0) const;
UINTN RIdxOf(const wchar_t c, UINTN Pos = MAX_XSIZE) const; UINTN RIdxOf(const wchar_t c, UINTN Pos = MAX_XSIZE) const;
UINTN RIdxOf(const XStringW& S, UINTN Pos = MAX_XSIZE) const; UINTN RIdxOf(const XStringW& S, UINTN Pos = MAX_XSIZE) const;
@ -116,7 +114,7 @@ public:
void Replace(wchar_t c1, wchar_t c2); void Replace(wchar_t c1, wchar_t c2);
XStringW SubStringReplace(wchar_t c1, wchar_t c2); XStringW SubStringReplace(wchar_t c1, wchar_t c2);
int Compare(const wchar_t* S) const { return StrCmp(data(), S) ; } int Compare(const wchar_t* S) const { return (int)StrCmp(data(), S) ; }
bool Equal(const wchar_t* S) const { return Compare(S) == 0; }; bool Equal(const wchar_t* S) const { return Compare(S) == 0; };
bool BeginingEqual(const wchar_t* S) const { return StrnCmp(data(), S, StrLen(S)); } bool BeginingEqual(const wchar_t* S) const { return StrnCmp(data(), S, StrLen(S)); }
@ -174,7 +172,7 @@ public:
//extern const XStringW NullXStringW; //extern const XStringW NullXStringW;
XStringW SPrintf(const wchar_t *format, ...); XStringW SPrintf(const char* format, ...) __attribute__ ((__format__ (__printf__, 1, 2)));
XStringW SubString(const wchar_t *S, UINTN pos, UINTN count); XStringW SubString(const wchar_t *S, UINTN pos, UINTN count);
XStringW CleanCtrl(const XStringW &S); XStringW CleanCtrl(const XStringW &S);

View File

@ -0,0 +1 @@
/JiefLand/5.Devel/Embedded/Shared/printf_lite-master/printf_lite.cpp

View File

@ -0,0 +1 @@
/JiefLand/5.Devel/Embedded/Shared/printf_lite-master/printf_lite.h

View File

@ -0,0 +1,185 @@
//
// utf8Conversion.hpp
//
// Created by jief the 24 Feb 2020.
//
#include "utf8Conversion.h"
#include <Platform.h>
#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
#define uint16_t UINT16
#define uint32_t UINT32
#define size_t UINTN
// Outcome of one call to utf8DecodeStep().
enum Utf8StepResult {
	UTF8_STEP_CHAR, // one code point was decoded
	UTF8_STEP_SKIP, // malformed bytes were stepped over, nothing was decoded
	UTF8_STEP_END   // end of input, or input stops in the middle of a multibyte char
};

// Decodes at most one code point from the UTF-8 bytes at s.
//   s         : read cursor, advanced past whatever was consumed.
//   remaining : bytes still readable at s, decreased by the same amount.
//   c         : receives the code point when UTF8_STEP_CHAR is returned.
// Input ends at the first NUL byte or when remaining reaches 0, whichever comes
// first. No byte at or beyond s + remaining is ever read.
// Shared by StrLenInWChar() and utf8ToWChar() so that both always agree on
// which bytes produce a character and which are skipped.
static Utf8StepResult utf8DecodeStep(const char*& s, size_t& remaining, char32_t& c)
{
	if ( remaining == 0 || *s == 0 ) return UTF8_STEP_END;

	if ( (*s & 0x80) == 0 ) {
		/* 1-byte code */
		c = *s;
		s += 1; remaining -= 1;
		return UTF8_STEP_CHAR;
	}

	if ( remaining < 2 || *(s+1) == 0 ) {
		// Finished in the middle of an utf8 multibyte char
		return UTF8_STEP_END;
	}
	if ( (*(s+1) & 0xc0) != 0x80 ) {
		// Second byte is not a continuation byte : drop the lead byte.
		s += 1; remaining -= 1;
		return UTF8_STEP_SKIP;
	}
	if ( (*s & 0xe0) != 0xe0 ) {
		/* 2-byte code */
		c = (*s & 0x1f) << 6;
		c |= *(s+1) & 0x3f;
		s += 2; remaining -= 2;
		return UTF8_STEP_CHAR;
	}

	if ( remaining < 3 || *(s+2) == 0 ) {
		// Finished in the middle of an utf8 multibyte char
		return UTF8_STEP_END;
	}
	if ( (*(s+2) & 0xc0) != 0x80 ) {
		s += 2; remaining -= 2;
		return UTF8_STEP_SKIP;
	}
	if ( (*s & 0xf0) != 0xf0 ) {
		/* 3-byte code */
		c = (*s & 0xf) << 12;
		c |= (*(s+1) & 0x3f) << 6;
		c |= *(s+2) & 0x3f;
		s += 3; remaining -= 3;
		return UTF8_STEP_CHAR;
	}

	if ( remaining < 4 || *(s+3) == 0 ) {
		// Finished in the middle of an utf8 multibyte char
		return UTF8_STEP_END;
	}
	if ( (*s & 0xf8) != 0xf0 || (*(s+3) & 0xc0) != 0x80 ) {
		s += 3; remaining -= 3;
		return UTF8_STEP_SKIP;
	}
	/* 4-byte code */
	c = (*s & 0x7) << 18;
	c |= (*(s+1) & 0x3f) << 12;
	c |= (*(s+2) & 0x3f) << 6;
	c |= *(s+3) & 0x3f;
	s += 4; remaining -= 4;
	return UTF8_STEP_CHAR;
}

// Returns how many wchar_t units are needed to store the UTF-8 string s,
// NOT counting the terminating zero.
//   s       : UTF-8 bytes. May be nullptr, in which case 0 is returned.
//   src_len : maximum number of bytes to read from s. Reading also stops at the
//             first NUL byte, so passing the string length keeps the old result.
// When wchar_t is 16 bits wide, a code point above 0xFFFF counts for two units
// (a surrogate pair). Malformed sequences are skipped and do not count.
size_t StrLenInWChar(const char *s, size_t src_len)
{
	size_t dst_len = 0;
	if ( s == nullptr ) return dst_len;

	for (;;) {
		char32_t c = 0;
		const Utf8StepResult step = utf8DecodeStep(s, src_len, c);
		if ( step == UTF8_STEP_END ) break;
		if ( step == UTF8_STEP_SKIP ) continue;
#if __WCHAR_MAX__ > 0xFFFFu
		(void)c; // the value is irrelevant here : every code point takes one unit
		dst_len += 1;
#else
		dst_len += ( c <= 0xFFFF ) ? 1 : 2;
#endif
	}
	return dst_len;
}


#define halfBase 0x0010000UL
#define halfMask 0x3FFUL
#define halfShift 10 /* used for shifting by 10 bits */
#define UNI_SUR_HIGH_START 0xD800u
#define UNI_SUR_LOW_START 0xDC00u

// Converts the UTF-8 string s into wchar_t units stored in dst.
//   dst         : destination buffer. Nothing is done if it is nullptr.
//   dst_max_len : capacity of dst in wchar_t units, INCLUDING the terminating
//                 zero. Nothing is written if it is 0.
//   s           : UTF-8 bytes. nullptr is treated as an empty string.
//   src_len     : maximum number of bytes to read from s. Reading also stops at
//                 the first NUL byte.
// dst is always zero-terminated. Output is truncated when dst is too small.
// Space is checked BEFORE every write, so a one-unit buffer only receives the
// terminator, and a surrogate pair is either written entirely or not at all.
void utf8ToWChar(wchar_t* dst, size_t dst_max_len, const char *s, size_t src_len)
{
	if ( dst == nullptr || dst_max_len == 0 ) return;
	const size_t capacity = dst_max_len - 1; // one unit is reserved for the terminator

	size_t dst_len = 0;
	if ( s != nullptr ) {
		while ( dst_len < capacity ) {
			char32_t c = 0;
			const Utf8StepResult step = utf8DecodeStep(s, src_len, c);
			if ( step == UTF8_STEP_END ) break;
			if ( step == UTF8_STEP_SKIP ) continue;
#if __WCHAR_MAX__ > 0xFFFFu
			dst[dst_len++] = (wchar_t)c;
#else
			if ( c <= 0xFFFF ) {
				dst[dst_len++] = (wchar_t)c;
			} else {
				// Never leave a lone high surrogate at the end of the output.
				if ( capacity - dst_len < 2 ) break;
				c -= halfBase;
				dst[dst_len++] = (wchar_t)((c >> halfShift) + UNI_SUR_HIGH_START);
				dst[dst_len++] = (wchar_t)((c & halfMask) + UNI_SUR_LOW_START);
			}
#endif
		}
	}
	dst[dst_len] = 0;
}

View File

@ -0,0 +1,14 @@
//
// utf8Conversion.h
//
// Created by jief the 24 Feb 2020.
//
#ifndef utf816Conversion_hpp
#define utf816Conversion_hpp
// Returns the number of wchar_t units needed to hold the UTF-8 string src,
// not counting the terminating zero. Scanning stops at the first NUL byte.
// src_len is the byte length of src (callers pass AsciiStrLen(src)).
UINTN StrLenInWChar(const char *src, UINTN src_len);
// Converts the UTF-8 string s to wchar_t units in dst and writes a terminating zero.
// dst_max_len is the capacity of dst in wchar_t units, terminating zero included;
// nothing is written when it is 0. src_len is the byte length of s.
void utf8ToWChar(wchar_t* dst, UINTN dst_max_len, const char *s, UINTN src_len);
#endif /* utf816Conversion_hpp */

View File

@ -1,6 +1,10 @@
#include "../cpp_foundation/XStringW.h" #include "../cpp_foundation/XStringW.h"
#include "global1.h" #include "global1.h"
#include "global2.h" #include "global2.h"
#include "../cpp_foundation/utf8Conversion.h"
//#include <wchar.h>
int XStringW_tests() int XStringW_tests()
@ -11,25 +15,52 @@ int XStringW_tests()
#endif #endif
if ( global_str1 != L"global_str1" ) return 1; if ( global_str1 != L"global_str1" ) return 1;
if ( global_str2 != L"global_str2" ) return 1; if ( global_str2 != L"global_str2" ) return 2;
XStringW str(L"1");
if ( str != L"1" ) return 3;
str.StrCat(L"2");
if ( str != L"12" ) return 4;
XStringW str2;
if ( str2.NotNull() ) return 10;
str2.StrnCpy(str.data(), 2);
if ( str2 != L"12" ) return 11;
str2.StrnCat(L"345", 2);
if ( str2 != L"1234" ) return 12;
str2.Insert(1, str);
if ( str2 != L"112234" ) return 13;
str2 += L"6";
if ( str2 != L"1122346" ) return 14;
//wchar_t c2 = L'Ň';
//printf("1=%lc\n", c2);
//const char* s1 = "𐌾";
str2.SPrintf("%c", 'a'); // single UTF8 ascii char
if ( str2 != L"a" ) return 20;
str2.SPrintf("%ls", L"ab"); // UTF16(32) string containing ascii char
if ( str2 != L"ab" ) return 21;
str2.SPrintf("%lc", L'Ň'); // single UTF16(32) char (2 bytes in total if UTF16)
if ( str2 != L"Ň" ) return 22;
str2.SPrintf("%s", "Ň"); // this is a UTF8 string 2 bytes long
if ( str2 != L"Ň" ) return 23;
#if __WCHAR_MAX__ > 0xFFFFu
str2.SPrintf("%lc", L'𐌾'); // L'𐌾' // this char cannot convert to an UTF16 char. So it doesn't compile with -fshort-wchar
if ( str2 != L'𐌾' ) return 30;
#endif
str2.SPrintf("%ls", L"𐌾"); // this is a UTF8 string 4 bytes long
if ( str2 != L"𐌾" ) return 31;
str2.SPrintf("%ls", L"𐌾"); // this is a UTF16 or UTF32 string (depending of -fshort-wchar)
if ( str2 != L"𐌾" ) return 32;
{ {
XStringW str(L"1"); XStringW str3("a");
if ( str != L"1" ) return 1; if ( str3 != L"a" ) return 40;
str.StrCat(L"2"); XStringW str4("aŇ𐌾");
if ( str != L"12" ) return 1; if ( str4 != L"aŇ𐌾" ) return 41;
XStringW str2;
if ( str2.NotNull() ) return 10;
str2.StrnCpy(str.data(), 2);
if ( str2 != L"12" ) return 11;
str2.StrnCat(L"345", 2);
if ( str2 != L"1234" ) return 12;
str2.Insert(1, str);
if ( str2 != L"112234" ) return 13;
str2 += L"6";
if ( str2 != L"1122346" ) return 14;
} }
return 0; return 0;
} }