From ff91406362f0129ad2cabab2e0a3f9c4433f1fe2 Mon Sep 17 00:00:00 2001 From: jief Date: Wed, 15 Apr 2020 20:28:59 +0300 Subject: [PATCH] Update unicode_conversions --- .../CloverX64.xcodeproj/project.pbxproj | 8 - .../cpp_tests.xcodeproj/project.pbxproj | 2 - Xcode/cpp_tests/src/Platform.cpp | 4 +- rEFIt_UEFI/Platform/Posix/posix.h | 1 - rEFIt_UEFI/Platform/Posix/string.h | 5 + rEFIt_UEFI/cpp_foundation/XStringW.cpp | 2 +- .../cpp_foundation/unicode_conversions.cpp | 988 +++++++++++++----- .../cpp_foundation/unicode_conversions.h | 273 ++++- rEFIt_UEFI/cpp_unit_test/poolprint-test.cpp | 2 +- rEFIt_UEFI/cpp_unit_test/printf_lite-test.cpp | 2 +- rEFIt_UEFI/cpp_unit_test/printlib-test.cpp | 2 +- 11 files changed, 1008 insertions(+), 281 deletions(-) diff --git a/Xcode/CloverX64/CloverX64.xcodeproj/project.pbxproj b/Xcode/CloverX64/CloverX64.xcodeproj/project.pbxproj index 1f4184f4a..bb19dc1ff 100644 --- a/Xcode/CloverX64/CloverX64.xcodeproj/project.pbxproj +++ b/Xcode/CloverX64/CloverX64.xcodeproj/project.pbxproj @@ -35,10 +35,6 @@ 9A105B2624464A830006DE06 /* VectorGraphics.h in Headers */ = {isa = PBXBuildFile; fileRef = 9A105B2224464A830006DE06 /* VectorGraphics.h */; }; 9A105B2724464A830006DE06 /* VectorGraphics.h in Headers */ = {isa = PBXBuildFile; fileRef = 9A105B2224464A830006DE06 /* VectorGraphics.h */; }; 9A105B2824464A830006DE06 /* VectorGraphics.h in Headers */ = {isa = PBXBuildFile; fileRef = 9A105B2224464A830006DE06 /* VectorGraphics.h */; }; - 9A105B2924464A830006DE06 /* ftol.asm in Sources */ = {isa = PBXBuildFile; fileRef = 9A105B2324464A830006DE06 /* ftol.asm */; }; - 9A105B2A24464A830006DE06 /* ftol.asm in Sources */ = {isa = PBXBuildFile; fileRef = 9A105B2324464A830006DE06 /* ftol.asm */; }; - 9A105B2B24464A830006DE06 /* ftol.asm in Sources */ = {isa = PBXBuildFile; fileRef = 9A105B2324464A830006DE06 /* ftol.asm */; }; - 9A105B2C24464A830006DE06 /* ftol.asm in Sources */ = {isa = PBXBuildFile; fileRef = 9A105B2324464A830006DE06 /* ftol.asm */; }; 
9A105B2D24464A830006DE06 /* BmLib.h in Headers */ = {isa = PBXBuildFile; fileRef = 9A105B2424464A830006DE06 /* BmLib.h */; }; 9A105B2E24464A830006DE06 /* BmLib.h in Headers */ = {isa = PBXBuildFile; fileRef = 9A105B2424464A830006DE06 /* BmLib.h */; }; 9A105B2F24464A830006DE06 /* BmLib.h in Headers */ = {isa = PBXBuildFile; fileRef = 9A105B2424464A830006DE06 /* BmLib.h */; }; @@ -2121,7 +2117,6 @@ 9AC77FC224176C04005CDD5C /* XObjArray_tests.cpp in Sources */, 9AC7808224176C04005CDD5C /* memory.cpp in Sources */, 9AC7803B24176C04005CDD5C /* cpu.cpp in Sources */, - 9A105B2924464A830006DE06 /* ftol.asm in Sources */, 9AC7800124176C04005CDD5C /* LegacyBoot.cpp in Sources */, 9AC7807524176C04005CDD5C /* REFIT_MENU_SCREEN.cpp in Sources */, 9AC7801F24176C04005CDD5C /* LegacyBiosThunk.cpp in Sources */, @@ -2239,7 +2234,6 @@ 9ACFE6E924309AF80071CC93 /* XObjArray_tests.cpp in Sources */, 9ACFE6EB24309AF80071CC93 /* memory.cpp in Sources */, 9ACFE6EC24309AF80071CC93 /* cpu.cpp in Sources */, - 9A105B2C24464A830006DE06 /* ftol.asm in Sources */, 9ACFE6ED24309AF80071CC93 /* LegacyBoot.cpp in Sources */, 9ACFE6EE24309AF80071CC93 /* REFIT_MENU_SCREEN.cpp in Sources */, 9ACFE6EF24309AF80071CC93 /* LegacyBiosThunk.cpp in Sources */, @@ -2357,7 +2351,6 @@ 9AF41621242CD75C00D2644C /* memory.cpp in Sources */, 9AF41622242CD75C00D2644C /* cpu.cpp in Sources */, 9AF41623242CD75C00D2644C /* LegacyBoot.cpp in Sources */, - 9A105B2A24464A830006DE06 /* ftol.asm in Sources */, 9AF41624242CD75C00D2644C /* REFIT_MENU_SCREEN.cpp in Sources */, 9AF41625242CD75C00D2644C /* LegacyBiosThunk.cpp in Sources */, 9A09863224389A6A00826276 /* menu.cpp in Sources */, @@ -2475,7 +2468,6 @@ 9AF416FE242CDA5800D2644C /* memory.cpp in Sources */, 9AF416FF242CDA5800D2644C /* cpu.cpp in Sources */, 9AF41700242CDA5800D2644C /* LegacyBoot.cpp in Sources */, - 9A105B2B24464A830006DE06 /* ftol.asm in Sources */, 9AF41701242CDA5800D2644C /* REFIT_MENU_SCREEN.cpp in Sources */, 9AF41702242CDA5800D2644C /* 
LegacyBiosThunk.cpp in Sources */, 9A09863324389A6A00826276 /* menu.cpp in Sources */, diff --git a/Xcode/cpp_tests/cpp_tests.xcodeproj/project.pbxproj b/Xcode/cpp_tests/cpp_tests.xcodeproj/project.pbxproj index 1ef463c8c..97d982119 100644 --- a/Xcode/cpp_tests/cpp_tests.xcodeproj/project.pbxproj +++ b/Xcode/cpp_tests/cpp_tests.xcodeproj/project.pbxproj @@ -85,7 +85,6 @@ 9ACAB117242623EE00BDB3CF /* printf_lite.c in Sources */ = {isa = PBXBuildFile; fileRef = 9ACAB116242623EE00BDB3CF /* printf_lite.c */; }; 9ACAB1192426255C00BDB3CF /* printf_lite.c in Sources */ = {isa = PBXBuildFile; fileRef = 9ACAB116242623EE00BDB3CF /* printf_lite.c */; }; 9ACAB11A2426255C00BDB3CF /* printf_lite.c in Sources */ = {isa = PBXBuildFile; fileRef = 9ACAB116242623EE00BDB3CF /* printf_lite.c */; }; - 9AF41576242CBE7600D2644C /* poolprint-test.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9AF41571242CBE7600D2644C /* poolprint-test.cpp */; }; /* End PBXBuildFile section */ /* Begin PBXCopyFilesBuildPhase section */ @@ -536,7 +535,6 @@ 9A0B08562402FF7700E2B470 /* XStringWArray.cpp in Sources */, 9A28CD0C241B8DD400F3D247 /* strcmp_test.cpp in Sources */, 9A28CD09241B8DD400F3D247 /* strncmp_test.cpp in Sources */, - 9AF41576242CBE7600D2644C /* poolprint-test.cpp in Sources */, 9AC780AE2417DD2F005CDD5C /* panic.cpp in Sources */, 9A0B085F240308E400E2B470 /* XStringW.cpp in Sources */, 9A0B08592402FF8200E2B470 /* all_tests.cpp in Sources */, diff --git a/Xcode/cpp_tests/src/Platform.cpp b/Xcode/cpp_tests/src/Platform.cpp index 10ed8f316..0534e4689 100755 --- a/Xcode/cpp_tests/src/Platform.cpp +++ b/Xcode/cpp_tests/src/Platform.cpp @@ -81,11 +81,11 @@ void PauseForKey(const wchar_t* msg) UINTN StrLen(const wchar_t* String) { - return wchar_len(String); + return size_of_utf_string(String); } UINTN StrLen(const char16_t* String) { - return char16_len(String); + return size_of_utf_string(String); } diff --git a/rEFIt_UEFI/Platform/Posix/posix.h b/rEFIt_UEFI/Platform/Posix/posix.h index 
4c413ddf7..5655bb0bf 100755 --- a/rEFIt_UEFI/Platform/Posix/posix.h +++ b/rEFIt_UEFI/Platform/Posix/posix.h @@ -21,7 +21,6 @@ // Macros that directly map functions to BaseLib, BaseMemoryLib, and DebugLib functions // originally from OpensslLib -#define memcpy(dest,source,count) CopyMem(dest,source,(UINTN)(count)) #define memset(dest,ch,count) SetMem(dest,(UINTN)(count),(UINT8)(ch)) #define memchr(buf,ch,count) ScanMem8(buf,(UINTN)(count),(UINT8)ch) #define memcmp(buf1,buf2,count) (int)(CompareMem(buf1,buf2,(UINTN)(count))) diff --git a/rEFIt_UEFI/Platform/Posix/string.h b/rEFIt_UEFI/Platform/Posix/string.h index d29ae8c05..0e88830c9 100644 --- a/rEFIt_UEFI/Platform/Posix/string.h +++ b/rEFIt_UEFI/Platform/Posix/string.h @@ -10,4 +10,9 @@ inline void* memmove(void *dst, const void *src, size_t len) return CopyMem(dst, (void*)(src), len); } +inline void* memcpy(void *dst, const void *src, size_t len) +{ + return CopyMem(dst,src,len); +} + #endif diff --git a/rEFIt_UEFI/cpp_foundation/XStringW.cpp b/rEFIt_UEFI/cpp_foundation/XStringW.cpp index 7263d6d68..ef9125204 100755 --- a/rEFIt_UEFI/cpp_foundation/XStringW.cpp +++ b/rEFIt_UEFI/cpp_foundation/XStringW.cpp @@ -119,7 +119,7 @@ XStringW& XStringW::takeValueFrom(const wchar_t* S) XStringW& XStringW::takeValueFrom(const char* S) { - xsize newLen = utf8_string_wchar_count(S); + xsize newLen = utf_size_of_utf_string(m_data, S); Init(newLen); wchar_string_from_utf8_string(m_data, m_allocatedSize+1, S); // m_size doesn't count the NULL terminator SetLength(newLen); diff --git a/rEFIt_UEFI/cpp_foundation/unicode_conversions.cpp b/rEFIt_UEFI/cpp_foundation/unicode_conversions.cpp index 6f26834e4..4616e3810 100755 --- a/rEFIt_UEFI/cpp_foundation/unicode_conversions.cpp +++ b/rEFIt_UEFI/cpp_foundation/unicode_conversions.cpp @@ -6,34 +6,42 @@ #include "unicode_conversions.h" +#include // for memcpy + #ifndef MIN #define MIN(a, b) ((a) < (b) ? 
(a) : (b)) #endif - -size_t char16_len(const char16_t* s) -{ - const char16_t* p = s; - while ( *p++ ); - return (size_t)(p-s-1); -} - -size_t char32_len(const char32_t* s) -{ - const char32_t* p = s; - while ( *p++ ); - return (size_t)(p-s-1); -} - -size_t wchar_len(const wchar_t* s) -{ #if __WCHAR_MAX__ <= 0xFFFFu - return char16_len((const char16_t*)s); + #define wchar_cast char16_t #else - return char32_len((const char32_t*)s); + #define wchar_cast char32_t #endif -} + +#ifndef wchar_cast +#error wchar_cast +#endif + + +// +//size_t char32_len_from_wchar(const wchar_t* s) +//{ +//#if __WCHAR_MAX__ <= 0xFFFFu +// return char32_len((const char16_t*)s); +//#else +// return char32_len((const char32_t*)s); +//#endif +//} +// +//size_t wchar_len(const wchar_t* s) +//{ +//#if __WCHAR_MAX__ <= 0xFFFFu +// return char16_len((const char16_t*)s); +//#else +// return char32_len((const char32_t*)s); +//#endif +//} static inline int is_surrogate(char16_t uc) { return (uc - 0xd800u) < 2048u; } @@ -45,8 +53,6 @@ static inline char32_t surrogate_to_utf32(char16_t high, char16_t low) { } -/************************************************************* Char conversion *********************************************************/ - #define halfBase 0x0010000UL #define halfMask 0x3FFUL #define halfShift 10 /* used for shifting by 10 bits */ @@ -54,36 +60,130 @@ static inline char32_t surrogate_to_utf32(char16_t high, char16_t low) { #define UNI_SUR_LOW_START 0xDC00u +/************************************************************* utf8 - char32 *********************************************************/ -const char* get_char32_from_utf8_string(const char *s, char32_t* char32) +/* + * Size of an UTF32 char when represented in UTF8 + * Return value : size + */ +size_t utf8_size_of_utf32_char(char32_t c) { + if ( c == 0 ) return 0; + else if ( c <= 0x7f ) return 1; + else if ( c <= 0x7ff ) return 2; + else if ( c <= 0xFFFF ) return 3; + else return 4; +} + +/* + * Increment size and return a 
pointer to the next char + * Return value : pointer to the end of string or at the error + */ +const char32_t* utf8_size_of_utf32_char_ptr(const char32_t *s, size_t* size) { + if ( *s == 0 ) return s; + *size += utf8_size_of_utf32_char(*s++); + return s; +} + +/* + * Store an utf32 char in dst, if there is enough room (dst_max_len is >= size of utf32 char) + * If there is enough room, dst_max_len is decrement and dst is increment and returned + * If there isn't enough room, dst_max_len is set to 0 and dst is returned + */ +char* get_utf8_from_char32(char* dst, size_t* dst_max_len, char32_t utf32_char) +{ +#ifdef JIEF_DEBUG + char* dst_debug = dst; + (void)dst_debug; +#endif + if ( *dst_max_len <= 0 ) return dst; + /* assertion: utf32_char is a single UTF-4 value */ + + int bits = 0; // just to silence the warning + + if (utf32_char < 0x80) { + *dst++ = (char)utf32_char; + *dst_max_len -= 1; + bits = -6; + } + else if (utf32_char < 0x800) { + if ( *dst_max_len < 2 ) { + *dst_max_len = 0; + return dst; + } + *dst++ = (char)(((utf32_char >> 6) & 0x1F) | 0xC0); + *dst_max_len -= 1; + bits = 0; + } + else if (utf32_char < 0x10000) { + if ( *dst_max_len < 3 ) { + *dst_max_len = 0; + return dst; + } + *dst++ = (char)(((utf32_char >> 12) & 0x0F) | 0xE0); + *dst_max_len -= 1; + bits = 6; + } + else { + if ( *dst_max_len < 4 ) { + *dst_max_len = 0; + return dst; + } + *dst++ = (char)(((utf32_char >> 18) & 0x07) | 0xF0); + *dst_max_len -= 1; + bits = 12; + } + for ( ; /* *dst_max_len > 0 && */ bits >= 0 ; bits -= 6 ) { // no need to check dst_max_len, it's made before + *dst++ = (char)(((utf32_char >> bits) & 0x3F) | 0x80); + *dst_max_len -= 1; + } +#ifdef JIEF_DEBUG + if ( *dst_max_len > 0 ) *dst = 0; +#endif + return dst; +} + +/* +Number Bits for First Last Byte 1 Byte 2 Byte 3 cByte 4 +of bytes code point +1 7 U+0000 U+007F 0xxxxxxx +2 11 U+0080 U+07FF 110xxxxx 10xxxxxx +3 16 U+0800 U+FFFF 1110xxxx 10xxxxxx 10xxxxxx +4 21 U+10000 U+10FFFF[12] 11110xxx 10xxxxxx 10xxxxxx 
10xxxxxx +*/ + +/* + * char32 will be set to 0 at the end of string or at error + * Return value : pointer to the end of string or at the error + */ +const char* get_char32_from_utf8_string(const char* s, char32_t* char32) +{ + if ( !*s ) { + *char32 = 0; + return s; + } char32_t c; if (*s & 0x80) { - if (*(s+1) == 0) { + if ((*(s+1) & 0xc0) != 0x80) { // 0xc0 = 0b11000000. Equivalent to if ( *(s+1) != 0x10xxxxxx ) // Finished in the middle of an utf8 multibyte char - return 0; + char32 = 0; + return s; } - if ((*(s+1) & 0xc0) != 0x80) { - s += 1; - return 0; - } - if ((*s & 0xe0) == 0xe0) { - if (*(s+2) == 0) { - // Finished in the middle of an utf8 multibyte char - return 0; - } - if ((*(s+2) & 0xc0) != 0x80) { - s += 2; - return 0; + if ((*s & 0xe0) == 0xe0) { // 0xe0 == 0b11100000. Equivalent to if ( *(s) == 0x111xxxxx ) + // Here, it's a 3 or 4 bytes + // Byte 3 has to be 0x10xxxxxx + if ((*(s+2) & 0xc0) != 0x80) { // 0xc0 = 0b11000000. Equivalent to if ( *(s+2) != 0x10xxxxxx ) + // Finished in the middle of an utf8 multibyte char + char32 = 0; + return s; } - if ((*s & 0xf0) == 0xf0) { - if (*(s+3) == 0) { + if ((*s & 0xf0) == 0xf0) { // 0xf0 = 0b1111xxxx. Equivalent to if ( *(s) == 0x1111xxxx ) + // Here, it's a 4 bytes + // Byte 4 has to be 0x10xxxxxx + if ((*s & 0xf8) != 0xf0 || (*(s+3) & 0xc0) != 0x80) { // 0xf8 = 0b11111xxx. Equivalent to if ( *(s) != 0x11110xxx || *(s+3) != 0x10xxxxxx ) // Finished in the middle of an utf8 multibyte char - return 0; - } - if ((*s & 0xf8) != 0xf0 || (*(s+3) & 0xc0) != 0x80) { - s += 3; - return 0; + char32 = 0; + return s; } /* 4-byte code */ c = char32_t((*s & 0x7) << 18); // & result type is int. We know it fits in 32 bits. 
Safe to cast to char32_t @@ -113,6 +213,226 @@ const char* get_char32_from_utf8_string(const char *s, char32_t* char32) return s; } + +/* + * Size in bytes of an utf32 string if it were converted to utf8 + * Return value : pointer to the end of string or at the error + */ +size_t utf8_size_of_utf32_string(const char32_t* s) +{ + if ( !s ) return 0; + size_t size = 0; + while ( *s ) s = utf8_size_of_utf32_char_ptr(s, &size); + return size; +} + +/* + * Size in bytes of an utf32 string of len char if it were converted to utf8 + * Return value : pointer to the end of string or at the error + */ +size_t utf8_size_of_utf32_string_len(const char32_t* s, size_t len) +{ + if ( !s || len <= 0 ) return 0; // <= in case size_t is signed + size_t size = 0; + while ( *s && len > 0 ) { + s = utf8_size_of_utf32_char_ptr(s, &size); + len --; + } + return size; +} + +size_t utf32_size_of_utf8_string(const char* s) +{ + if ( !s ) return 0; + size_t size = 0; + char32_t char32; + s = get_char32_from_utf8_string(s, &char32); + while ( char32 ) { + size += 1; + s = get_char32_from_utf8_string(s, &char32); + } + return size; +} + +size_t utf32_size_of_utf8_string_len(const char* s, size_t len) +{ + if ( !s || len <= 0 ) return 0; // <= in case size_t is signed + size_t size = 0; + char32_t char32; + s = get_char32_from_utf8_string(s, &char32); + while ( char32 && len > 0 ) { + size += 1; + s = get_char32_from_utf8_string(s, &char32); + len --; + } + return size; +} + + +size_t utf32_string_from_utf8_string(char32_t* dst, size_t dst_max_len, const char* s) +{ + if ( !s && dst_max_len == 0 ) return 0; + char32_t* p = dst; + char32_t* p_max = dst + dst_max_len - 1; + + char32_t char32; + s = get_char32_from_utf8_string(s, &char32); + while ( char32 != 0 && p < p_max ) { + *p++ = char32; + s = get_char32_from_utf8_string(s, &char32); + } + *p = 0; + return (size_t)(p-dst); +} + +size_t utf8_string_from_utf32_string(char* dst, size_t dst_max_len, const char32_t *s) +{ + if ( !s || 
dst_max_len <= 1 ) { + if ( dst_max_len > 0 ) *dst = 0; + return 0; + } + dst_max_len -= 1; + char* p = dst; + while ( *s && dst_max_len > 0 ) { + p = get_utf8_from_char32(p, &dst_max_len, *s++); + } + *p = 0; + return (size_t)(p-dst); +} + + + +/************************************************************* utf8 - char16 *********************************************************/ + +//static size_t utf16_size_of_utf32_char(char32_t c); + +/* + * Increment size + * Return value : pointer to the end of string or at the error + */ +const char16_t* utf8_size_of_utf16_char_ptr(const char16_t *s, size_t* size) { + char32_t c; + s = get_char32_from_utf16_string(s, &c); + if ( c == 0 ) return s; + *size += utf8_size_of_utf32_char(c); + return s; +} + +/* + * Size in bytes of an utf16 string if it were converted to utf8 + * Return value : pointer to the end of string or at the error + */ +size_t utf8_size_of_utf16_string(const char16_t* s) +{ + if ( !s ) return 0; + size_t size = 0; + while ( *s ) s = utf8_size_of_utf16_char_ptr(s, &size); + return size; +} + +/* + * Size in bytes of an utf16 string of len char if it were converted to utf8 + * Return value : pointer to the end of string or at the error + */ +size_t utf8_size_of_utf16_string_len(const char16_t* s, size_t len) +{ + if ( !s || len <= 0 ) return 0; // <= in case size_t is signed + size_t size = 0; + while ( *s && len > 0 ) { + s = utf8_size_of_utf16_char_ptr(s, &size); + len --; + } + return size; +} + +size_t utf16_size_of_utf8_string(const char* s) +{ + if ( !s ) return 0; + size_t size = 0; + + char32_t char32; + s = get_char32_from_utf8_string(s, &char32); + while ( char32 ) { + size += utf16_size_of_utf32_char(char32); + s = get_char32_from_utf8_string(s, &char32); + } + return size; +} + +size_t utf16_size_of_utf8_string_len(const char* s, size_t len) +{ + if ( !s || len <= 0 ) return 0; // <= in case size_t is signed + size_t size = 0; + + char32_t char32; + s = get_char32_from_utf8_string(s, &char32); + 
while ( char32 && len > 0 ) { + size += utf16_size_of_utf32_char(char32); + len --; + s = get_char32_from_utf8_string(s, &char32); + } + return size; +} + + + +size_t utf8_string_from_utf16_string(char* dst, size_t dst_max_len, const char16_t *s) +{ + if ( !s || dst_max_len <= 0 ) return 0; + char* p = dst; + dst_max_len -= 1; + while ( *s && dst_max_len > 0 ) { + char32_t utf32_char; + s = get_char32_from_utf16_string(s, &utf32_char); + p = get_utf8_from_char32(p, &dst_max_len, utf32_char); + } + *p = 0; + return (size_t)(p-dst); +} + + + +size_t utf16_string_from_utf8_string(char16_t* dst, size_t dst_max_len, const char* s) +{ + if ( dst_max_len == 0 ) return 0; + dst_max_len -= 1; + +// size_t dst_len = 0; + char16_t* p = dst; + char16_t* p_max = dst + dst_max_len; + + char32_t char32; + s = get_char32_from_utf8_string(s, &char32); + while ( char32 && p < p_max ) { + if ( char32 == 0 ) return (size_t)(p-dst); + char16_t char16_1, char16_2; + get_char16_from_char32(char32, &char16_1, &char16_2); + if ( char16_2 != 0 ) { + if ( p < p_max-1 ) { + *p++ = char16_1; + *p++ = char16_2; + }else{ + *p = 0; + return (size_t)(p-dst); + } + }else{ + *p++ = char16_1; + } + s = get_char32_from_utf8_string(s, &char32); + } + *p = 0; + return (size_t)(p-dst); +} + + +/************************************************************* utf16 - utf32 *********************************************************/ + +size_t utf16_size_of_utf32_char(char32_t c) +{ + if ( c <= 0xFFFF) return 1; + else return 2; +} + void get_char16_from_char32(char32_t char32, char16_t* char16_1, char16_t* char16_2) { if ( char32 <= 0xFFFF) { @@ -138,7 +458,11 @@ char32_t get_char32_from_char16(char16_t char16_1, char16_t char16_2) } } -const char16_t* get_char32_from_char16_string(const char16_t* s, char32_t* char32) +/* + * char32 will be set to 0 at the end of string or at error + * Return value : pointer to the end of string or at the error + */ +const char16_t* get_char32_from_utf16_string(const char16_t* 
s, char32_t* char32) { const char16_t char16_1 = *s++; if (!is_surrogate(char16_1)) { @@ -149,233 +473,64 @@ const char16_t* get_char32_from_char16_string(const char16_t* s, char32_t* char3 *char32 = surrogate_to_utf32(char16_1, *s++); return s; } else { - return 0; + *char32 = 0; + if ( !is_high_surrogate(char16_1) ) return s-1; + return s; } } } -/* - * dst_max_len MUST be >= 1 when called - */ -char* get_utf8_from_char32(char* dst, size_t* dst_max_len, char32_t utf32_char) -{ -#ifdef JIEF_DEBUG - char* dst_debug = dst; - (void)dst_debug; -#endif - /* assertion: utf32_char is a single UTF-4 value */ - /* assertion: dst_max_len >= 1 */ - int bits = 0; // just to silence the warning - - if (utf32_char < 0x80) { - *dst++ = (char)utf32_char; - *dst_max_len -= 1; - bits = -6; - } - else if (utf32_char < 0x800) { - *dst++ = (char)(((utf32_char >> 6) & 0x1F) | 0xC0); - *dst_max_len -= 1; - bits = 0; - } - else if (utf32_char < 0x10000) { - *dst++ = (char)(((utf32_char >> 12) & 0x0F) | 0xE0); - *dst_max_len -= 1; - bits = 6; - } - else { - *dst++ = (char)(((utf32_char >> 18) & 0x07) | 0xF0); - *dst_max_len -= 1; - bits = 12; - } - for ( ; *dst_max_len > 0 && bits >= 0 ; bits -= 6 ) { - *dst++ = (char)(((utf32_char >> bits) & 0x3F) | 0x80); - *dst_max_len -= 1; - } -#ifdef JIEF_DEBUG - *dst = 0; -#endif - return dst; -} -/************************************************************* utf8 - char16 *********************************************************/ - -size_t utf8_string_char16_count(const char *s) +size_t utf16_size_of_utf32_string(const char32_t *s) { if ( !s ) return 0; - size_t len = 0; - - while ( *s ) { - char32_t c; - s = get_char32_from_utf8_string(s, &c); - if ( c == 0 ) return len; - if ( c <= 0xFFFF) { - len += 1; - }else{ - len += 2; - } - } - return len; + size_t size = 0; + while ( *s ) size += utf16_size_of_utf32_char(*s++); + return size; } -size_t char16_string_from_utf8_string(char16_t* dst, size_t dst_max_len, const char *s) -{ - if ( dst_max_len 
== 0 ) return 0; - dst_max_len -= 1; - -// size_t dst_len = 0; - char16_t* p = dst; - char16_t* p_max = dst + dst_max_len; - - while ( *s && p < p_max ) { - char32_t c; - s = get_char32_from_utf8_string(s, &c); - if ( c == 0 ) return (size_t)(p-dst); - char16_t char16_1, char16_2; - get_char16_from_char32(c, &char16_1, &char16_2); - if ( char16_2 != 0 ) { - if ( p < p_max-1 ) { - *p++ = char16_1; - *p++ = char16_2; - }else{ - *p = 0; - return (size_t)(p-dst); - } - }else{ - *p++ = char16_1; - } - } - *p = 0; - return (size_t)(p-dst); -} - -size_t utf8_string_from_char16_string(char* dst, size_t dst_max_len, const char16_t *s) -{ - char* p = dst; - while ( *s && dst_max_len > 0 ) { - char32_t utf32_char; - s = get_char32_from_char16_string(s, &utf32_char); - p = get_utf8_from_char32(p, &dst_max_len, utf32_char); - } - *p = 0; - return (size_t)(p-dst); -} - - - -/************************************************************* utf8 - char32 *********************************************************/ - - -size_t utf8_string_char32_count(const char *s) +size_t utf16_size_of_utf32_string_len(const char32_t *s, size_t len) { if ( !s ) return 0; - size_t len = 0; - - while ( *s ) { - char32_t c; - s = get_char32_from_utf8_string(s, &c); - if ( c == 0 ) return len; - len += 1; + size_t size = 0; + while ( *s && len > 0 ) { + size += utf16_size_of_utf32_char(*s++); + len--; } - return len; + return size; } - -size_t char32_string_from_utf8_string(char32_t* dst, size_t dst_max_len, const char *s) -{ - if ( dst_max_len == 0 ) return 0; - char32_t* p = dst; - char32_t* p_max = dst + dst_max_len - 1; - - while ( *s && p < p_max ) { - char32_t c; - s = get_char32_from_utf8_string(s, &c); - if ( c == 0 ) return (size_t)(p-dst); - *p++ = c; - } - *p = 0; - return (size_t)(p-dst); -} - -size_t utf8_string_from_char32_string(char* dst, size_t dst_max_len, const char32_t *s) -{ - char* p = dst; - while ( *s && dst_max_len > 0 ) { - p = get_utf8_from_char32(p, &dst_max_len, *s++); - } - 
*p = 0; - return (size_t)(p-dst); -} - - -/************************************************************* utf8 - wchar *********************************************************/ - -size_t utf8_string_wchar_count(const char *s) -{ -#if __WCHAR_MAX__ <= 0xFFFFu - return utf8_string_char16_count(s); -#else - return utf8_string_char32_count(s); -#endif -} - -size_t wchar_string_from_utf8_string(wchar_t* dst, size_t dst_max_len, const char *s) -{ -#if __WCHAR_MAX__ <= 0xFFFFu - return char16_string_from_utf8_string((char16_t*)dst, dst_max_len, s); -#else - return char32_string_from_utf8_string((char32_t*)dst, dst_max_len, s); -#endif -} - -size_t utf8_string_from_wchar_string(char* dst, size_t dst_max_len, const wchar_t* s) -{ -#if __WCHAR_MAX__ <= 0xFFFFu - return utf8_string_from_char16_string(dst, dst_max_len, (char16_t*)s); -#else - return utf8_string_from_char32_string(dst, dst_max_len, (char32_t*)s); -#endif -} - - - - -/************************************************************* char16 - char32 *********************************************************/ - - -size_t char16_string_char32_count(const char16_t *s) +size_t utf32_size_of_utf16_string(const char16_t *s) { if ( !s ) return 0; - size_t len = 0; - - while ( *s ) { - char32_t c; - s = get_char32_from_char16_string(s, &c); - if ( c == 0 ) return len; - len += 1; + size_t size = 0; + char32_t char32; + s = get_char32_from_utf16_string(s, &char32); + while ( char32 ) { + size += 1; + s = get_char32_from_utf16_string(s, &char32); } - return len; + return size; } - -size_t utf32_string_to_char16_string(char32_t* dst, size_t dst_max_len, const char16_t *s) +size_t utf32_size_of_utf16_string_len(const char16_t *s, size_t len) { - if ( dst_max_len == 0 ) return 0; - char32_t* p = dst; - char32_t* p_max = dst + dst_max_len - 1; - - while ( *s && p < p_max ) { - char32_t c; - s = get_char32_from_char16_string(s, &c); - if ( c == 0 ) return (size_t)(p-dst); - *p++ = c; + if ( !s || len <= 0 ) return 0; // <= in case 
size_t is signed + size_t size = 0; + char32_t char32; + s = get_char32_from_utf16_string(s, &char32); + while ( char32 && len > 0 ) { + size += 1; + s = get_char32_from_utf16_string(s, &char32); + len --; } - *p = 0; - return (size_t)(p-dst); + return size; } -size_t utf16_string_to_char32_string(char16_t* dst, size_t dst_max_len, const char32_t *s) + +size_t utf16_string_from_utf32_string(char16_t* dst, size_t dst_max_len, const char32_t *s) { if ( dst_max_len == 0 ) return 0; char16_t* p = dst; @@ -399,3 +554,342 @@ size_t utf16_string_to_char32_string(char16_t* dst, size_t dst_max_len, const ch *p = 0; return (size_t)(p-dst); } + +size_t utf32_string_from_utf16_string(char32_t* dst, size_t dst_max_len, const char16_t *s) +{ + if ( dst_max_len == 0 ) return 0; + char32_t* p = dst; + char32_t* p_max = dst + dst_max_len - 1; + + while ( *s && p < p_max ) { + char32_t c; + s = get_char32_from_utf16_string(s, &c); + if ( c == 0 ) return (size_t)(p-dst); + *p++ = c; + } + *p = 0; + return (size_t)(p-dst); +} + + + +/************************************************************* utf8 - wchar_t *********************************************************/ + +size_t utf8_size_of_wchar_string(const wchar_t* s) +{ +#if __WCHAR_MAX__ <= 0xFFFFu + return utf8_size_of_utf16_string((wchar_cast*)s); +#else + return utf8_size_of_utf32_string((wchar_cast*)s); +#endif +} + +size_t utf8_size_of_wchar_string_len(const wchar_t* s, size_t len) +{ +#if __WCHAR_MAX__ <= 0xFFFFu + return utf8_size_of_utf16_string_len((wchar_cast*)s, len); +#else + return utf8_size_of_utf32_string_len((wchar_cast*)s, len); +#endif +} + +size_t wchar_size_of_utf8_string(const char* s) +{ +#if __WCHAR_MAX__ <= 0xFFFFu + return utf16_size_of_utf8_string(s); +#else + return utf32_size_of_utf8_string(s); +#endif +} + +size_t wchar_size_of_utf8_string_len(const char* s, size_t len) +{ +#if __WCHAR_MAX__ <= 0xFFFFu + return utf16_size_of_utf8_string_len(s, len); +#else + return utf32_size_of_utf8_string_len(s, 
len); +#endif +} + +size_t utf8_string_from_wchar_string(char* dst, size_t dst_max_len, const wchar_t* s) +{ +#if __WCHAR_MAX__ <= 0xFFFFu + return utf8_string_from_utf16_string(dst, dst_max_len, (char16_t*)s); +#else + return utf8_string_from_utf32_string(dst, dst_max_len, (char32_t*)s); +#endif +} + +size_t wchar_string_from_utf8_string(wchar_t* dst, size_t dst_max_len, const char* s) +{ +#if __WCHAR_MAX__ <= 0xFFFFu + return utf16_string_from_utf8_string((char16_t*)dst, dst_max_len, s); +#else + return utf32_string_from_utf8_string((char32_t*)dst, dst_max_len, s); +#endif +} + + +/************************************************************* utf16 - wchar_t *********************************************************/ + +size_t utf16_size_of_wchar_string(const wchar_t* s) +{ +#if __WCHAR_MAX__ <= 0xFFFFu + return utf16_size_of_utf16_string((wchar_cast*)s); +#else + return utf16_size_of_utf32_string((wchar_cast*)s); +#endif +} + +size_t utf16_size_of_wchar_string_len(const wchar_t* s, size_t len) +{ +#if __WCHAR_MAX__ <= 0xFFFFu + return utf16_size_of_utf16_string_len((wchar_cast*)s, len); +#else + return utf16_size_of_utf32_string_len((wchar_cast*)s, len); +#endif +} + +size_t wchar_size_of_utf16_string(const char16_t* s) +{ +#if __WCHAR_MAX__ <= 0xFFFFu + return utf16_size_of_utf16_string(s); +#else + return utf32_size_of_utf16_string(s); +#endif +} + +size_t wchar_size_of_utf16_string_len(const char16_t* s, size_t len) +{ +#if __WCHAR_MAX__ <= 0xFFFFu + return utf16_size_of_utf16_string_len(s, len); +#else + return utf32_size_of_utf16_string_len(s, len); +#endif +} + +size_t utf16_string_from_wchar_string(char16_t* dst, size_t dst_max_len, const wchar_t* s) +{ +#if __WCHAR_MAX__ <= 0xFFFFu + return utf16_string_from_utf16_string(dst, dst_max_len, (char16_t*)s); +#else + return utf16_string_from_utf32_string(dst, dst_max_len, (char32_t*)s); +#endif +} + +size_t wchar_string_from_utf16_string(wchar_t* dst, size_t dst_max_len, const char16_t* s) +{ +#if 
__WCHAR_MAX__ <= 0xFFFFu + return utf16_string_from_utf16_string((char16_t*)dst, dst_max_len, s); +#else + return utf32_string_from_utf16_string((char32_t*)dst, dst_max_len, s); +#endif +} + +/************************************************************* utf32 - wchar_t *********************************************************/ + + +size_t utf32_size_of_wchar_string(const wchar_t* s) +{ +#if __WCHAR_MAX__ <= 0xFFFFu + return utf32_size_of_utf16_string((wchar_cast*)s); +#else + return utf32_size_of_utf32_string((wchar_cast*)s); +#endif +} + +size_t utf32_size_of_wchar_string_len(const wchar_t* s, size_t len) +{ +#if __WCHAR_MAX__ <= 0xFFFFu + return utf32_size_of_utf16_string_len((wchar_cast*)s, len); +#else + return utf32_size_of_utf32_string_len((wchar_cast*)s, len); +#endif +} + + +size_t wchar_size_of_utf32_string(const char32_t* s) +{ +#if __WCHAR_MAX__ <= 0xFFFFu + return utf16_size_of_utf32_string(s); +#else + return utf32_size_of_utf32_string(s); +#endif +} + +size_t wchar_size_of_utf32_string_len(const char32_t* s, size_t len) +{ +#if __WCHAR_MAX__ <= 0xFFFFu + return utf16_size_of_utf32_string_len(s, len); +#else + return utf32_size_of_utf32_string_len(s, len); +#endif +} + +size_t utf32_string_from_wchar_string(char32_t* dst, size_t dst_max_len, const wchar_t* s) +{ +#if __WCHAR_MAX__ <= 0xFFFFu + return utf32_string_from_utf16_string(dst, dst_max_len, (char16_t*)s); +#else + return utf32_string_from_utf32_string(dst, dst_max_len, (char32_t*)s); +#endif +} + +size_t wchar_string_from_utf32_string(wchar_t* dst, size_t dst_max_len, const char32_t* s) +{ +#if __WCHAR_MAX__ <= 0xFFFFu + return utf16_string_from_utf32_string((char16_t*)dst, dst_max_len, s); +#else + return utf32_string_from_utf32_string((char32_t*)dst, dst_max_len, s); +#endif +} + + + + + + +/************************************************************* no conversion *********************************************************/ + +// Not efficient. 
Could be map to the ones provided by operating system +size_t utf8_size_of_utf8_string(const char* s) +{ + const char* p = s; + while ( *p++ ); + return (size_t)(p-s-1); +} + +size_t utf8_size_of_utf8_string_len(const char* s, size_t len) +{ + if ( !s || len <= 0 ) return 0; + char32_t char32 = 1; + const char* p = s; // = get_char32_from_utf8_string(s, &char32); + while ( char32 && len > 0 ) { + p = get_char32_from_utf8_string(p, &char32); + len -= 1; + } + return (uintptr_t(p)-uintptr_t(s)); +} + +size_t utf16_size_of_utf16_string(const char16_t* s) +{ + const char16_t* p = s; + while ( *p++ ); + return (size_t)(p-s-1); +} + +size_t utf16_size_of_utf16_string_len(const char16_t* s, size_t len) +{ + if ( !s ) return 0; + size_t size = 0; + char32_t char32; + s = get_char32_from_utf16_string(s, &char32); + while ( char32 && len > 0 ) { + size += utf16_size_of_utf32_char(char32); + s = get_char32_from_utf16_string(s, &char32); + len -= 1; + } + return size; +} + +size_t utf32_size_of_utf32_string(const char32_t* s) +{ + const char32_t* p = s; + while ( *p++ ); + return (size_t)(p-s-1); +} + +size_t utf32_size_of_utf32_string_len(const char32_t* s, size_t len) +{ + const char32_t* p = s; + while ( *p++ && len > 0 ) len -= 1; + return (size_t)(p-s-1); +} + +size_t wchar_size_of_wchar_string(const wchar_t* s) +{ +#if __WCHAR_MAX__ <= 0xFFFFu + return utf16_size_of_utf16_string((char16_t*)s); +#else + return utf32_size_of_utf32_string((char32_t*)s); +#endif +} + +size_t wchar_size_of_wchar_string_len(const wchar_t* s, size_t len) +{ +#if __WCHAR_MAX__ <= 0xFFFFu + return utf16_size_of_utf16_string_len((char16_t*)s, len); +#else + return utf32_size_of_utf32_string_len((char32_t*)s, len); +#endif +} + + + +size_t utf8_string_from_utf8_string(char* dst, size_t dst_max_len, const char *s) +{ + if ( !s || dst_max_len <= 1 ) { + if ( dst_max_len > 0 ) *dst = 0; + return 0; + } + dst_max_len -= 1; + char* p = dst; + char32_t char32; + s = get_char32_from_utf8_string(s, 
&char32); + while ( char32 && dst_max_len > 0 ) { + p = get_utf8_from_char32(p, &dst_max_len, char32); + s = get_char32_from_utf8_string(s, &char32); + } + *p = 0; + return (size_t)(p-dst); /* p is at the terminator just written, so p-dst is the number of bytes stored before it (0 for an empty source) */ +} + +/* Copies s into dst, keeping at most dst_max_len-1 units, and always terminates dst. NOTE(review): the value returned is a byte count (units * sizeof(char16_t)), and the unit-wise truncation can cut between the two halves of a surrogate pair - confirm both are what callers expect. */ size_t utf16_string_from_utf16_string(char16_t* dst, size_t dst_max_len, const char16_t *s) +{ + if ( !s || dst_max_len <= 1 ) { + if ( dst_max_len > 0 ) *dst = 0; + return 0; + } + size_t s_len = utf16_size_of_utf16_string(s); + if ( dst_max_len > s_len ) dst_max_len = s_len; + else dst_max_len -= 1; + memcpy((void*)dst, (void*)s, dst_max_len * sizeof(char16_t)); + dst[dst_max_len] = 0; + return dst_max_len * sizeof(char16_t); +} + +/* Copies s into dst, keeping at most dst_max_len-1 units, and always terminates dst. NOTE(review): the value returned is a byte count (units * sizeof(char32_t)) - confirm that is what callers expect. */ size_t utf32_string_from_utf32_string(char32_t* dst, size_t dst_max_len, const char32_t *s) +{ + if ( !s || dst_max_len <= 1 ) { + if ( dst_max_len > 0 ) *dst = 0; + return 0; + } + size_t s_len = utf32_size_of_utf32_string(s); + if ( dst_max_len > s_len ) dst_max_len = s_len; + else dst_max_len -= 1; + memcpy((void*)dst, (void*)s, dst_max_len * sizeof(char32_t)); + dst[dst_max_len] = 0; + return dst_max_len * sizeof(char32_t); +} + +/* Forwards to the utf16 or utf32 copy above, selected at compile time from __WCHAR_MAX__. */ size_t wchar_string_from_wchar_string(wchar_t* dst, size_t dst_max_len, const wchar_t *s) +{ +#if __WCHAR_MAX__ <= 0xFFFFu + return utf16_string_from_utf16_string((char16_t*)dst, dst_max_len, (char16_t*)s); +#else + return utf32_string_from_utf32_string((char32_t*)dst, dst_max_len, (char32_t*)s); +#endif +} + + +/****** convenience *****/ + +/* Forwards to length_of_utf16_string or length_of_utf32_string, selected at compile time from __WCHAR_MAX__. */ size_t length_of_wchar_string(const wchar_t* s) +{ +#if __WCHAR_MAX__ <= 0xFFFFu + return length_of_utf16_string((char16_t*)s); +#else + return length_of_utf32_string((char32_t*)s); +#endif +} + diff --git a/rEFIt_UEFI/cpp_foundation/unicode_conversions.h b/rEFIt_UEFI/cpp_foundation/unicode_conversions.h index 26ee3dbda..ce1ecb931 100644 --- a/rEFIt_UEFI/cpp_foundation/unicode_conversions.h +++ b/rEFIt_UEFI/cpp_foundation/unicode_conversions.h @@ -9,6 +9,7 @@ #include #include +#include #ifndef __cplusplus //typedef uint16_t wchar_t; @@ -16,34 +17,272 @@ typedef
uint32_t char32_t; typedef uint16_t char16_t; #endif +#ifndef __WCHAR_MAX__ +#error __WCHAR_MAX__ +#endif -size_t char16_len(const char16_t* s); -size_t char32_len(const char32_t* s); -size_t wchar_len(const wchar_t* s); +#if __WCHAR_MAX__ <= 0xFFFFu + #define wchar_cast char16_t +#else + #define wchar_cast char32_t +#endif + +#ifndef wchar_cast +#error wchar_cast +#endif -char32_t get_char32_from_utf8(const char *s); +/* + * len means the number of utf32 chars + * size means the number of units of the underlying native type (number of char16_t, number of char32_t, etc.) + */ + + + +/****** utility *****/ + + +/****** utf8 - utf32 *****/ + +/* + * Size of a UTF32 char when represented in UTF8 + * Return value : size + */ +size_t utf8_size_of_utf32_char(char32_t c); + +/* + * Increment size and return a pointer to the next char + * Return value : pointer to the end of string or at the error + */ +const char32_t* utf8_size_of_utf32_char_ptr(const char32_t *s, size_t* size); + +/* + * Store a utf32 char in dst, if there is enough room (dst_max_len is >= size of utf32 char) + * If there is enough room, dst_max_len is decremented and dst is incremented and returned + * If there isn't enough room, dst_max_len is set to 0 and dst is returned + */ +char* get_utf8_from_char32(char* dst, size_t* dst_max_len, char32_t utf32_char); + +/* + * char32 will be set to 0 at the end of string or at error + * Return value : pointer to the end of string or at the error + */ +const char* get_char32_from_utf8_string(const char* s, char32_t* char32); + + + +/* + * Size in bytes of a utf32 string if it were converted to utf8 + * Return value : size in bytes + */ +size_t utf8_size_of_utf32_string(const char32_t* s); +/* + * Size in bytes of a utf32 string of len chars if it were converted to utf8 + * Return value : size in bytes + */ +size_t utf8_size_of_utf32_string_len(const char32_t* s, size_t len); + +size_t utf32_size_of_utf8_string(const char* s); +size_t
utf32_size_of_utf8_string_len(const char* s, size_t len); + +size_t utf32_string_from_utf8_string(char32_t* dst, size_t dst_max_len, const char* s); +size_t utf8_string_from_utf32_string(char* dst, size_t dst_max_len, const char32_t *s); + + +/****** utf8 - utf16 *****/ + +/* + * Increment size + * Return value : pointer to the end of string or at the error + */ +const char16_t* utf8_size_of_utf16_char_ptr(const char16_t *s, size_t* size); + +size_t utf8_size_of_utf16_string(const char16_t* s); +size_t utf8_size_of_utf16_string_len(const char16_t* s, size_t len); + +size_t utf16_size_of_utf8_string(const char* s); +size_t utf16_size_of_utf8_string_len(const char* s, size_t len); + +size_t utf8_string_from_utf16_string(char* dst, size_t dst_max_len, const char16_t *s); +size_t utf16_string_from_utf8_string(char16_t* dst, size_t dst_max_len, const char* s); + + +/****** utf16 - utf32 *****/ + +size_t utf16_size_of_utf32_char(char32_t c); void get_char16_from_char32(char32_t char32, char16_t* char16_1, char16_t* char16_2); - -size_t utf8_string_char16_count(const char *src); -size_t char16_string_from_utf8_string(char16_t* dst, size_t dst_max_len, const char *s); -size_t utf8_string_from_char16_string(char* dst, size_t dst_max_len, const char16_t *s); - -size_t utf8_string_char32_count(const char *s); -size_t char32_string_from_utf8_string(char32_t* dst, size_t dst_max_len, const char *s); -size_t utf8_string_from_char32_string(char* dst, size_t dst_max_len, const char32_t *s); +char32_t get_char32_from_char16(char16_t char16_1, char16_t char16_2); +const char16_t* get_char32_from_utf16_string(const char16_t* s, char32_t* char32); -size_t utf8_string_wchar_count(const char *src); -size_t wchar_string_from_utf8_string(wchar_t* dst, size_t dst_max_len, const char *s); +size_t utf16_size_of_utf32_string(const char32_t *s); +size_t utf16_size_of_utf32_string_len(const char32_t *s, size_t len); +size_t utf32_size_of_utf16_string(const char16_t *s); +size_t 
utf32_size_of_utf16_string_len(const char16_t *s, size_t len); + +size_t utf16_string_from_utf32_string(char16_t* dst, size_t dst_max_len, const char32_t *s); +size_t utf32_string_from_utf16_string(char32_t* dst, size_t dst_max_len, const char16_t *s); + + +/****** utf8 - wchar_t *****/ + +size_t utf8_size_of_wchar_string(const wchar_t* s); +size_t utf8_size_of_wchar_string_len(const wchar_t* s, size_t len); +size_t wchar_size_of_utf8_string(const char* s); +size_t wchar_size_of_utf8_string_len(const char* s, size_t len); + size_t utf8_string_from_wchar_string(char* dst, size_t dst_max_len, const wchar_t* s); +size_t wchar_string_from_utf8_string(wchar_t* dst, size_t dst_max_len, const char* s); -size_t char16_string_char32_count(const char16_t *s); -size_t utf32_string_to_char16_string(char32_t* dst, size_t dst_max_len, const char16_t *s); -size_t utf16_string_to_char32_string(char16_t* dst, size_t dst_max_len, const char32_t *s); +/****** utf16 - wchar_t *****/ +size_t utf16_size_of_wchar_string(const wchar_t* s); +size_t utf16_size_of_wchar_string_len(const wchar_t* s, size_t len); +size_t wchar_size_of_utf16_string(const char16_t *s); +size_t wchar_size_of_utf16_string_len(const char16_t *s, size_t len); + +size_t utf16_string_from_wchar_string(char16_t* dst, size_t dst_max_len, const wchar_t* s); +size_t wchar_string_from_utf16_string(wchar_t* dst, size_t dst_max_len, const char16_t* s); + + +/****** utf32 - wchar_t *****/ +size_t utf32_size_of_wchar_string(const wchar_t* s); +size_t utf32_size_of_wchar_string_len(const wchar_t* s, size_t len); +size_t wchar_size_of_utf32_string(const char32_t *s); +size_t wchar_size_of_utf32_string_len(const char32_t *s, size_t len); + +size_t utf32_string_from_wchar_string(char32_t* dst, size_t dst_max_len, const wchar_t* s); +size_t wchar_string_from_utf32_string(wchar_t* dst, size_t dst_max_len, const char32_t* s); + + +/****** no conversion *****/ + +size_t utf8_size_of_utf8_string(const char* s); +size_t 
utf8_size_of_utf8_string_len(const char* s, size_t len); +size_t utf16_size_of_utf16_string(const char16_t* s); +size_t utf16_size_of_utf16_string_len(const char16_t* s, size_t len); +size_t utf32_size_of_utf32_string(const char32_t* s); +size_t utf32_size_of_utf32_string_len(const char32_t* s, size_t len); +size_t wchar_size_of_wchar_string(const wchar_t* s); +size_t wchar_size_of_wchar_string_len(const wchar_t* s, size_t len); + +size_t utf8_string_from_utf8_string(char* dst, size_t dst_max_len, const char *s); +size_t utf16_string_from_utf16_string(char16_t* dst, size_t dst_max_len, const char16_t *s); +size_t utf32_string_from_utf32_string(char32_t* dst, size_t dst_max_len, const char32_t *s); +size_t wchar_string_from_wchar_string(wchar_t* dst, size_t dst_max_len, const wchar_t *s); + +/****** convenience *****/ + +/* "length" is the utf32 size: each helper forwards to the matching utf32_size_of_*_string. */ inline size_t length_of_utf8_string(const char* s) {return utf32_size_of_utf8_string(s); } +inline size_t length_of_utf16_string(const char16_t* s) {return utf32_size_of_utf16_string(s); } +inline size_t length_of_utf32_string(const char32_t* s) {return utf32_size_of_utf32_string(s); } // UTF32 length == size +size_t length_of_wchar_string(const wchar_t* s); + + +#ifdef __cplusplus + +/* Overloads that choose the decoder from the pointer type of s. The char32_t overload stores *s and returns s+1 without testing for 0; the wchar_t overload reinterprets s as wchar_cast. */ inline const char* get_char32_from_string(const char* s, char32_t* char32) { return get_char32_from_utf8_string(s, char32); } +inline const char16_t* get_char32_from_string(const char16_t* s, char32_t* char32) { return get_char32_from_utf16_string(s, char32); } +inline const char32_t* get_char32_from_string(const char32_t* s, char32_t* char32) { *char32 = *s; return s+1; } +inline const wchar_t* get_char32_from_string(const wchar_t* s, char32_t* char32) { return (wchar_t*)get_char32_from_string((wchar_cast*)s, char32); } + + +/* Type-overloaded spelling of the length_of_utf8/16/32_string helpers (no stray ';' after the function bodies). */ inline size_t length_of_utf_string(const char* s) { return utf32_size_of_utf8_string(s); } +inline size_t length_of_utf_string(const char16_t* s) { return utf32_size_of_utf16_string(s); } +inline size_t length_of_utf_string(const char32_t* s) {
return utf32_size_of_utf32_string(s); } +inline size_t length_of_utf_string(const wchar_t* s) { return length_of_utf_string((wchar_cast*)s); } +/* size_of_utf_string: size of s in its own encoding; each overload forwards to utfN_size_of_utfN_string, the wchar_t one via wchar_cast. */ inline size_t size_of_utf_string(const char* s) { return utf8_size_of_utf8_string(s); } +inline size_t size_of_utf_string(const char16_t* s) { return utf16_size_of_utf16_string(s); } +inline size_t size_of_utf_string(const char32_t* s) { return utf32_size_of_utf32_string(s); } // for UTF32 size and length are equal +inline size_t size_of_utf_string(const wchar_t* s) { return size_of_utf_string((wchar_cast*)s); } + +/* utf_size_of_utf_string(tag, s): the first argument is only a type tag selecting the target encoding; its value is never read, it is merely re-cast to wchar_cast* when it is a wchar_t*. Source here is utf8. */ inline size_t utf_size_of_utf_string(const char*, const char* s) { return utf8_size_of_utf8_string(s); } +inline size_t utf_size_of_utf_string(const char16_t*, const char* s) { return utf16_size_of_utf8_string(s); } +inline size_t utf_size_of_utf_string(const char32_t*, const char* s) { return utf32_size_of_utf8_string(s); } +inline size_t utf_size_of_utf_string(const wchar_t* t, const char* s) { return utf_size_of_utf_string((wchar_cast*)t, s); } + +/* Same tag convention, source is utf16. */ inline size_t utf_size_of_utf_string(const char*, const char16_t* s) { return utf8_size_of_utf16_string(s); } +inline size_t utf_size_of_utf_string(const char16_t*, const char16_t* s) { return utf16_size_of_utf16_string(s); } +inline size_t utf_size_of_utf_string(const char32_t*, const char16_t* s) { return utf32_size_of_utf16_string(s); } +inline size_t utf_size_of_utf_string(const wchar_t* t, const char16_t* s) { return utf_size_of_utf_string((wchar_cast*)t, s); } + +/* Same tag convention, source is utf32. */ inline size_t utf_size_of_utf_string(const char*, const char32_t* s) { return utf8_size_of_utf32_string(s); } +inline size_t utf_size_of_utf_string(const char16_t*, const char32_t* s) { return utf16_size_of_utf32_string(s); } +inline size_t utf_size_of_utf_string(const char32_t*, const char32_t* s) { return utf32_size_of_utf32_string(s); } +inline size_t utf_size_of_utf_string(const wchar_t* t, const char32_t* s) { return utf_size_of_utf_string((wchar_cast*)t, s); } + +inline size_t
utf_size_of_utf_string(const char* t, const wchar_t* s) { return utf_size_of_utf_string(t, (wchar_cast*)s); } +inline size_t utf_size_of_utf_string(const char16_t* t, const wchar_t* s) { return utf_size_of_utf_string(t, (wchar_cast*)s); } +inline size_t utf_size_of_utf_string(const char32_t* t, const wchar_t* s) { return utf_size_of_utf_string(t, (wchar_cast*)s); } +inline size_t utf_size_of_utf_string(const wchar_t* t, const wchar_t* s) { return utf_size_of_utf_string(t, (wchar_cast*)s); } + + +/* size_of_utf_string_len: forwards s and len to utfN_size_of_utfN_string_len for the encoding of s; the wchar_t overload re-dispatches through wchar_cast. */ inline size_t size_of_utf_string_len(const char* s, size_t len) { return utf8_size_of_utf8_string_len(s, len); } +inline size_t size_of_utf_string_len(const char16_t* s, size_t len) { return utf16_size_of_utf16_string_len(s, len); } +inline size_t size_of_utf_string_len(const char32_t* s, size_t len) { return utf32_size_of_utf32_string_len(s, len); } // for UTF32 size and length are equal +inline size_t size_of_utf_string_len(const wchar_t* s, size_t len) { return size_of_utf_string_len((wchar_cast*)s, len); } + +/* utf_size_of_utf_string_len(tag, s, len): the first argument is only a type tag selecting the target encoding; a wchar_t* tag is re-cast to wchar_cast*. Source here is utf8. */ inline size_t utf_size_of_utf_string_len(const char*, const char* s, size_t len) { return utf8_size_of_utf8_string_len(s, len); } +inline size_t utf_size_of_utf_string_len(const char16_t*, const char* s, size_t len) { return utf16_size_of_utf8_string_len(s, len); } +inline size_t utf_size_of_utf_string_len(const char32_t*, const char* s, size_t len) { return utf32_size_of_utf8_string_len(s, len); } +inline size_t utf_size_of_utf_string_len(const wchar_t* t, const char* s, size_t len) { return utf_size_of_utf_string_len((wchar_cast*)t, s, len); } + +/* Same tag convention, source is utf16. */ inline size_t utf_size_of_utf_string_len(const char*, const char16_t* s, size_t len) { return utf8_size_of_utf16_string_len(s, len); } +inline size_t utf_size_of_utf_string_len(const char16_t*, const char16_t* s, size_t len) { return utf16_size_of_utf16_string_len(s, len); } +inline size_t utf_size_of_utf_string_len(const char32_t*, const char16_t* s, size_t len) { return utf32_size_of_utf16_string_len(s, len); } +inline
size_t utf_size_of_utf_string_len(const wchar_t* t, const char16_t* s, size_t len) { return utf_size_of_utf_string_len((wchar_cast*)t, s, len); } + +/* Target encoding is selected by the type of the first (tag) argument; source is utf32. */ inline size_t utf_size_of_utf_string_len(const char*, const char32_t* s, size_t len) { return utf8_size_of_utf32_string_len(s, len); } +inline size_t utf_size_of_utf_string_len(const char16_t*, const char32_t* s, size_t len) { return utf16_size_of_utf32_string_len(s, len); } +inline size_t utf_size_of_utf_string_len(const char32_t*, const char32_t* s, size_t len) { return utf32_size_of_utf32_string_len(s, len); } +inline size_t utf_size_of_utf_string_len(const wchar_t* t, const char32_t* s, size_t len) { return utf_size_of_utf_string_len((wchar_cast*)t, s, len); } + +/* Source is wchar_t: only s is re-cast to wchar_cast. The tag t keeps its own type so that overload resolution still selects the requested target encoding (casting t as well would always compute the wchar_cast-encoded size). */ inline size_t utf_size_of_utf_string_len(const char* t, const wchar_t* s, size_t len) { return utf_size_of_utf_string_len(t, (wchar_cast*)s, len); } +inline size_t utf_size_of_utf_string_len(const char16_t* t, const wchar_t* s, size_t len) { return utf_size_of_utf_string_len(t, (wchar_cast*)s, len); } +inline size_t utf_size_of_utf_string_len(const char32_t* t, const wchar_t* s, size_t len) { return utf_size_of_utf_string_len(t, (wchar_cast*)s, len); } +inline size_t utf_size_of_utf_string_len(const wchar_t* t, const wchar_t* s, size_t len) { return utf_size_of_utf_string_len((wchar_cast*)t, (wchar_cast*)s, len); } + + + + + +/* utf_string_from_utf_string: the conversion routine is selected from the types of dst and s; source here is utf8. */ inline size_t utf_string_from_utf_string(char* dst, size_t dst_max_len, const char* s) { return utf8_string_from_utf8_string(dst, dst_max_len, s); } +inline size_t utf_string_from_utf_string(char16_t* dst, size_t dst_max_len, const char* s) { return utf16_string_from_utf8_string(dst, dst_max_len, s); } +inline size_t utf_string_from_utf_string(char32_t* dst, size_t dst_max_len, const char* s) { return utf32_string_from_utf8_string(dst, dst_max_len, s); } +inline size_t utf_string_from_utf_string(wchar_t* dst, size_t dst_max_len, const char* s) { return utf_string_from_utf_string((wchar_cast*)dst,
dst_max_len, s); } + +/* Source is utf16: each overload forwards to utfN_string_from_utf16_string for the type of dst; a wchar_t* dst is re-cast to wchar_cast*. */ inline size_t utf_string_from_utf_string(char* dst, size_t dst_max_len, const char16_t *s) { return utf8_string_from_utf16_string(dst, dst_max_len, s); } +inline size_t utf_string_from_utf_string(char16_t* dst, size_t dst_max_len, const char16_t *s) { return utf16_string_from_utf16_string(dst, dst_max_len, s); } +inline size_t utf_string_from_utf_string(char32_t* dst, size_t dst_max_len, const char16_t *s) { return utf32_string_from_utf16_string(dst, dst_max_len, s); } +inline size_t utf_string_from_utf_string(wchar_t* dst, size_t dst_max_len, const char16_t *s) { return utf_string_from_utf_string((wchar_cast*)dst, dst_max_len, s); } + +/* Source is utf32: same dispatch on the type of dst. */ inline size_t utf_string_from_utf_string(char* dst, size_t dst_max_len, const char32_t *s) { return utf8_string_from_utf32_string(dst, dst_max_len, s); } +inline size_t utf_string_from_utf_string(char16_t* dst, size_t dst_max_len, const char32_t *s) { return utf16_string_from_utf32_string(dst, dst_max_len, s); } +inline size_t utf_string_from_utf_string(char32_t* dst, size_t dst_max_len, const char32_t *s) { return utf32_string_from_utf32_string(dst, dst_max_len, s); } +inline size_t utf_string_from_utf_string(wchar_t* dst, size_t dst_max_len, const char32_t *s) { return utf_string_from_utf_string((wchar_cast*)dst, dst_max_len, s); } + +/* Source is wchar_t: s is re-cast to wchar_cast* and the call is re-dispatched to one of the overloads above; dst keeps its own type here. */ inline size_t utf_string_from_utf_string(char* dst, size_t dst_max_len, const wchar_t *s) { return utf_string_from_utf_string(dst, dst_max_len, (wchar_cast*)s); } +inline size_t utf_string_from_utf_string(char16_t* dst, size_t dst_max_len, const wchar_t *s) { return utf_string_from_utf_string(dst, dst_max_len, (wchar_cast*)s); } +inline size_t utf_string_from_utf_string(char32_t* dst, size_t dst_max_len, const wchar_t *s) { return utf_string_from_utf_string(dst, dst_max_len, (wchar_cast*)s); } +inline size_t utf_string_from_utf_string(wchar_t* dst, size_t dst_max_len, const wchar_t *s) { return utf_string_from_utf_string(dst, dst_max_len, (wchar_cast*)s); } + +#endif //
__cplusplus + + + + + + + + + + + + + +#undef wchar_cast #endif /* utf816Conversion_hpp */ diff --git a/rEFIt_UEFI/cpp_unit_test/poolprint-test.cpp b/rEFIt_UEFI/cpp_unit_test/poolprint-test.cpp index b7bf43460..3b44b6380 100644 --- a/rEFIt_UEFI/cpp_unit_test/poolprint-test.cpp +++ b/rEFIt_UEFI/cpp_unit_test/poolprint-test.cpp @@ -68,7 +68,7 @@ static int testWPrintf(const char* label, const wchar_t* expectResult, int expe wchar_t* wbuf = spc.Str; va_end(valist); //delay_ms(10); - if ( memcmp(wbuf, expectResult, wchar_len(expectResult)*sizeof(expectResult[0])) != 0 ) { + if ( memcmp(wbuf, expectResult, size_of_utf_string(expectResult)*sizeof(expectResult[0])) != 0 ) { // loggf(F(" -> ERROR. Expect " PRILF " and get %ls\n"), expectResult, buf); // not using wprintf, it crashes sometimes, it doesn't work for short-wchar loggf(F("%s -> ERROR. Expect "), label); diff --git a/rEFIt_UEFI/cpp_unit_test/printf_lite-test.cpp b/rEFIt_UEFI/cpp_unit_test/printf_lite-test.cpp index 3cde729bf..615d903ef 100644 --- a/rEFIt_UEFI/cpp_unit_test/printf_lite-test.cpp +++ b/rEFIt_UEFI/cpp_unit_test/printf_lite-test.cpp @@ -75,7 +75,7 @@ static int testWPrintf(const char* label, const wchar_t* expectResult, int expe int vsnwprintf_ret = PRINTF_FUNCTION_NAME(PRINTF_CFUNCTION_PREFIX, vsnwprint, PRINTF_CFUNCTION_SUFFIX)(wbuf, sizeof(wbuf)/sizeof(wchar_t), format, valist); va_end(valist); //delay_ms(10); - if ( memcmp(wbuf, expectResult, wchar_len(expectResult)*sizeof(expectResult[0])) != 0 ) { + if ( memcmp(wbuf, expectResult, size_of_utf_string(expectResult)*sizeof(expectResult[0])) != 0 ) { // loggf(F(" -> ERROR. Expect " PRILF " and get %ls\n"), expectResult, buf); // not using wprintf, it crashes sometimes, it doesn't work for short-wchar loggf(F("%s -> ERROR. 
Expect "), label); diff --git a/rEFIt_UEFI/cpp_unit_test/printlib-test.cpp b/rEFIt_UEFI/cpp_unit_test/printlib-test.cpp index 2784a38b6..ba7708735 100644 --- a/rEFIt_UEFI/cpp_unit_test/printlib-test.cpp +++ b/rEFIt_UEFI/cpp_unit_test/printlib-test.cpp @@ -83,7 +83,7 @@ static int testWPrintf(const char* label, const wchar_t* expectResult, int expe UnicodeVSPrint(wbuf, sizeof(wbuf), format, valist); va_end(valist); //delay_ms(10); - if ( memcmp(wbuf, expectResult, wchar_len(expectResult)*sizeof(expectResult[0])) != 0 ) { + if ( memcmp(wbuf, expectResult, size_of_utf_string(expectResult)*sizeof(expectResult[0])) != 0 ) { // loggf(F(" -> ERROR. Expect " PRILF " and get %ls\n"), expectResult, buf); // not using wprintf, it crashes sometimes, it doesn't work for short-wchar loggf(F("%s -> ERROR. Expect "), label);