Reviewed-on: #2 Co-authored-by: Musa Mahmood <Musasmahmood@gmail.com> Co-committed-by: Musa Mahmood <Musasmahmood@gmail.com>
53 lines
1.8 KiB
C++
53 lines
1.8 KiB
C++
// Lookup table indexed by the first byte of a UTF-8 sequence. Each entry is
// the number of bytes that follow that first byte in the same sequence.
// Bytes 0x80-0xBF are given 0 here, so a sequence starting with one of them
// is treated as a single byte.
constexpr u8 trailing_bytes_for_utf8[] = {
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x00-0x0F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x10-0x1F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x20-0x2F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x30-0x3F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x40-0x4F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x50-0x5F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x60-0x6F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x70-0x7F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x80-0x8F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x90-0x9F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0xA0-0xAF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0xB0-0xBF
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 0xC0-0xCF
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 0xD0-0xDF
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  // 0xE0-0xEF
    3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5   // 0xF0-0xFF
};
|
|
constexpr u32 UNI_REPLACEMENT_CHAR = 0x0000FFFD;
|
|
constexpr u8 utf8_inital_byte_mask[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
|
|
constexpr u32 UNI_MAX_UTF32 = 0x7FFFFFFF;
|
|
|
|
// Decodes a single UTF-8 sequence from the start of `data` into a UTF-32 value.
//
//   data              - first byte of the sequence to decode.
//   source_length     - number of bytes that may be read from `data`.
//   utf32             - receives the decoded value, or UNI_REPLACEMENT_CHAR
//                       when decoding fails.
//   source_length_out - receives the number of bytes consumed from `data`.
//
// Returns true when a whole sequence was available and decoded.
// Returns false when:
//   * source_length <= 0: nothing is read and 0 bytes are reported consumed;
//   * the sequence announced by the first byte is longer than source_length:
//     all of source_length is reported consumed.
//
// Decoding is lenient: trailing bytes are not checked for the 10xxxxxx
// pattern and the decoded value is not checked for overlong forms or
// surrogates (see the "strict" notes below).
bool character_utf8_to_utf32 (u8* data, s64 source_length, u32* utf32, s64* source_length_out) {
    // Bail out before touching data[0]: with no readable bytes the read below
    // would be out of bounds.
    if (source_length <= 0) {
        (*utf32) = UNI_REPLACEMENT_CHAR;
        (*source_length_out) = 0;
        return false;
    }

    u8 first_character = data[0];
    s32 continuation_bytes = trailing_bytes_for_utf8[first_character];

    // Truncated input: the first byte announces more bytes than are available.
    if ((continuation_bytes + 1) > source_length) {
        (*utf32) = UNI_REPLACEMENT_CHAR;
        (*source_length_out) = source_length;
        return false;
    }

    // Payload bits of the first byte, then 6 payload bits per trailing byte.
    u32 ch = data[0] & utf8_inital_byte_mask[continuation_bytes];

    for (s64 i = 1; i <= continuation_bytes; i += 1) {
        ch = ch << 6;
        // TODO(strict): validate that data[i] is a continuation byte.
        ch |= data[i] & 0x3F;
    }

    // TODO(strict): reject overlong encodings and surrogate values here.

    (*source_length_out) = continuation_bytes + 1;

    if (ch > UNI_MAX_UTF32) {
        (*utf32) = UNI_REPLACEMENT_CHAR;
    } else {
        (*utf32) = ch;
    }

    return true;
}
|
|
|
|
// Decodes the UTF-8 sequence at the front of `s` into `*utf32_char_out` and
// advances `s` past the bytes that were consumed (s.data moves forward,
// s.count shrinks by the same amount).
//
// Returns whatever character_utf8_to_utf32 returned for that sequence.
bool next_utf8_to_utf32 (string& s, u32* utf32_char_out) {
    s64 bytes_consumed = 0;
    const bool decoded = character_utf8_to_utf32(s.data, s.count, utf32_char_out, &bytes_consumed);

    // Step the view past the decoded sequence.
    s.count -= bytes_consumed;
    s.data  += bytes_consumed;

    // The decoder must never report more bytes than the string held.
    Assert(s.count >= 0);

    return decoded;
}
|