// Number of bytes that follow a given first byte in a UTF-8 sequence,
// indexed by the value of that first byte (256 entries, 16 per row).
// Bytes 0x80-0xBF map to 0, exactly like the single-byte range 0x00-0x7F.
// The table also covers the 5- and 6-byte lead bytes 0xF8-0xFF.
constexpr u8 trailing_bytes_for_utf8[] = {
  0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  // 0x00-0x0F
  0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  // 0x10-0x1F
  0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  // 0x20-0x2F
  0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  // 0x30-0x3F
  0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  // 0x40-0x4F
  0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  // 0x50-0x5F
  0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  // 0x60-0x6F
  0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  // 0x70-0x7F
  0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  // 0x80-0x8F
  0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  // 0x90-0x9F
  0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  // 0xA0-0xAF
  0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  // 0xB0-0xBF
  1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  // 0xC0-0xCF
  1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  // 0xD0-0xDF
  2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2,  // 0xE0-0xEF
  3,3,3,3, 3,3,3,3, 4,4,4,4, 5,5,5,5   // 0xF0-0xFF
};
// Code point written to the output when a sequence cannot be decoded (U+FFFD).
constexpr u32 UNI_REPLACEMENT_CHAR = 0xFFFD;

// Mask that keeps only the payload bits of the first byte of a sequence,
// indexed by the number of trailing bytes (0..5) that follow it.
// The identifier's spelling ("inital") is kept as-is because other code refers to it.
constexpr u8 utf8_inital_byte_mask[6] = {
  0x7F,  // 0 trailing bytes
  0x1F,  // 1 trailing byte
  0x0F,  // 2 trailing bytes
  0x07,  // 3 trailing bytes
  0x03,  // 4 trailing bytes
  0x01,  // 5 trailing bytes
};

// Largest decoded value that is passed through; anything above it is
// replaced by UNI_REPLACEMENT_CHAR.
constexpr u32 UNI_MAX_UTF32 = 0x7FFFFFFF;

// Decodes a single UTF-8 encoded code point from the start of `data`.
//
//   data              - first byte of the sequence to decode.
//   source_length     - number of readable bytes starting at `data`.
//   utf32             - receives the decoded value, or UNI_REPLACEMENT_CHAR
//                       when decoding fails.
//   source_length_out - receives the number of bytes consumed.
//
// Returns false when there is nothing to decode (source_length <= 0, zero
// bytes consumed) or when the sequence announced by the first byte is longer
// than the remaining input (all remaining bytes are consumed). Returns true
// otherwise.
//
// Decoding is lenient: continuation bytes are not checked for the 10xxxxxx
// form, and overlong encodings or surrogate values are not rejected.
bool character_utf8_to_utf32 (u8* data, s64 source_length, u32* utf32, s64* source_length_out) {
  // Bail out before touching data[0]: with an empty buffer that read would
  // be out of bounds.
  if (source_length <= 0) {
    (*utf32) = UNI_REPLACEMENT_CHAR;
    (*source_length_out) = 0;
    return false;
  }
  
  s32 continuation_bytes = trailing_bytes_for_utf8[data[0]];
  s64 sequence_length = continuation_bytes + 1;
  
  // Truncated sequence: report failure and swallow whatever is left so the
  // caller still makes forward progress.
  if (sequence_length > source_length) {
    (*utf32) = UNI_REPLACEMENT_CHAR;
    (*source_length_out) = source_length;
    return false;
  }
  
  // Payload bits of the first byte, then six more bits per trailing byte.
  u32 ch = data[0] & utf8_inital_byte_mask[continuation_bytes];
  
  for (s64 i = 1; i <= continuation_bytes; i += 1) {
    ch = ch << 6;
    //if strict ...
    ch |= data[i] & 0x3F;
  }
  
  // #if strict... {}
  
  // Defensive range check. With the current masks a decoded value has at
  // most 31 significant bits, so this does not trigger today.
  if (ch > UNI_MAX_UTF32) {
    ch = UNI_REPLACEMENT_CHAR;
  }
  
  (*utf32) = ch;
  (*source_length_out) = sequence_length;
  return true;
}

// Decodes the code point at the front of `s` into *utf32_char_out and then
// advances `s` (data pointer forward, count down) by the number of bytes
// that character_utf8_to_utf32 reports as consumed.
// Returns the success flag of character_utf8_to_utf32 unchanged.
// NOTE(review): presumably callers only invoke this while s.count > 0 — confirm.
bool next_utf8_to_utf32 (string& s, u32* utf32_char_out) {
  s64 bytes_consumed;
  const bool decoded = character_utf8_to_utf32(s.data, s.count, utf32_char_out, &bytes_consumed);
  
  // Step past the bytes that were just decoded.
  s.count -= bytes_consumed;
  s.data  += bytes_consumed;
  
  // The decoder must never report more bytes than were available.
  Assert(s.count >= 0);
  return decoded;
}