565 lines
14 KiB
C++
565 lines
14 KiB
C++
// #NOTE: All string building, printing and copying operations SHOULD null-terminate the
|
|
// strings for backwards compatibility reasons. #FIX if something doesn't follow this rule!
|
|
// Returns true when `s` has a non-null data pointer and a positive count.
bool is_valid (string s) {
    if (s.data == nullptr) return false;
    return s.count > 0;
}
|
|
|
|
// Returns true when `s` has a data pointer and the byte at index `s.count`
// (one past the last counted byte) is '\0'.
// NOTE(review): this reads data[count], so that byte must be readable — confirm at call sites.
bool is_c_string (string s) {
    if (!s.data) return false;
    return s.data[s.count] == '\0';
}
|
|
|
|
// Allocates (via internal_alloc) a null-terminated copy of the bytes of `s`
// and returns it. A string with a null data pointer or a non-positive count
// yields a buffer holding only the terminator.
u8* to_c_string (string s) {
    // Treat null/negative input as empty so memcpy is never handed a null
    // source or a negative (huge when converted) length.
    s64 count = (s.data != nullptr && s.count > 0) ? s.count : 0;

    u8* result = (u8*)internal_alloc(count + 1);

    if (count > 0) {
        memcpy(result, s.data, count);
    }
    result[count] = '\0';

    return result;
}
|
|
|
|
// Returns a copy of `s` in memory obtained from internal_alloc. The copy has
// one extra byte holding '\0' that is not included in `count`.
// A string with count <= 0 yields "".
string copy_string (string s) {
    if (s.count <= 0)
        return "";

    u8* buffer = (u8*)internal_alloc(s.count + 1);
    memcpy(buffer, s.data, s.count);
    buffer[s.count] = '\0'; // null-terminate for backwards compatibility

    string copy = {};
    copy.data  = buffer;
    copy.count = s.count;
    return copy;
}
|
|
|
|
// Copies `s` after pushing `allocator` via push_allocator; asserts that the
// allocator has a proc.
force_inline string copy_string (Allocator allocator, string s) {
    Assert(allocator.proc != nullptr);
    push_allocator(allocator);
    string copy = copy_string(s);
    return copy;
}
|
|
|
|
// We should have a global atomically-incrementing counter for "untracked" and "no context" allocations.
|
|
// Same as copy_string(string), but the buffer comes from Aligned_Alloc with
// default_allocator_default_alignment. Release with string_free_untracked.
string copy_string_untracked (string s) { // #unsafe
    if (s.count <= 0)
        return "";

    u8* buffer = (u8*)Aligned_Alloc(s.count + 1, default_allocator_default_alignment);
    memcpy(buffer, s.data, s.count);
    buffer[s.count] = '\0'; // null-terminate for backwards compatibility

    string copy = {};
    copy.data  = buffer;
    copy.count = s.count;
    return copy;
}
|
|
|
|
// Releases the data pointer of `s` with Aligned_Free. `s` is passed by value,
// so the caller's string still holds the (now freed) pointer afterwards.
void string_free_untracked (string s) { // #unsafe
    u8* memory = s.data;
    Aligned_Free(memory);
}
|
|
|
|
// Same as copy_string(string), but the buffer comes from default_allocator_new.
// Release with string_free_no_context.
string copy_string_no_context (string s) {
    if (s.count <= 0)
        return "";

    u8* buffer = (u8*)default_allocator_new(s.count + 1);
    memcpy(buffer, s.data, s.count);
    buffer[s.count] = '\0'; // null-terminate for backwards compatibility

    string copy = {};
    copy.data  = buffer;
    copy.count = s.count;
    return copy;
}
|
|
|
|
// Copies a null-terminated C string into a new string allocated with
// NewArray<u8>. The copy is null-terminated; the terminator is not counted.
// A null pointer or an empty C string yields "".
string copy_string (char* c_string) {
    // strlen on a null pointer is undefined; treat it like the empty string.
    if (c_string == nullptr)
        return "";

    s64 string_length = (s64)strlen(c_string);
    if (string_length == 0)
        return "";

    string str = {};
    str.data = NewArray<u8>(string_length + 1);
    memcpy(str.data, c_string, string_length);
    str.count = string_length;

    str.data[str.count] = '\0'; // null-terminate for backwards compatibility

    return str;
}
|
|
|
|
// Builds a string from the count and data pointer of a byte view.
// No bytes are copied.
string to_string (ArrayView<u8> str) {
    string result = {str.count, str.data};
    return result;
}
|
|
|
|
// Builds a byte view from the count and data pointer of a string.
// No bytes are copied.
ArrayView<u8> to_view (string s) {
    ArrayView<u8> view = {s.count, s.data};
    return view;
}
|
|
|
|
// Frees the string's data with internal_free, then clears `s` so it no longer
// refers to the released memory.
void string_free (string& s) {
    internal_free(s.data);

    s.count = 0;
    s.data  = nullptr;
}
|
|
|
|
// Frees the string's data with default_allocator_free, then clears `s` so it
// no longer refers to the released memory.
void string_free_no_context (string& s) {
    default_allocator_free(s.data);

    s.count = 0;
    s.data  = nullptr;
}
|
|
|
|
// Returns a non-owning view of `s` starting at `start_index` and spanning at
// most `view_count` bytes; the view is clamped to the end of `s`.
// Returns "" when either argument is negative or `start_index` is at or past
// the end of `s` (the asserts flag negative arguments first).
force_inline string string_view (string s, s64 start_index, s64 view_count) {
    Assert(view_count >= 0); Assert(start_index >= 0);
    if (view_count < 0 || start_index < 0 || start_index >= s.count) return "";

    // Compare against the remaining length rather than computing
    // start_index + view_count, which can overflow for very large view_count.
    s64 available = s.count - start_index; // > 0 because of the guard above
    s64 new_count = (view_count > available) ? available : view_count;

    return { new_count, s.data + start_index };
}
|
|
|
|
// Named wrapper around the string equality operator.
bool strings_match (string first_string, string second_string) {
    bool equal = (first_string == second_string);
    return equal;
}
|
|
|
|
// #Unicode
|
|
// Converts `length` UTF-16 code units at `source` to a UTF-8 string allocated
// with NewArray<u8>. Pass length == -1 for null-terminated input.
// Returns an empty string when length is 0 or the conversion fails.
// The result is always null-terminated; the terminator is not counted.
string wide_to_utf8 (u16* source, s32 length) {
    if (length == 0) return { };

    // First call: ask how many bytes the conversion needs.
    s32 query_result = WideCharToMultiByte(CP_UTF8, 0, (LPCWCH)source, length,
                                           nullptr, 0, nullptr, nullptr);

    if (query_result <= 0) return { };

    // Make room for a null terminator. With length == -1 the reported size
    // already includes the input's terminator.
    if (length != -1) {
        query_result += 1;
    }

    u8* memory = NewArray<u8>(query_result);

    s32 result = WideCharToMultiByte(CP_UTF8, 0, (LPCWCH)source, length,
                                     (LPSTR)memory, query_result, nullptr, nullptr);
    if (result <= 0) {
        internal_free(memory);
        return { };
    }

    string utf8_string;
    utf8_string.count = query_result - 1; // null terminator is not counted
    utf8_string.data  = memory;

    // With an explicit length the API does not write a terminator, so write it
    // here instead of relying on the allocation being zero-filled.
    utf8_string.data[utf8_string.count] = '\0';

    return utf8_string;
}
|
|
|
|
// Convenience overload: converts a wstring by forwarding its data pointer and
// its count (narrowed to s32) to wide_to_utf8(u16*, s32).
force_inline string wide_to_utf8 (wstring wstr) {
    s32 length = (s32)wstr.count;
    return wide_to_utf8(wstr.data, length);
}
|
|
|
|
// Converts a UTF-8 string to a wstring using MultiByteToWideChar.
// Returns an empty wstring when `source` is empty or the conversion fails.
wstring utf8_to_wide (string source) {
    if (!source) return {};

    LPCCH utf8_bytes = (LPCCH)source.data;
    s32   utf8_count = (s32)source.count; // @Robustness: Silent failure if too long. @Cleanup.

    // First call: ask how many wide characters are needed.
    s32 query_num_chars = MultiByteToWideChar(CP_UTF8, 0, utf8_bytes, utf8_count, nullptr, 0);
    if (query_num_chars <= 0) return {};

    wstring name_u16s = wstring(query_num_chars);

    // Second call: perform the conversion into the new buffer.
    s32 result_num_chars = MultiByteToWideChar(CP_UTF8, 0, utf8_bytes, utf8_count,
                                               (LPWSTR)name_u16s.data, query_num_chars);
    if (result_num_chars == 0) {
        internal_free(name_u16s.data);
        return {};
    }

    Assert(result_num_chars <= query_num_chars);
    // NOTE(review): this writes at index result_num_chars, which can equal
    // query_num_chars. That is only in bounds if wstring(n) allocates at least
    // n + 1 elements — confirm against the wstring constructor.
    name_u16s.data[result_num_chars] = 0; // null terminate

    return name_u16s;
}
|
|
|
|
// printf-style formatting into a newly allocated, null-terminated string.
// `format.data` is handed to vsnprintf as a C string.
// Allocates with NewArray<u8> when a thread context exists, otherwise warns on
// stderr and falls back to default_allocator_new.
// Returns an empty string if vsnprintf reports an error while measuring.
force_inline string format_string_internal (string format, va_list args) {
    string str = {};

    // Measure with a copy: a va_list is indeterminate after vsnprintf has
    // consumed it, and `args` is needed again for the real formatting pass.
    va_list measure_args;
    va_copy(measure_args, args);
    s64 required = (s64)vsnprintf(nullptr, (size_t)0, (char*)format.data, measure_args);
    va_end(measure_args);

    if (required < 0) return str; // formatting error: nothing to allocate

    if (thread_context() != nullptr) {
        str.data = NewArray<u8>(required + 1);
    } else {
        fprintf(stderr, "[Warning] Calling `format_string` without a valid context; falling back to default_allocator\n");
        str.data = (u8*)default_allocator_new(required + 1);
    }

    // Note that vsnprintf always produces a null-terminated result!
    s64 written = (s64)vsnprintf((char*)str.data, (size_t)(required + 1), (char*)format.data, args);
    if (written < 0) {
        // Keep the result a valid empty C string instead of a negative count.
        str.data[0] = '\0';
        written = 0;
    }
    str.count = written;

    return str;
}
|
|
|
|
// printf-style formatting with `allocator` pushed via push_allocator.
// Asserts that a thread context exists and that the allocator has a proc.
string format_string (Allocator allocator, char* format, ...) { // #sprint
    push_allocator(allocator);

    // Debug-only checks: this overload is meant to be called with a valid
    // thread_local context and a valid allocator.
    Assert(thread_context() != nullptr);
    Assert(allocator.proc != nullptr);

    va_list arguments;
    va_start(arguments, format);
    string formatted = format_string_internal(format, arguments);
    va_end(arguments);

    return formatted;
}
|
|
|
|
// printf-style formatting; collects the variadic arguments and forwards them
// to format_string_internal, which performs the allocation.
string format_string (char* format, ...) { // #sprint
    va_list arguments;
    va_start(arguments, format);
    string formatted = format_string_internal(format, arguments);
    va_end(arguments);

    return formatted;
}
|
|
|
|
// Creates a string builder by calling arena_array_new<u8> with the given
// reserve and label.
force_inline String_Builder* new_string_builder (Arena_Reserve new_reserve, string label) {
    String_Builder* builder = arena_array_new<u8>(1, new_reserve, label);
    return builder;
}
|
|
|
|
// Appends the bytes of `s` to the builder.
force_inline void append (String_Builder* sb, string s) {
    ArrayView<u8> bytes(s.count, s.data);
    array_add(*sb, bytes);
}
|
|
|
|
// Appends every string in `strings` to the builder, in order. The combined
// length is computed first so the builder is reserved at most once.
void append (String_Builder* sb, ArrayView<string> strings) {
    s64 total_bytes = 0;
    for (s64 k = 0; k < strings.count; k += 1) {
        total_bytes += strings[k].count;
    }

    s64 required = sb->count + total_bytes;
    if (sb->allocated < required) {
        array_reserve(*sb, required);
    }

    for (s64 k = 0; k < strings.count; k += 1) {
        string piece = strings[k];
        array_add(*sb, ArrayView<u8>(piece.count, piece.data));
    }
}
|
|
|
|
// Writes the bytes of `s` after the builder's current contents without
// keeping them in the count: the bytes are added, then the count is rolled
// back by the same amount.
force_inline void append_no_add (String_Builder* sb, string s) {
    ArrayView<u8> bytes(s.count, s.data);
    array_add(*sb, bytes);
    sb->count -= s.count;
}
|
|
|
|
// Unfortunately this follows the printf format, which is annoying.
|
|
// I'd rather have something like fmt::
|
|
// printf-style append to the builder. The builder is first reserved up to
// max_array_size(*sb), then vsnprintf writes directly after the current
// contents. `format.data` is handed to vsnprintf as a C string.
// On a formatting error, or when no space is available, the builder is left
// unchanged. If the output is truncated, only the bytes actually written are
// counted.
void print_to_builder_internal (String_Builder* sb, string format, va_list args) {
    s64 expected_final_count = max_array_size(*sb); // amount to reserve

    if (sb->allocated < expected_final_count) {
        array_reserve(*sb, expected_final_count);
    }

    s64 buffer_size = sb->allocated - sb->count; // available space
    if (buffer_size <= 0) return; // nothing can be written; avoid a bogus size_t

    u8* current_point = &sb->data[sb->count];

    s64 print_count = (s64)vsnprintf((char*)current_point, (size_t)buffer_size, (char*)format.data, args);

    // vsnprintf returns a negative value on error: do not move the count.
    if (print_count < 0) return;

    // vsnprintf returns the length it WOULD have written. On truncation it
    // wrote buffer_size - 1 characters plus a terminator, so clamp to that.
    if (print_count >= buffer_size) {
        print_count = buffer_size - 1;
    }

    // We hold on to the reserved memory rather than shrinking the array
    // afterwards, since that could be slow.
    sb->count += print_count;
}
|
|
|
|
// printf-style append to the builder. Collects the variadic arguments and
// forwards them to print_to_builder_internal, so the reserve/format/count
// logic lives in exactly one place.
void print_to_builder (String_Builder* sb, string format, ...) {
    va_list args;
    va_start(args, format);
    print_to_builder_internal(sb, format, args);
    va_end(args);
}
|
|
|
|
// Returns a non-owning view of the builder's current contents. A zero byte is
// written just past the contents (without changing the count) so the view can
// also be used as a C string.
string string_view (String_Builder* sb) {
    // Pass an explicit one-byte view of the terminator. A "\0" literal goes
    // through the char* -> string conversion, which presumably measures up to
    // the first NUL and would produce a zero-length string (nothing written)
    // — TODO confirm against the string constructor.
    u8 terminator = 0;
    string terminator_view = { (s64)1, &terminator };
    append_no_add(sb, terminator_view); // doesn't increment sb.count

    return to_string(to_view(*sb));
}
|
|
|
|
// for when we want to keep the string builder around and recycle the memory.
|
|
// Poisons the builder's current contents, then resets it: with `keep_memory`
// it calls reset_keeping_memory, otherwise array_reset.
internal force_inline void reset_string_builder (String_Builder* sb, bool keep_memory) {
    array_poison_range(*sb, 0, sb->count);

    if (!keep_memory) {
        array_reset(*sb);
        return;
    }

    reset_keeping_memory(*sb);
}
|
|
|
|
// Returns a copy (via copy_string) of the builder's current contents.
// The builder itself is not modified.
force_inline string builder_to_string (String_Builder* sb) { // #copy_string
    auto contents = to_view(*sb);
    return copy_string(to_string(contents));
}
|
|
|
|
// Releases the builder by handing it to arena_array_free.
internal force_inline void free_string_builder (String_Builder* sb) {
    String_Builder& builder = *sb;
    arena_array_free(builder);
}
|
|
|
|
// Returns 1 when `c` is an ASCII letter (A-Z or a-z), otherwise 0.
char is_ascii_letter (char c) {
    const bool upper = ('A' <= c) && (c <= 'Z');
    const bool lower = ('a' <= c) && (c <= 'z');
    return upper || lower;
}
|
|
|
|
// Maps ASCII 'A'-'Z' to 'a'-'z'; every other value is returned unchanged.
char to_lower_ascii (char c) {
    const bool is_upper = ('A' <= c) && (c <= 'Z');
    return is_upper ? (char)(c + ('a' - 'A')) : c;
}
|
|
|
|
// Maps ASCII 'a'-'z' to 'A'-'Z'; every other value is returned unchanged.
char to_upper_ascii (char c) {
    const bool is_lower = ('a' <= c) && (c <= 'z');
    return is_lower ? (char)(c - ('a' - 'A')) : c;
}
|
|
|
|
// Returns true when `c` is in the ASCII range 'A'-'Z'.
force_inline bool is_upper_ascii (char c) {
    if (c < 'A') return false;
    return c <= 'Z';
}
|
|
|
|
// Returns true when `c` is in the ASCII range 'a'-'z'.
force_inline bool is_lower_ascii (char c) {
    if (c < 'a') return false;
    return c <= 'z';
}
|
|
|
|
// string to_lower_in_place (string s) { }
|
|
// Input must be ascii or utf8!
|
|
// Returns a copy of `s_orig` (made with copy_string) in which every ASCII
// uppercase letter is lowered. All other bytes are left as they are.
string to_lower_copy (string s_orig) {
    string lowered = copy_string(s_orig);

    u8* cursor = lowered.data;
    u8* end    = lowered.data + lowered.count;
    for (; cursor < end; cursor += 1) {
        *cursor = to_lower_ascii(*cursor);
    }

    return lowered;
}
|
|
|
|
// Formats with format_string and yields the result's data pointer as a char*.
// The whole expansion is parenthesized so that an operator written after the
// macro (e.g. indexing) applies to the char* and not to `.data` before the cast.
// The buffer is whatever format_string allocated; the macro does not free it.
#define format_cstring(fmt, ...) \
    ((char*)format_string(fmt, ##__VA_ARGS__).data)
|
|
|
|
// Returns true when byte `c` equals any byte of `chars`.
bool is_any (u8 c, string chars) {
    bool found = false;

    for_each(i, chars) {
        if (chars.data[i] == c) {
            found = true;
            break;
        }
    }

    return found;
}
|
|
|
|
// Returns a view of `s` without the trailing run of bytes found in `chars`.
// When `replace_with_zeros` is set, each trimmed byte is also overwritten with
// 0 in the underlying buffer.
string trim_right (string s, string chars, bool replace_with_zeros) {
    s64 kept = s.count;

    for_each_reverse(i, s) {
        if (!is_any(s.data[i], chars)) {
            break; // first byte (from the right) that must be kept
        }

        if (replace_with_zeros) {
            s.data[i] = 0;
        }
        kept -= 1;
    }

    return string_view(s, 0, kept);
}
|
|
|
|
// Returns a view of `s` without the leading run of bytes found in `chars`.
string trim_left (string s, string chars) {
    s64 skipped = 0;

    for_each(i, s) {
        if (!is_any(s.data[i], chars)) {
            break; // first byte that must be kept
        }
        skipped += 1;
    }

    return string_view(s, skipped, s.count - skipped);
}
|
|
|
|
// Returns a view of `s` with bytes found in `chars` removed from both ends.
// The underlying buffer is not modified.
string trim (string s, string chars) {
    return trim_right(trim_left(s, chars), chars, false);
}
|
|
|
|
// Counts trailing zero bits of `x` using _tzcnt_u32 on MSVC and __builtin_ctz
// elsewhere.
// NOTE(review): __builtin_ctz has an undefined result for x == 0, so callers
// should pass a non-zero value.
force_inline u32 ctz32(u32 x) {
    u32 trailing_zeros;
#if COMPILER_MSVC
    trailing_zeros = _tzcnt_u32(x);
#else
    trailing_zeros = __builtin_ctz(x);
#endif
    return trailing_zeros;
}
|
|
|
|
// Scans `s` from its last byte towards the first and returns the index of the
// first byte found that is contained in `bytes`, or -1 when there is none.
s64 find_index_of_any_from_right (string s, string bytes) {
    for (s64 index = s.count - 1; index >= 0; index -= 1) {
        if (is_any(s[index], bytes)) return index;
    }

    return -1;
}
|
|
|
|
// Finds the first occurrence of byte `c` in `s`, scanning from `start_offset`.
// #NOTE (confusing!): start_offset is where the scan begins, but the returned
// index is relative to the START of s.data, not to start_offset.
// Returns -1 when `c` is not found or `start_offset` lies outside `s`.
s64 find_index_from_left (string s, u8 c, s64 start_offset) {
    // Reject offsets outside the string so we never read before or past data.
    if (start_offset < 0 || start_offset >= s.count) return -1;

    u8* start_p = s.data + start_offset;
    s64 len = s.count - start_offset; // bytes remaining from start_p

    __m256i vneedle = _mm256_set1_epi8((char)c);
    /** AVX-512BW: __m512i v = _mm512_loadu_si512(p); __mmask64 k = _mm512_cmpeq_epi8_mask(v, vneedle); if (k) return p + _tzcnt_u64(k); */
    s64 i = 0;
    // Bound by `len` (bytes left after start_offset), not s.count: `i` indexes
    // from start_p, so bounding by s.count would read past the end of the
    // string whenever start_offset > 0.
    for (; i + 32 <= len; i += 32) {
        // Unaligned 32-byte load.
        __m256i v = _mm256_loadu_si256((__m256i*)(start_p + i));
        // Per-byte equality against the needle.
        __m256i cmp = _mm256_cmpeq_epi8(v, vneedle);
        // One bit per byte: set where the byte matched.
        u32 mask = (u32)_mm256_movemask_epi8(cmp);

        if (mask) { return start_offset + i + ctz32(mask); }
    }

    // scalar tail:
    for (; i < len; i += 1) {
        if (start_p[i] == c) return start_offset + i;
    }

    return -1;
}
|
|
|
|
// Splits `s` at the first occurrence of `c`.
// Found:     *a = trimmed part before `c`, *b = trimmed part after `c`;
//            returns true. The separator itself is in neither part.
// Not found: *a = s (untrimmed), *b = empty; returns false.
// Both parts are views into `s`; nothing is copied.
internal bool split_by_char (string s, char c, string* a, string* b) {
    s64 index = find_index_from_left(s, c);
    if (index == -1) {
        (*a) = s;
        (*b) = {};
        return false;
    }

    (*a) = trim({index, s.data});
    // The right part starts one byte after the separator, so its length is
    // s.count - index - 1. Using s.count - index would include one byte past
    // the end of `s`.
    (*b) = trim({s.count - index - 1, s.data + index + 1});
    return true;
}
|
|
|
|
// Splits `s` on byte `c` and returns the pieces as views into `s`.
// Pieces produced by a successful split are trimmed by split_by_char; the
// final piece (no separator left) is added as-is.
ArrayView<string> string_split (string s, u8 c) {
    Array<string> results;

    string remainder = s;
    while (remainder.count) {
        string left; string right;
        if (!split_by_char(remainder, c, &left, &right)) {
            // No more separators: what is left is the last piece.
            array_add(results, remainder);
            break;
        }

        array_add(results, left);
        remainder = right;
    }

    // We want 'split' to return an unambiguous result (was there a match at the
    // very end or not?) and to be the inverse of 'join'. So when nothing
    // remains after the loop, an empty string is added as the last piece.
    // Maybe there is a more elegant way to structure the loop for this.
    if (!remainder) array_add(results, string(""));

    return results;
}
|
|
|
|
// Overwrites, in place, every byte of `s` that appears in `chars` with
// `replacement`.
void replace_chars (string s, string chars, u8 replacement) {
    for_each(i, s) {
        bool should_replace = is_any(s[i], chars);
        if (should_replace) s[i] = replacement;
    }
}
|
|
|
|
// Returns true when the first prefix.count bytes of `s` equal `prefix`.
bool begins_with (string s, string prefix) {
    if (prefix.count > s.count) return false;

    string head = string_view(s, 0, prefix.count);
    return head == prefix;
}
|
|
|
|
// #path manipulation:
|
|
// Returns the part of `path` after the last '\' or '/' as a view into `path`.
// When the path has no separator, the whole path is returned.
string path_filename (string path) {
    s64 separator = find_index_of_any_from_right(path, "\\/");
    if (separator == -1) return path;

    s64 name_start = separator + 1;
    return string_view(path, name_start, path.count - name_start);
}
|
|
|
|
// Returns the part of `path` up to and including the last '\' or '/' as a
// view into `path`. When the path has no separator, "" is returned.
string path_strip_filename (string path) {
    s64 separator = find_index_of_any_from_right(path, "\\/");
    if (separator == -1) return "";

    s64 directory_length = separator + 1; // keep the separator itself
    return string_view(path, 0, directory_length);
}
|
|
|
|
#if OS_WINDOWS
|
|
// Returns `path` without its first byte when that byte is a backslash;
// otherwise returns `path` unchanged. An empty path yields "".
string strip_leading_backslash (string path) {
    if (path == "") return "";
    if (path[0] != '\\') return path;

    return string_view(path, 1, path.count - 1);
}
|
|
|
|
// Returns true when `path` is at least three bytes long, its second byte is
// ':' and its third byte is '/' or '\'. The first byte is not inspected.
bool is_absolute_path (string path) {
    if (path.count < 3) return false;
    if (path[1] != ':') return false;

    return path[2] == '/' || path[2] == '\\';
}
|
|
#endif
|
|
|
|
// Extracts the extension of `path` (the bytes after the last '.') into *ext
// as a view into `path`, and returns true. *ext is set to "" and false is
// returned when:
//   - there is no '.', or a '\' or '/' comes after the last '.',
//   - the '.' is the first byte of the path,
//   - the '.' directly follows a separator,
//   - the '.' is the second of two dots that start the path or directly
//     follow a separator.
bool path_extension (string path, string* ext) {
    (*ext) = "";

    s64 dot = find_index_of_any_from_right(path, ".\\/");
    if (dot <= 0) return false;
    if (path[dot] != '.') return false; // a separator came last, not a dot

    u8 previous = path[dot-1];
    if (previous == '\\' || previous == '/') return false;

    if (previous == '.') {
        if (dot == 1) return false;

        u8 two_previous = path[dot-2];
        if (two_previous == '\\' || two_previous == '/') return false;
    }

    (*ext) = string_view(path, dot + 1, path.count - dot - 1);
    return true;
}
|
|
|