// #NOTE: All string building, printing and copying operations SHOULD null-terminate the // strings for backwards compatibility reasons. #FIX if something doesn't follow this rule! bool is_valid (string s) { return (s.data != nullptr && s.count > 0); } bool is_c_string (string s) { return (s.data && s.data[s.count] == '\0'); } u8* to_c_string (string s) { u8* result = (u8*)internal_alloc(s.count + 1); memcpy(result, s.data, s.count); result[s.count] = '\0'; return result; } string copy_string (string s) { // Assert(s.count > 0); if (s.count <= 0) return ""; string str = {}; str.count = s.count; str.data = (u8*)internal_alloc(s.count + 1); memcpy(str.data, s.data, s.count); str.data[str.count] = '\0'; // null-terminate for backwards compatibility? return str; } force_inline string copy_string (Allocator allocator, string s) { Assert(allocator.proc != nullptr); push_allocator(allocator); return copy_string(s); } // we should have a global atomically incrementing for "untracked" and "no context" allocations. string copy_string_untracked (string s) { // #unsafe if (s.count <= 0) return ""; string str = {}; str.count = s.count; str.data = (u8*)Aligned_Alloc(s.count + 1, default_allocator_default_alignment); memcpy(str.data, s.data, s.count); str.data[str.count] = '\0'; // null-terminate for backwards compatibility? return str; } void string_free_untracked (string s) { // #unsafe Aligned_Free(s.data); } string copy_string_no_context (string s) { if (s.count <= 0) return ""; string str = {}; str.count = s.count; str.data = (u8*)default_allocator_new(s.count + 1); memcpy(str.data, s.data, s.count); str.data[str.count] = '\0'; // null-terminate for backwards compatibility? return str; } string copy_string (char* c_string) { string str = {}; s64 string_length = strlen(c_string); if (string_length == 0) return ""; str.data = NewArray(string_length + 1); memcpy(str.data, c_string, string_length); str.count = string_length; str.data[str.count] = '\0'; // null-terminate for backwards compatibility? return str; } string to_string (ArrayView str) { return {str.count, str.data}; } ArrayView to_view (string s) { return {s.count, s.data}; } void string_free (string& s) { internal_free(s.data); s.data = nullptr; s.count = 0; } void string_free_no_context (string& s) { default_allocator_free(s.data); s.data = nullptr; s.count = 0; } force_inline string string_view (string s, s64 start_index, s64 view_count) { Assert(view_count >= 0); Assert(start_index >= 0); if (view_count < 0 || start_index < 0 || start_index >= s.count) return ""; s64 new_count = view_count; if (start_index + view_count > s.count) { new_count = s.count - start_index; } return { new_count, s.data + start_index }; } bool strings_match (string first_string, string second_string) { return (first_string == second_string); } // #Unicode string wide_to_utf8 (u16* source, s32 length) { if (length == 0) return { }; s32 query_result = WideCharToMultiByte(CP_UTF8, 0, (LPCWCH)source, length, nullptr, 0, nullptr, nullptr); if (query_result <= 0) return { }; // Make room for a null terminator: if (length != -1) { query_result += 1; } u8* memory = NewArray(query_result); string utf8_string; utf8_string.count = query_result - 1; // null terminator is not counted utf8_string.data = memory; s32 result = WideCharToMultiByte(CP_UTF8, 0, (LPCWCH)source, length, (LPSTR)memory, query_result, nullptr, nullptr); if (result <= 0) { internal_free(memory); return { }; } return utf8_string; } force_inline string wide_to_utf8 (wstring wstr) { return wide_to_utf8(wstr.data, (s32)wstr.count); } wstring utf8_to_wide (string source) { if (!source) return {}; s32 query_num_chars = MultiByteToWideChar(CP_UTF8, 0, (LPCCH)source.data, (s32)source.count, // @Robustness: Silent failure if too long. @Cleanup. nullptr, 0); if (query_num_chars <= 0) return {}; wstring name_u16s = wstring(query_num_chars); s32 result_num_chars = MultiByteToWideChar(CP_UTF8, 0, (LPCCH)source.data, (s32)source.count, // @Robustness: Silent failure if too long. @Cleanup. (LPWSTR)name_u16s.data, query_num_chars); if (!result_num_chars) { internal_free(name_u16s.data); return {}; } Assert(result_num_chars <= query_num_chars); name_u16s.data[result_num_chars] = 0; // null terminate return name_u16s; } force_inline string format_string_internal (string format, va_list args) { string str = {}; str.count = (s64)vsnprintf(nullptr, (u64)0, (char*)format.data, args); if (thread_context() != nullptr) { str.data = NewArray(str.count + 1); } else { fprintf(stderr, "[Warning] Calling `format_string` without a valid context; falling back to default_allocator\n"); str.data = (u8*)default_allocator_new(str.count + 1); } // Note that vsnprintf always produces a null-terminated result! str.count = (s64)vsnprintf((char*)str.data, (size_t)(str.count + 1), (char*)format.data, args); return str; } string format_string (Allocator allocator, char* format, ...) { // #sprint push_allocator(allocator); // only for debugging, we should only be calling this with a valid thread_local context // and a valid allocator. Assert(thread_context() != nullptr); Assert(allocator.proc != nullptr); va_list args; va_start(args, format); string s = format_string_internal(format, args); va_end(args); return s; } string format_string (char* format, ...) { // #sprint va_list args; va_start(args, format); string s = format_string_internal(format, args); va_end(args); return s; } force_inline String_Builder* new_string_builder (Arena_Reserve new_reserve, string label) { return arena_array_new(1, new_reserve, label); } force_inline void append (String_Builder* sb, string s) { array_add(*sb, ArrayView(s.count, s.data)); } void append (String_Builder* sb, ArrayView strings) { s64 combined_length = 0; for (s64 i = 0; i < strings.count; i += 1) { combined_length += strings[i].count; } s64 final_length = sb->count + combined_length; if (sb->allocated < final_length) { array_reserve(*sb, final_length); } for (s64 i = 0; i < strings.count; i += 1) { string s = strings[i]; array_add(*sb, ArrayView(s.count, s.data)); } } force_inline void append_no_add (String_Builder* sb, string s) { array_add(*sb, ArrayView(s.count, s.data)); sb->count -= s.count; } // Unfortunately this follows the printf format, which is annoying. // I'd rather have something like fmt:: void print_to_builder_internal (String_Builder* sb, string format, va_list args) { s64 expected_final_count = max_array_size(*sb);// amount to reserve if (sb->allocated < expected_final_count) { array_reserve(*sb, expected_final_count); } s64 buffer_size = sb->allocated - sb->count; // available space u8* current_point = &sb->data[sb->count]; s64 print_count = (s64)vsnprintf((char*)current_point, (size_t)buffer_size, (char*)format.data, args); // maybe truncate ArenaArray to size so we're not committing a large block of memory? // This could be quite slow, so we just hold onto the memory. sb->count += print_count; } void print_to_builder (String_Builder* sb, string format, ...) { s64 expected_final_count = max_array_size(*sb); if (sb->allocated < expected_final_count) { array_reserve(*sb, expected_final_count); } s64 buffer_size = sb->allocated - sb->count; // available space u8* current_point = &sb->data[sb->count]; va_list args; va_start(args, format); s64 print_count = (s64)vsnprintf((char*)current_point, (size_t)buffer_size, (char*)format.data, args); va_end(args); sb->count += print_count; } string string_view (String_Builder* sb) { // should probably ensure final byte is null terminated... append_no_add(sb, "\0"); // doesn't increment sb.count return to_string(to_view(*sb)); } // for when we want to keep the string builder around and recycle the memory. internal force_inline void reset_string_builder (String_Builder* sb, bool keep_memory) { array_poison_range(*sb, 0, sb->count); if (keep_memory) { reset_keeping_memory(*sb); } else { array_reset(*sb); } } force_inline string builder_to_string (String_Builder* sb) { // #copy_string string final_string = copy_string(to_string(to_view(*sb))); return final_string; } internal force_inline void free_string_builder (String_Builder* sb) { arena_array_free(*sb); } char is_ascii_letter (char c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); } char to_lower_ascii (char c) { if (c >= 'A' && c <= 'Z') c = c + ('a' - 'A'); // or c += 32; return c; } char to_upper_ascii (char c) { if (c >= 'a' && c <= 'z') c = c - ('a' - 'A'); // or c -= 32; return c; } force_inline bool is_upper_ascii (char c) { return (c >= 'A' && c <= 'Z'); } force_inline bool is_lower_ascii (char c) { return (c >= 'a' && c <= 'z'); } // string to_lower_in_place (string s) { } // Input must be ascii or utf8! string to_lower_copy (string s_orig) { string s = copy_string(s_orig); for (s64 i = 0; i < s.count; i += 1) { s.data[i] = to_lower_ascii(s.data[i]); } return s; } #define format_cstring(fmt, ...) \ (char*)format_string(fmt, ##__VA_ARGS__).data bool is_any (u8 c, string chars) { for_each(i, chars) { if (chars.data[i] == c) return true; } return false; } string trim_right (string s, string chars, bool replace_with_zeros) { s64 count = s.count; for_each_reverse(i, s) { if (is_any(s.data[i], chars)) { if (replace_with_zeros) { s.data[i] = 0; } count -= 1; } else { break; } } return string_view(s, 0, count); } string trim_left (string s, string chars) { s64 count = 0; for_each(i, s) { if (is_any(s.data[i], chars)) { count += 1; } else { break; } } return string_view(s, count, s.count - count); } string trim (string s, string chars) { string left_trim = trim_left(s, chars); return trim_right(left_trim, chars, false); } force_inline u32 ctz32(u32 x) { #if COMPILER_MSVC return _tzcnt_u32(x); #else return __builtin_ctz(x); #endif } s64 find_index_of_any_from_right (string s, string bytes) { s64 cursor = s.count-1; while (cursor >= 0) { if (is_any(s[cursor], bytes)) return cursor; cursor -= 1; } return -1; } s64 find_index_from_left (string s, u8 c, s64 start_offset) { // #NOTE (confusing!): start_offset is the start of where we want to scan from, // and the returned offset is based on the START of s.data, not from start_offset! u8* start_p = s.data + start_offset; s64 len = s.count - start_offset; __m256i vneedle = _mm256_set1_epi8((char)c); /** AVX-512BW: __m512i v = _mm512_loadu_si512(p); __mmask64 k = _mm512_cmpeq_epi8_mask(v, vneedle); if (k) return p + _tzcnt_u64(k); */ s64 i = 0; for (; i + 32 <= s.count; i += 32) { // Load 256-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary. __m256i v = _mm256_loadu_si256((__m256i*)(start_p + i)); // Compare packed 8-bit integers in a and b for equality, and store the results in dst. __m256i cmp = _mm256_cmpeq_epi8(v, vneedle); // Create mask from the most significant bit of each 8-bit element in a, and store the result in dst. u32 mask = _mm256_movemask_epi8(cmp); if (mask) { return start_offset + i + ctz32(mask); } } // scalar tail: for (; i < len; i += 1) { if (start_p[i] == c) return start_offset + i; } return -1; } internal bool split_by_char (string s, char c, string* a, string* b) { s64 index = find_index_from_left(s, c); if (index == -1) { (*a) = s; (*b) = {}; return false; } (*a) = trim({index, s.data}); (*b) = trim({s.count-index, s.data + index + 1}); return true; } ArrayView string_split (string s, u8 c) { Array results; string remainder = s; while (remainder.count) { string left; string right; bool found = split_by_char(remainder, c, &left, &right); if (found) { array_add(results, left); } else { array_add(results, remainder); break; } remainder = right; } if (!remainder) array_add(results, string("")); // We want 'split' to return an unambiguous result (was there a match at the end or not?), and to be the inverse of 'join'. For this to happen, if there was a match at the very end, we just add an empty string. Maybe there is a more elegant way to structure the loop to make this happen. return results; } void replace_chars (string s, string chars, u8 replacement) { for_each(i, s) { if (is_any(s[i], chars)) { s[i] = replacement; } } } bool begins_with (string s, string prefix) { if (s.count < prefix.count) return false; string t = string_view(s, 0, prefix.count); return (t == prefix); } // #path manipulation: string path_filename (string path) { s64 index = find_index_of_any_from_right(path, "\\/"); if (index == -1) return path; index += 1; return string_view(path, index, path.count - index); } string path_strip_filename (string path) { s64 index = find_index_of_any_from_right(path, "\\/"); if (index == -1) return ""; return string_view(path, 0, index + 1); } #if OS_WINDOWS string strip_leading_backslash (string path) { if (path == "") return ""; if (path[0] == '\\') { return string_view(path, 1, path.count - 1); } return path; } bool is_absolute_path (string path) { if (path.count < 3) return false; return path[1] == ':' && (path[2] == '/' || path[2] == '\\'); } #endif bool path_extension (string path, string* ext) { (*ext) = ""; s64 index = find_index_of_any_from_right(path, ".\\/"); if (index <= 0) { return false; } if (path[index] != '.') { return false; } u8 previous = path[index-1]; if ( (previous == '\\') || (previous == '/') ) { return false; } if (previous == '.') { if (index == 1) return false; u8 two_previous = path[index-2]; if (two_previous == '\\' || two_previous == '/') { return false; } } (*ext) = string_view(path, index + 1, path.count - index - 1); return true; }