#pragma once // #TODO: #strings: // [ ] see: #Parsing stuff: // [?] How do I accept variadic arguments of any type to my print function? // [ ] Need to sort out how formatted strings and string builders are allocated // [ ] Separate functions for temp alloc (tprint??) // [ ] I should also put path manipulation here or in a separate file? struct string { s64 count; u8* data; // Construct from a string literal or C-string string () { // default constructor count = 0; data = nullptr; } string (char* cstr) { count = strlen(cstr); data = (u8*)cstr; } string (u8* cstr) { count = strlen((char*)cstr); data = cstr; } string (s64 _count, char* str) { count = _count; data = (u8*)str; } string (s64 _count, u8* str) { count = _count; data = str; } bool operator==(const string& other) const { string first_string = *this; string second_string = other; // return strings_match(*this, other); if (first_string.count != second_string.count) { return false; } for (s64 i = 0; i < first_string.count; i += 1) { if (first_string.data[i] != second_string.data[i]) { return false; } } return true; } bool operator ! () { Assert(count >= 0); return (data == nullptr || count == 0); } bool operator!=(const string& other) const { return !(*this == other); } u8& operator[](s64 index) { #if ARRAY_ENABLE_BOUNDS_CHECKING if (index < 0 || index >= count) { debug_break(); } // index out of bounds #endif return data[index]; } }; struct wstring { s64 count; u16* data; wstring () { // default constructor count = 0; data = nullptr; } wstring (s32 length) { data = NewArray(length + 1); s32 length_bytes = (length + 1) * sizeof(u16); count = length_bytes; } wstring (s64 _count, u16* _data) { count = _count; data = _data; } bool operator ! () { Assert(count >= 0); return (data == nullptr || count == 0); } u16& operator[](s64 index) { #if ARRAY_ENABLE_BOUNDS_CHECKING if (index < 0 || index >= count) { debug_break(); } // index out of bounds #endif return data[index]; } }; // ~Keep these API bool is_valid (string s); bool is_c_string (string s); u8* to_c_string (string s); // #allocates force_inline string copy_string (Allocator allocator, string s); string copy_string (string s); // #allocates, returned string is #null-terminated. string copy_string_no_context (string s); string copy_string_untracked (string s); void string_free_no_context (string& s); void string_free_untracked (string s); string copy_string (char* c_string); // #allocates, returned string is #null-terminated. string to_string (ArrayView str); ArrayView to_view (string s); void string_free(string& s); // String manipulation & comparison force_inline string string_view (string s, s64 start_index, s64 view_count); bool strings_match (string first_string, string second_string); // #Unicode string wide_to_utf8 (u16* source, s32 length=-1); wstring utf8_to_wide (string source); // string format_string_temp (char* format, ...); force_inline string format_string (Allocator allocator, char* format, ...); string format_string (char* format, ...); // string format_string_no_context (char* format, ...); string to_lower_copy (string s_orig); string DEFAULT_SPACES = " \r\t\n"; string trim_right (string s, string chars=DEFAULT_SPACES, bool replace_with_zeros=true); string trim_left (string s, string chars=DEFAULT_SPACES); string trim (string s, string chars=DEFAULT_SPACES); s64 find_index_of_any_from_right (string s, string bytes); s64 find_index_from_left (string s, u8 c, s64 start_offset=0); ArrayView string_split (string s, u8 c); // #path manipulation: string path_filename (string path); string path_strip_filename (string path); // #TODO #Parsing stuff: // is_white_space(char: u8) // advance // eat_spaces // string to type or type to string conversions // s64 string_to_int (string v, s32 base = 10, s64* remainder=nullptr); // f64 string_to_f64 // f32 string_to_f32 // Need an API for inserting various types (ints, floats, etc.) into a String_Builder, and advancing // the count. internal force_inline u16 ascii_char_sort_key (u8 c) { // Non-ASCII UTF-8 byte → unknown symbol (comes first) if (c & 0x80) { return (0u << 12) | c; } // group: 0 = punct/symbol, 1 = digit, 2 = letter if (c >= '0' && c <= '9') { return (1u << 12) | (u16)(c - '0'); } if (c >= 'a' && c <= 'z') { // lowercase first return (2u << 12) | (u16)((c - 'a') << 1); } if (c >= 'A' && c <= 'Z') { // uppercase second return (2u << 12) | (u16)((c - 'A') << 1) | 1u; } // punctuation / symbols: preserve ASCII order return (0u << 12) | c; } s32 string_lexicographical_compare(const void* p_a, const void* p_b) { string* a = (string*)p_a; string* b = (string*)p_b; s64 n = (a->count < b->count) ? a->count : b->count; for (s64 i = 0; i < n; i += 1) { u16 ka = ascii_char_sort_key(a->data[i]); u16 kb = ascii_char_sort_key(b->data[i]); if (ka < kb) return -1; if (ka > kb) return 1; } // if all shared prefix bytes are equal, the shorter string comes first. if (a->count < b->count) return -1; if (a->count > b->count) return 1; return 0; } s32 string_lexicographical_compare_memcmp(const void* a, const void* b) { // byte-wise lexicographical order, not locale-aware string *sa = (string*)a; string *sb = (string*)b; s64 min_count = sa->count < sb->count ? sa->count : sb->count; s32 r = memcmp(sa->data, sb->data, min_count); if (r != 0) return r; // if all shared prefix bytes are equal, the shorter string comes first. if (sa->count < sb->count) return -1; if (sa->count > sb->count) return 1; return 0; } s32 string_index_lexicographical_compare(const void* p_a, const void* p_b, void* ctx) { u32 ia = *(u32 *)p_a; u32 ib = *(u32 *)p_b; ArrayView strings = *(ArrayView*)ctx; return string_lexicographical_compare(&strings[ia], &strings[ib]); } // #TODO: This is really slow! Even in release mode!! force_inline ArrayView string_sort_by_index (ArrayView source) { ArrayView indices = ArrayView(source.count, false); // should not init? for_each(i, indices) { indices[i] = i; } sort_r(indices.data, indices.count, sizeof(u32), string_index_lexicographical_compare, &source); return indices; } struct Tokenizer { string s; u8* start; u8* end; u8* current; }; Tokenizer get_tokenizer_from_string (string s, bool make_copy=true) { Tokenizer t; if (make_copy) { t.s = copy_string(s); } else { t.s = s; } t.start = t.s.data; t.end = t.s.data + t.s.count; t.current = t.start; return t; }