255 lines
6.8 KiB
C
255 lines
6.8 KiB
C
#pragma once
|
|
// #TODO: #strings:
|
|
// [ ] see: #Parsing stuff:
|
|
// [?] How do I accept variadic arguments of any type to my print function?
|
|
// [ ] Need to sort out how formatted strings and string builders are allocated
|
|
// [ ] Separate functions for temp alloc (tprint??)
|
|
// [ ] I should also put path manipulation here or in a separate file?
|
|
|
|
struct string {
|
|
s64 count;
|
|
u8* data;
|
|
// Construct from a string literal or C-string
|
|
string () { // default constructor
|
|
count = 0;
|
|
data = nullptr;
|
|
}
|
|
|
|
string (char* cstr) {
|
|
count = strlen(cstr);
|
|
data = (u8*)cstr;
|
|
}
|
|
|
|
string (u8* cstr) {
|
|
count = strlen((char*)cstr);
|
|
data = cstr;
|
|
}
|
|
|
|
string (s64 _count, char* str) { count = _count; data = (u8*)str; }
|
|
string (s64 _count, u8* str) { count = _count; data = str; }
|
|
|
|
bool operator==(const string& other) const {
|
|
string first_string = *this;
|
|
string second_string = other;
|
|
// return strings_match(*this, other);
|
|
if (first_string.count != second_string.count) {
|
|
return false;
|
|
}
|
|
|
|
for (s64 i = 0; i < first_string.count; i += 1) {
|
|
if (first_string.data[i] != second_string.data[i]) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool operator ! () {
|
|
Assert(count >= 0);
|
|
return (data == nullptr || count == 0);
|
|
}
|
|
|
|
bool operator!=(const string& other) const {
|
|
return !(*this == other);
|
|
}
|
|
|
|
u8& operator[](s64 index) {
|
|
#if ARRAY_ENABLE_BOUNDS_CHECKING
|
|
if (index < 0 || index >= count) { debug_break(); } // index out of bounds
|
|
#endif
|
|
return data[index];
|
|
}
|
|
};
|
|
|
|
struct wstring {
|
|
s64 count;
|
|
u16* data;
|
|
|
|
wstring () { // default constructor
|
|
count = 0;
|
|
data = nullptr;
|
|
}
|
|
|
|
wstring (s32 length) {
|
|
data = NewArray<u16>(length + 1);
|
|
s32 length_bytes = (length + 1) * sizeof(u16);
|
|
count = length_bytes;
|
|
}
|
|
|
|
wstring (s64 _count, u16* _data) {
|
|
count = _count;
|
|
data = _data;
|
|
}
|
|
|
|
bool operator ! () {
|
|
Assert(count >= 0);
|
|
return (data == nullptr || count == 0);
|
|
}
|
|
|
|
u16& operator[](s64 index) {
|
|
#if ARRAY_ENABLE_BOUNDS_CHECKING
|
|
if (index < 0 || index >= count) { debug_break(); } // index out of bounds
|
|
#endif
|
|
return data[index];
|
|
}
|
|
};
|
|
|
|
// ~Keep this API
// Declarations only: the definitions live elsewhere, so the notes below
// describe nothing beyond what the signatures and existing tags show.
|
|
bool is_valid (string s);
|
|
bool is_c_string (string s);
|
|
u8* to_c_string (string s); // #allocates
|
|
// copy_string family: overloads taking an explicit Allocator, a string, or a
// C-string, plus *_no_context / *_untracked variants.
// NOTE(review): presumably these differ only in which allocator or tracking
// path is used - confirm against the definitions.
force_inline string copy_string (Allocator allocator, string s);
|
|
string copy_string (string s); // #allocates, returned string is #null-terminated.
|
|
string copy_string_no_context (string s);
|
|
string copy_string_untracked (string s);
|
|
void string_free_no_context (string& s);
|
|
void string_free_untracked (string s);
|
|
string copy_string (char* c_string); // #allocates, returned string is #null-terminated.
|
|
string to_string (ArrayView<u8> str);
|
|
ArrayView<u8> to_view (string s);
|
|
void string_free(string& s);
|
|
|
|
// String manipulation & comparison
|
|
force_inline string string_view (string s, s64 start_index, s64 view_count);
|
|
bool strings_match (string first_string, string second_string);
|
|
|
|
// #Unicode
|
|
// `length` defaults to -1.
// NOTE(review): presumably -1 means "source is null-terminated" - confirm.
string wide_to_utf8 (u16* source, s32 length=-1);
|
|
wstring utf8_to_wide (string source);
|
|
|
|
// string format_string_temp (char* format, ...);
|
|
// printf-like variadic signatures (format pointer followed by `...`).
force_inline string format_string (Allocator allocator, char* format, ...);
|
|
string format_string (char* format, ...);
|
|
// string format_string_no_context (char* format, ...);
|
|
|
|
string to_lower_copy (string s_orig);
|
|
|
|
// Default `chars` argument for the trim functions: space, CR, tab, LF.
// NOTE(review): this is a variable definition (not just a declaration) inside
// a `#pragma once` header; it is only safe while the header ends up in a
// single translation unit.
string DEFAULT_SPACES = " \r\t\n";
|
|
// `replace_with_zeros` defaults to true.
// NOTE(review): presumably trimmed bytes are overwritten with 0 in place - confirm.
string trim_right (string s, string chars=DEFAULT_SPACES, bool replace_with_zeros=true);
|
|
string trim_left (string s, string chars=DEFAULT_SPACES);
|
|
string trim (string s, string chars=DEFAULT_SPACES);
|
|
|
|
// Search helpers returning an s64 index.
// NOTE(review): the "not found" value is not visible from here - check the definitions.
s64 find_index_of_any_from_right (string s, string bytes);
|
|
s64 find_index_from_left (string s, u8 c, s64 start_offset=0);
|
|
|
|
ArrayView<string> string_split (string s, u8 c);
|
|
|
|
// #path manipulation:
|
|
string path_filename (string path);
|
|
string path_strip_filename (string path);
|
|
|
|
// #TODO #Parsing stuff:
|
|
// is_white_space(char: u8)
|
|
// advance
|
|
// eat_spaces
|
|
|
|
// string to type or type to string conversions
|
|
// s64 string_to_int (string v, s32 base = 10, s64* remainder=nullptr);
|
|
// f64 string_to_f64
|
|
// f32 string_to_f32
|
|
|
|
// Need an API for inserting various types (ints, floats, etc.) into a String_Builder, and advancing
|
|
// the count.
|
|
|
|
// Map one byte to a 16-bit sort key laid out as (group << 12) | rank.
//   group 0: punctuation, symbols, control bytes and every byte with the high
//            bit set (non-ASCII UTF-8 bytes); rank is the raw byte value, so
//            plain byte order is preserved inside the group.
//   group 1: digits '0'..'9'; rank is the digit value.
//   group 2: letters; rank interleaves case so that 'a' < 'A' < 'b' < 'B' ...
internal force_inline u16 ascii_char_sort_key (u8 c) {
    // Default: group 0, ranked by raw byte value. Bytes >= 0x80 can never
    // match the ASCII ranges below, so they always keep this default.
    u16 group = 0;
    u16 rank  = c;

    if (c >= '0' && c <= '9') {
        group = 1;
        rank  = (u16)(c - '0');
    } else if (c >= 'a' && c <= 'z') {
        group = 2;
        rank  = (u16)((c - 'a') * 2);     // even slot: lowercase first
    } else if (c >= 'A' && c <= 'Z') {
        group = 2;
        rank  = (u16)((c - 'A') * 2 + 1); // odd slot: uppercase second
    }

    return (u16)((group << 12) | rank);
}
|
|
|
|
// Comparator over two `string`s passed as untyped pointers.
// Bytes are compared position by position through ascii_char_sort_key; the
// first differing key decides. If one string is a prefix of the other, the
// shorter one sorts first. Returns -1, 0 or 1.
s32 string_lexicographical_compare(const void* p_a, const void* p_b) {
    const string* lhs = (const string*)p_a;
    const string* rhs = (const string*)p_b;

    // Only the overlapping prefix can be compared byte by byte.
    s64 shared = lhs->count;
    if (rhs->count < shared) {
        shared = rhs->count;
    }

    for (s64 at = 0; at < shared; at += 1) {
        u16 key_l = ascii_char_sort_key(lhs->data[at]);
        u16 key_r = ascii_char_sort_key(rhs->data[at]);

        if (key_l != key_r) {
            return (key_l < key_r) ? -1 : 1;
        }
    }

    // Shared prefix is identical: length breaks the tie.
    if (lhs->count != rhs->count) {
        return (lhs->count < rhs->count) ? -1 : 1;
    }

    return 0;
}
|
|
|
|
// Comparator with the (const void*, const void*) shape, ordering two `string`s
// by raw byte value (not locale-aware).
// Returns a negative value, zero, or a positive value. When one string is a
// prefix of the other, the shorter one sorts first.
s32 string_lexicographical_compare_memcmp(const void* a, const void* b) {
    const string* sa = (const string*)a;
    const string* sb = (const string*)b;

    s64 min_count = sa->count < sb->count ? sa->count : sb->count;

    // memcmp requires valid pointers even when the length is zero, and an
    // empty string may have data == nullptr (see string's default
    // constructor), so only call it when there are bytes to compare.
    if (min_count > 0) {
        s32 r = memcmp(sa->data, sb->data, (size_t)min_count);
        if (r != 0) return r;
    }

    // if all shared prefix bytes are equal, the shorter string comes first.
    if (sa->count < sb->count) return -1;
    if (sa->count > sb->count) return 1;
    return 0;
}
|
|
|
|
// Three-argument comparator for sorting an array of u32 indices.
// `p_a` / `p_b` point at two indices and `ctx` points at the ArrayView<string>
// they index into; the referenced strings are ordered with
// string_lexicographical_compare.
s32 string_index_lexicographical_compare(const void* p_a, const void* p_b, void* ctx) {
    ArrayView<string>& strings = *(ArrayView<string>*)ctx;

    u32 index_a = *(u32 *)p_a;
    u32 index_b = *(u32 *)p_b;

    const void* first  = &strings[index_a];
    const void* second = &strings[index_b];

    return string_lexicographical_compare(first, second);
}
|
|
|
|
// #TODO: This is really slow! Even in release mode!!
|
|
force_inline ArrayView<u32> string_sort_by_index (ArrayView<string> source) {
|
|
ArrayView<u32> indices = ArrayView<u32>(source.count, false); // should not init?
|
|
for_each(i, indices) { indices[i] = i; }
|
|
|
|
sort_r(indices.data, indices.count, sizeof(u32), string_index_lexicographical_compare, &source);
|
|
|
|
return indices;
|
|
}
|
|
|
|
struct Tokenizer {
|
|
string s;
|
|
u8* start;
|
|
u8* end;
|
|
u8* current;
|
|
};
|
|
|
|
// Create a Tokenizer positioned at the first byte of `s`.
// With make_copy (the default) the tokenizer holds the result of
// copy_string(s); otherwise it refers directly to the caller's bytes.
Tokenizer get_tokenizer_from_string (string s, bool make_copy=true) {
    Tokenizer tokenizer;

    tokenizer.s = make_copy ? copy_string(s) : s;

    // All three pointers are derived from the string the tokenizer holds,
    // which may be the copy rather than the argument.
    tokenizer.start   = tokenizer.s.data;
    tokenizer.current = tokenizer.s.data;
    tokenizer.end     = tokenizer.s.data + tokenizer.s.count;

    return tokenizer;
}
|
|
|