Musa-STL-Cpp/lib/Base/File_Search.h

218 lines
7.9 KiB
C

// Filter options carried by a File_Search (see File_Search::params).
// NOTE(review): nothing in the visible code reads these flags yet, so the
// per-field meanings are taken from the field names only.
struct File_Search_Filter_Params {
    bool search_directories{true};
    bool search_files{true};
    bool exact_match{false};
    // Should eventually be a sort-order enum rather than a flag. :SortOrderComboBox
    bool order_most_recent{false};
    bool match_word{false};
    bool case_sensitive{false};
    // #TODO: File data
    // Preprocess params
    // ArrayView<bool> include_drive; // defaults all to true.
};
// State for a file search: the query text, its filter options, the
// enumerated filesystem data that is searched (Windows only), and the thread
// group handed to thread_group_init/thread_group_start by
// file_search_run_search_query.
struct File_Search {
    string query;
    File_Search_Filter_Params params;
    // Filesystem Data:
#if OS_WINDOWS
    // Defaults to null so that the `fs->src != nullptr` check in
    // file_search_run_search_query is meaningful for a File_Search that was
    // never given enumeration data (previously the pointer was uninitialized).
    Win32_File_Enumeration* src = nullptr;
    // #TODO: reserve space for result indices!
#endif
    Thread_Group thread_group;
};
// Proc passed to thread_group_init for File_Search::thread_group.
// Placeholder: ignores its arguments, performs no work, and always returns
// Thread_Continue_Status::CONTINUE.
Thread_Continue_Status file_search_thread_group_proc (Thread_Group* group, Thread* thread, void* work) {
    Thread_Continue_Status status = Thread_Continue_Status::CONTINUE;
    return status;
}
// Pre-processing step for a search query. Work in progress: at the moment it
// only declares an (unused) array for the query pieces and binds it to the
// default allocator; neither `fs` nor `query` is read yet.
void prepare_file_search (File_Search* fs, string query) {
    // Reminder: every param will have to be copied out of File_Search into
    // File_Search_Task/MT.
    //
    // A Tokenizer (get_tokenizer_from_string(query)) is the wrong tool here;
    // the query only needs to be split up so each piece can be analyzed on
    // its own.

    // Step 1: split on spaces or other significant symbols such as `|`.
    Array<string> query_parts;
    query_parts.allocator = default_allocator();

    // Step 2: build filters from the pieces:
    //   a. drive letters, either standalone or followed by further
    //      directories (e.g. `C:\jai`)
    //   b. the "NOT" operator `!`
}
// Entry point for running `query` against the data in `fs->src`.
// Asserts (and returns early) when no source data is attached, runs
// prepare_file_search, then initializes and starts fs->thread_group the first
// time it is called. Splitting the work across the group is still a TODO.
void file_search_run_search_query (File_Search* fs, string query) {
    // The enumeration data must be present before anything else happens.
    Assert(fs->src != nullptr);
    if (!fs->src) return;

    prepare_file_search(fs, query);

    // Planned step 1: preprocess and configure the filter parameters:
    //   a. look for drive letters such as `C:` and `C:\` in the query
    // This needs the query to be tokenized first, e.g.
    //   Tokenizer query_tokenizer = {query, 0};

    // The thread group is set up only once.
    if (fs->thread_group.initialized) return;

    // Thread count choices: os_cpu_physical_core_count (used below) or
    // os_cpu_logical_core_count.
    push_allocator(default_allocator());
    string thread_group_label = "File_Search";
    bool enable_work_stealing = false;
    thread_group_init(&fs->thread_group, os_cpu_physical_core_count(), file_search_thread_group_proc,
                      thread_group_label, enable_work_stealing);

    // #TODO: split up work.
    // Assert(params->include_drive.count == fs->src->drive_data.count);
    // for_each(d, fs->src->drive_data) {
    //     if (!params->include_drive[d]) continue; // only include drives we're actually searching.
    //     // #current
    // }

    thread_group_start(&fs->thread_group);
}
// Shared state for the bit-table / memchr search experiment below.
// Thread group whose workers run bit_table_testing_thread_group_proc; it is
// initialized and started on first use by bit_table_testing_init().
global Thread_Group bit_table_thread_group;
// Incremented once for every task queued by bit_table_testing_add_query().
global s64 bit_table_thread_group_work_added = 0;
// NOTE(review): only reset (never incremented) in the code visible here;
// presumably bumped wherever completed work is collected - confirm.
global s64 bit_table_thread_group_work_completed = 0;
// Set from GetUnixTimestamp() at the start of bit_table_testing_add_query()
// and read by the worker proc to report how long a task took.
global f64 bit_table_task_start_time = 0;
// NOTE(review): only reset in the code visible here; writer not shown.
global f64 bit_table_task_end_time_main_thread = 0;
// NOTE(review): only reset in the code visible here; writer not shown.
global s64 bit_table_task_result_count = 0;
// Zeroes every bit-table bookkeeping global (counters and timestamps).
// The thread group itself is left as it is.
void bit_table_task_reset_global_state () {
    // s64 counters
    bit_table_thread_group_work_added
        = bit_table_thread_group_work_completed
        = bit_table_task_result_count
        = 0;
    // f64 timestamps
    bit_table_task_start_time
        = bit_table_task_end_time_main_thread
        = 0;
}
// One unit of work for bit_table_thread_group: a contiguous slice of a single
// drive's directory or file table, plus the query to test against it.
// Tasks are created in bit_table_testing_add_query() and processed by
// bit_table_testing_thread_group_proc().
struct Bit_Table_Test_Work {
// Bit array built from `query` (Bit_Array_Add_Value_Ascii_No_Case); only used
// by the currently commented-out bit-array matching path.
Bit_Array bit_query;
// View onto this task's slice of the drive's fst_dirs / fst_files bit arrays.
ArrayView<Bit_Array> bit_arrays;
// Output: indices (index_offset + position in slice) of entries that matched.
Array<u32> results;
// Index of the first entry of this slice within the drive's table.
s32 index_offset;
// Number of entries the worker scans, starting at index_offset.
s32 sample_count;
b32 directories; // either it's fst_dirs or fst_files.
// Task-owned copy of the query. Besides logging, the worker also reads
// query.data[0] as the character to search for.
string query; // for debugging / printing
// For memchr stuff:
// Drive whose entry names are fetched via directory_name_string_view /
// file_name_string_view in the worker.
Win32_File_Enumeration_Drive* drive;
};
// Releases the members of a task that have matching free routines: the query
// copy, the query bit array and the results array.
// The Bit_Table_Test_Work struct itself is NOT freed here, and the
// `bit_arrays` view and `drive` pointer are left untouched.
// Passing null is a no-op.
void delete_task (Bit_Table_Test_Work* btt) {
    if (btt == nullptr) return; // nothing to release

    string_free(btt->query);
    bit_array_delete(&btt->bit_query);
    array_free(btt->results);
}
// Worker proc for bit_table_thread_group.
// Scans `sample_count` entries of the task's drive, starting at
// `index_offset`, and appends to `btt->results` the index of every directory
// or file name that contains the single ASCII letter in `btt->query`
// (either case). Logs the time elapsed since bit_table_task_start_time and
// always returns Thread_Continue_Status::CONTINUE.
//
// Only single-letter queries are implemented; any other query trips an Assert
// and yields no results.
Thread_Continue_Status bit_table_testing_thread_group_proc (Thread_Group* group, Thread* thread, void* work) {
    Bit_Table_Test_Work* btt = thread_group_task(Bit_Table_Test_Work);
    // debug_break();
    auto_release_temp();
    // We need to use filter params here to check if this is correct.
    Assert(btt->query.count == 1);

    // The query character is only inspected when there is something to scan,
    // exactly as before; it is now classified once instead of once per entry.
    if (btt->sample_count > 0) {
        const auto query_char = btt->query.data[0];
        if (is_ascii_letter(query_char)) {
            const u8 lower_case = to_lower_ascii(query_char);
            const u8 upper_case = to_upper_ascii(query_char);

            for (s64 b = 0; b < btt->sample_count; b += 1) {
                const auto entry_index = btt->index_offset + b;

                string current_entry;
                if (btt->directories) {
                    current_entry = directory_name_string_view(btt->drive, entry_index);
                } else {
                    current_entry = file_name_string_view(btt->drive, entry_index);
                }

                // memchr (find characters in a buffer)
                // NOTE(review): the length is narrowed to u16; assumes entry
                // names never exceed 65535 bytes - confirm.
                if (memchr_avx2(current_entry.data, lower_case, (u16)current_entry.count)
                 || memchr_avx2(current_entry.data, upper_case, (u16)current_entry.count)) {
                    array_add(btt->results, (u32)entry_index);
                }
            }
        } else {
            // Reported once rather than once per entry; no entry can match.
            Assert(false); // not yet implemented for symbols!
        }
    }

    // Alternatives that were tried / considered:
    //   - plain memchr on any X64:
    //       memchr(current_entry.data, lower_case, current_entry.count)
    //         || memchr(current_entry.data, upper_case, current_entry.count)
    //   - testing the bit arrays:
    //       for_each(b, btt->bit_arrays)
    //           if (contains_single_ascii(&btt->bit_arrays[b], &btt->bit_query))
    //               array_add(btt->results, (u32)b);

    f64 elapsed_time = GetUnixTimestamp()-bit_table_task_start_time;
    push_allocator(temp());
    // NOTE(review): `%s` with query.data assumes copy_string() produced a
    // null-terminated buffer - confirm.
    log("Done with query %s in %s",
        btt->query.data,
        format_time_seconds(elapsed_time).data);
    return Thread_Continue_Status::CONTINUE;
}
// Sets up bit_table_thread_group on first use: one worker per logical core,
// work stealing enabled, running bit_table_testing_thread_group_proc.
// Calls after the group is initialized do nothing.
void bit_table_testing_init () {
    if (!bit_table_thread_group.initialized) {
        bool enable_work_stealing = true;
        thread_group_init(&bit_table_thread_group,
                          os_cpu_logical_core_count(),
                          bit_table_testing_thread_group_proc,
                          "bit_table_thread_group",
                          enable_work_stealing);
        thread_group_start(&bit_table_thread_group);
    }
}
void bit_table_testing_add_query (string query) {
bit_table_testing_init();
bit_table_task_start_time = GetUnixTimestamp();
s32 thread_count = bit_table_thread_group.worker_info.count;
// dirs...
for_each(d, w32fe->drive_data) { // for each drive
auto drive = &w32fe->drive_data[d];
s64 dirs_per_thread = directory_count(drive) / thread_count;
s64 start_offset = 0;
auto src_array = to_view(drive->fst_dirs.bit_arrays);
for (s32 i = 0; i < thread_count; i += 1) {
auto btt = New<Bit_Table_Test_Work>();
btt->drive = drive;
btt->query = copy_string(query);
bit_array_initialize(&btt->bit_query, 128);
Bit_Array_Add_Value_Ascii_No_Case(&btt->bit_query, btt->query);
if (i == thread_count - 1) {
dirs_per_thread = (src_array.count - start_offset);
}
btt->bit_arrays = array_view(src_array, start_offset, dirs_per_thread);
btt->index_offset = start_offset;
btt->sample_count = dirs_per_thread;
start_offset += dirs_per_thread;
btt->results = Array<u32>(context_allocator(), btt->bit_arrays.count);
btt->directories = true;
add_work(&bit_table_thread_group, btt);
bit_table_thread_group_work_added += 1;
}
Assert(start_offset == directory_count(drive));
}
// files...
for_each(d, w32fe->drive_data) { // for each drive
auto drive = &w32fe->drive_data[d];
s64 files_per_thread = file_count(drive) / thread_count;
s64 start_offset = 0;
auto src_array = to_view(drive->fst_files.bit_arrays);
for (s32 i = 0; i < thread_count; i += 1) {
auto btt = New<Bit_Table_Test_Work>();
btt->drive = drive;
btt->query = copy_string(query);
bit_array_initialize(&btt->bit_query, 128);
Bit_Array_Add_Value_Ascii_No_Case(&btt->bit_query, btt->query);
if (i == thread_count - 1) {
files_per_thread = (src_array.count - start_offset);
}
btt->bit_arrays = array_view(src_array, start_offset, files_per_thread);
btt->index_offset = start_offset;
btt->sample_count = files_per_thread;
start_offset += files_per_thread;
btt->results = Array<u32>(context_allocator(), btt->bit_arrays.count);
btt->directories = false;
add_work(&bit_table_thread_group, btt);
bit_table_thread_group_work_added += 1;
}
Assert(start_offset == file_count(drive));
}
}