struct Parent_Index { s32 thread_index; // group->worker_info[thread_index].thread s32 parent_index; // index into d_*offsets/lengths/etc. }; struct File_Enumeration_Thread_Results { // #userdata Arena* arena; // for strings // Directories ArenaArray* d_offsets; ArenaArray* d_lengths; ArenaArray* d_parent_indices; ArenaArray* d_modtime; // s64 dirs_enumerated = 0; // Files ArenaArray* f_offsets; ArenaArray* f_lengths; ArenaArray* f_parent_indices; ArenaArray* f_sizes; ArenaArray* f_modtime; }; void initialize (File_Enumeration_Thread_Results* fcr) { // Preallocate for 2^22 files: fcr->arena = next_arena(Arena_Reserve::Size_2G); fcr->d_offsets = arena_array_new(4194304, Arena_Reserve::Size_2G); fcr->d_lengths = arena_array_new(4194304, Arena_Reserve::Size_2G); fcr->d_parent_indices = arena_array_new(4194304, Arena_Reserve::Size_2G); fcr->d_modtime = arena_array_new(4194304, Arena_Reserve::Size_2G); fcr->f_offsets = arena_array_new(4194304, Arena_Reserve::Size_2G); fcr->f_lengths = arena_array_new(4194304, Arena_Reserve::Size_2G); fcr->f_parent_indices = arena_array_new(4194304, Arena_Reserve::Size_2G); fcr->f_sizes = arena_array_new(4194304, Arena_Reserve::Size_2G); fcr->f_modtime = arena_array_new(4194304, Arena_Reserve::Size_2G); } struct Enumeration_Work { string first_directory; Parent_Index parent; bool is_root = false; Array next; }; struct Drive_Enumeration { // master thread struct ArrayView drives; Thread* master_thread; s32 thread_count; bool thread_started; bool thread_completed; // Files_Combined_Results paths; // Files_Combined_Results files; s32 work_added = 0; s32 work_completed = 0; }; // void push_root (Drive_Enumeration* de, string label, s32 index) { // array_add(*de->paths.name, label); // array_add(*de->paths.parent_indices, index); // array_add(*de->paths.sizes, (u64)0); // array_add(*de->paths.modtime, (u64)0); // } global Drive_Enumeration* drive_enumeration; // File_Enumeration_Thread_Results* results_from_thread_index (Thread_Group* group, s32 thread_index) { // return ; // } string path_from_parent_index (Thread_Group* group, Parent_Index pid, Parent_Index* next_pid) { if (pid.parent_index == -1) return ""; auto results = (File_Enumeration_Thread_Results*)group->worker_info[pid.thread_index].thread.context->userdata; u8* offset = (results->arena->memory_base + (*results->d_offsets)[pid.parent_index]); u32 length = (*results->d_lengths)[pid.parent_index]; (*next_pid) = (*results->d_parent_indices)[pid.parent_index]; return {(s64)length, offset}; } // This is much stupider and more complicated than I would like, unfortunately. string directory_get_full_path (Thread_Group* group, Parent_Index pid, string dir_name) { push_allocator(GPAllocator()); // to copy from String_Builder Array paths; paths.allocator = temp(); Parent_Index this_pid = pid; Parent_Index next_pid = {}; string parent_dir = path_from_parent_index(group, this_pid, &next_pid); array_add(paths, parent_dir); this_pid = next_pid; next_pid = {}; while (this_pid.parent_index != -1) { parent_dir = path_from_parent_index(group, this_pid, &next_pid); array_add(paths, parent_dir); this_pid = next_pid; next_pid = {}; } // go in reverse order and add together string String_Builder* sb = new_string_builder(Arena_Reserve::Size_64K); for (s64 i = paths.count-1; i >= 0; i -= 1) { append(sb, paths[i]); append(sb, "\\"); } append(sb, dir_name); return builder_to_string(sb); } /*void update_results (Drive_Enumeration* de, Enumeration_Work* ew) { // merge results and release resources! // unfortunately this is a LOT of copying! for_each(i, (*ew->d_offsets)) { u8* string_ptr = (ew->thread_arena->memory_base + (*ew->d_offsets)[i]); string name = {(*ew->d_lengths)[i], string_ptr}; array_add(*de->paths.name, name); array_add(*de->paths.parent_indices, (*ew->d_parent_indices)[i]); array_add(*de->paths.sizes, (*ew->d_sizes)[i]); array_add(*de->paths.modtime, (*ew->d_modtime)[i]); } for_each(i, (*ew->offsets)) { u8* string_ptr = (ew->thread_arena->memory_base + (*ew->offsets)[i]); string name = {(*ew->lengths)[i], string_ptr}; array_add(*de->files.name, name); array_add(*de->files.parent_indices, (*ew->parent_indices)[i]); array_add(*de->files.sizes, (*ew->sizes)[i]); array_add(*de->files.modtime, (*ew->modtime)[i]); } }*/ void add_record (File_Enumeration_Thread_Results* results, WIN32_FIND_DATAW* find_data, string name, Parent_Index parent_index, bool is_directory) { u32 offset = (u32)(name.data - results->arena->memory_base); u64 size = ((u64)find_data->nFileSizeHigh << 32) | ((u64)find_data->nFileSizeLow & 0xFFFFFFFF); u64 modtime = FILETIME_to_ticks(find_data->ftLastWriteTime); if (is_directory) { array_add((*results->d_offsets), offset); array_add((*results->d_lengths), (s16)name.count); array_add((*results->d_parent_indices), parent_index); // #parent_index array_add((*results->d_modtime), modtime); } else { array_add((*results->f_offsets), offset); array_add((*results->f_lengths), (s16)name.count); array_add((*results->f_parent_indices), parent_index); // #parent_index array_add((*results->f_sizes), size); array_add((*results->f_modtime), modtime); } } Thread_Continue_Status file_enumeration_thread_group_proc (Thread_Group* group, Thread* thread, void* work) { // 1. setup userdata as an Arena*: // #TODO: replace userdata with a struct that manages the thread-local data for this // particular problem. This data can be rescued before we File_Enumeration_Thread_Results* results; if (!thread->context->userdata) { thread->context->userdata = New(GPAllocator()); initialize((File_Enumeration_Thread_Results*)thread->context->userdata); } results = (File_Enumeration_Thread_Results*)thread->context->userdata; Enumeration_Work* enum_work = (Enumeration_Work*)work; // Validate thread context? push_allocator(temp()); auto_release_temp(); // log("file_enumeration_thread_group_proc, thread index: %d", thread->index); // MAKE SURE PATH IS NULL TERMINATED! wstring wildcard_name = utf8_to_wide(format_string("%s\\*", enum_work->first_directory.data)); // #temp WIN32_FIND_DATAW find_data; HANDLE h = FindFirstFileExW((LPCWSTR)wildcard_name.data, FindExInfoBasic, &find_data, FindExSearchNameMatch, nullptr, FIND_FIRST_EX_LARGE_FETCH); if (h == INVALID_HANDLE_VALUE) { return Thread_Continue_Status::CONTINUE; } s32 thread_index = get_thread_index(group, (s32)thread->index); // zero-indexed to thread group Parent_Index pi = enum_work->parent; push_arena(results->arena); if (enum_work->is_root) { // see add_record string name = copy_string(enum_work->first_directory); u32 offset = (u32)(name.data - results->arena->memory_base); s32 current_index = (s32)(*results->d_offsets).count; pi = {thread_index, current_index}; array_add((*results->d_offsets), offset); array_add((*results->d_lengths), (s16)name.count); Parent_Index root_pi = {thread_index, -1}; array_add((*results->d_parent_indices), root_pi); // #parent_index array_add((*results->d_modtime), (u64)0); // results->dirs_enumerated += 1; } while (true) { string name = wide_to_utf8((u16*)find_data.cFileName); bool should_continue = (name.count == 0 || name == "." || name == ".."); if (should_continue) { bool success = FindNextFileW(h, &find_data); if (!success) break; continue; } bool is_directory = (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0; Parent_Index next_index = {thread_index, enum_work->parent.parent_index}; add_record(results, &find_data, name, next_index, is_directory); if (is_directory) { push_allocator(GPAllocator()); auto new_work = New(false); new_work->first_directory = directory_get_full_path(group, pi, name); new_work->parent = next_index; new_work->is_root = false; new_work->next = {}; array_add(enum_work->next, new_work); } bool success = FindNextFileW(h, &find_data); if (!success) break; } FindClose(h); return Thread_Continue_Status::CONTINUE; } s64 multithreaded_file_enumeration_master_proc (Thread* thread) { auto task = thread_task(Drive_Enumeration); Thread_Group* file_enum_thread_group = New(); s32 thread_count = os_cpu_physical_core_count(); push_allocator(GPAllocator()); thread_group_init(file_enum_thread_group, thread_count, file_enumeration_thread_group_proc, true); for_each(d, task->drives) { auto work = New(GPAllocator(), false); //replace with arena bootstrap? work->first_directory = task->drives[d]->label; // this includes the colon-slash, (e.g. `C:\`). work->parent = {-1, -1}; // #HACK: (s32)d work->is_root = true; work->next = {}; add_work(file_enum_thread_group, work); task->work_added += 1; } start(file_enum_thread_group); // set task completed. s64 path_index = task->drives.count; while (true) { auto_release_temp(); ArrayView cw = get_completed_work(file_enum_thread_group); task->work_completed += (s32)cw.count; for_each(i, cw) { auto ew = (Enumeration_Work*)cw[i]; for_each(w, ew->next) { auto new_work = ew->next[w]; add_work(file_enum_thread_group, new_work); } task->work_added += (s32)ew->next.count; string_free(ew->first_directory); array_free(ew->next); internal_free(ew); } log("work completed: %d/%d",task->work_completed, task->work_added); // if (task->work_completed >= task->work_added) break; Sleep(1); } shutdown(file_enum_thread_group); task->thread_completed = true; return 0; } void os_run_file_enumeration_multithreaded () { // Need some struct to track the state of this operation. drive_enumeration = New(); (*drive_enumeration) = { os_get_available_drives(), New(), os_cpu_physical_core_count(), 0, 0, 0, 0 }; // initialize(&drive_enumeration->paths); // initialize(&drive_enumeration->files); // We start 1 thread to run the thread group and track the threads string thread_name = "Multithreaded Enumeration: Master Thread"; bool success = thread_init(drive_enumeration->master_thread, multithreaded_file_enumeration_master_proc, thread_name); Assert(success); thread_start(drive_enumeration->master_thread, drive_enumeration); drive_enumeration->thread_started = true; } bool file_enum_multithreading_started () { if (drive_enumeration == nullptr) return false; return drive_enumeration->thread_started; } bool file_enum_multithreading_active () { if (drive_enumeration == nullptr) return false; if (drive_enumeration->thread_completed) { return false; } if (drive_enumeration->thread_started) { return true; } return false; }