Musa-Cpp-Lib-V2/lib/OS/OS_Win32_File_Enumeration.cpp

335 lines
11 KiB
C++

// Identifies one directory record: which worker thread's result set holds it,
// and the slot of that record inside that result set's directory arrays.
struct Parent_Index {
s32 thread_index; // group->worker_info[thread_index].thread
s32 parent_index; // index into d_*offsets/lengths/etc.; -1 is treated as "no record" by the path lookup
};
// Per-worker-thread enumeration output, stored in thread->context->userdata.
// Directories and files are each kept as parallel arrays: entry i of every
// d_* (or f_*) array describes the same record.
struct File_Enumeration_Thread_Results { // #userdata
Arena* arena; // for strings
// Directories
ArenaArray<u32>* d_offsets; // name position, relative to arena->memory_base
ArenaArray<s16>* d_lengths; // name length (string count, narrowed to s16)
ArenaArray<Parent_Index>* d_parent_indices; // record of the containing directory
ArenaArray<u64>* d_modtime; // FILETIME_to_ticks(ftLastWriteTime); 0 for root records
// s64 dirs_enumerated = 0;
// Files
ArenaArray<u32>* f_offsets; // name position, relative to arena->memory_base
ArenaArray<s16>* f_lengths; // name length (string count, narrowed to s16)
ArenaArray<Parent_Index>* f_parent_indices; // record of the containing directory
ArenaArray<u64>* f_sizes; // nFileSizeHigh:nFileSizeLow combined into 64 bits
ArenaArray<u64>* f_modtime; // FILETIME_to_ticks(ftLastWriteTime)
};
// Creates the storage behind one worker's results: a string arena plus every
// directory and file column, each requested with room for 2^22 entries.
void initialize (File_Enumeration_Thread_Results* fcr) {
    const int initial_capacity = 4194304; // 2^22

    // Name storage.
    fcr->arena = next_arena(Arena_Reserve::Size_2G);

    // Directory columns.
    fcr->d_offsets        = arena_array_new<u32>(initial_capacity, Arena_Reserve::Size_2G);
    fcr->d_lengths        = arena_array_new<s16>(initial_capacity, Arena_Reserve::Size_2G);
    fcr->d_parent_indices = arena_array_new<Parent_Index>(initial_capacity, Arena_Reserve::Size_2G);
    fcr->d_modtime        = arena_array_new<u64>(initial_capacity, Arena_Reserve::Size_2G);

    // File columns.
    fcr->f_offsets        = arena_array_new<u32>(initial_capacity, Arena_Reserve::Size_2G);
    fcr->f_lengths        = arena_array_new<s16>(initial_capacity, Arena_Reserve::Size_2G);
    fcr->f_parent_indices = arena_array_new<Parent_Index>(initial_capacity, Arena_Reserve::Size_2G);
    fcr->f_sizes          = arena_array_new<u64>(initial_capacity, Arena_Reserve::Size_2G);
    fcr->f_modtime        = arena_array_new<u64>(initial_capacity, Arena_Reserve::Size_2G);
}
// One unit of work for the thread group: list the entries of a single directory.
struct Enumeration_Work {
string first_directory; // directory to list; the worker appends "\\*" to build the search pattern
Parent_Index parent; // starting parent index for the worker; root work is created with {-1, -1}
bool is_root = false; // when set, the worker first adds a directory record for first_directory itself
Array<Enumeration_Work*> next; // work items the worker created for subdirectories; queued by the master thread
};
// Bookkeeping for one enumeration run. Initialised positionally in
// os_run_file_enumeration_multithreaded(), so member order matters.
struct Drive_Enumeration { // master thread struct
ArrayView<OS_Drive*> drives; // one root work item is queued per drive
Thread* master_thread; // runs multithreaded_file_enumeration_master_proc
s32 thread_count; // filled from os_cpu_physical_core_count() at startup
bool thread_started; // set right after thread_start() is called on master_thread
bool thread_completed; // set by the master thread after it shuts the thread group down
// Files_Combined_Results paths;
// Files_Combined_Results files;
s32 work_added = 0; // work items handed to the thread group so far
s32 work_completed = 0; // completed work items collected back so far
};
// void push_root (Drive_Enumeration* de, string label, s32 index) {
// array_add(*de->paths.name, label);
// array_add(*de->paths.parent_indices, index);
// array_add(*de->paths.sizes, (u64)0);
// array_add(*de->paths.modtime, (u64)0);
// }
// State of the enumeration run. Allocated by os_run_file_enumeration_multithreaded();
// the file_enum_multithreading_* queries treat nullptr as "not started".
global Drive_Enumeration* drive_enumeration;
// File_Enumeration_Thread_Results* results_from_thread_index (Thread_Group* group, s32 thread_index) {
// return ;
// }
// Resolves the directory record identified by `pid` to its name and reports that
// record's own parent through `next_pid`, so a caller can keep walking upward.
//
//   group:    thread group whose workers hold the per-thread results.
//   pid:      record to resolve; parent_index == -1 means "no record".
//   next_pid: out-parameter, always written: the parent of `pid`, or {-1, -1}
//             when `pid` is the sentinel.
//
// Returns a string that points directly into the owning thread's string arena
// (no copy is made), or "" for the sentinel.
string path_from_parent_index (Thread_Group* group, Parent_Index pid, Parent_Index* next_pid) {
    if (pid.parent_index == -1) {
        // Callers pre-set next_pid to {} (== {0, 0}), which looks like a valid
        // record. Write the sentinel explicitly so the upward walk stops here
        // instead of wandering into thread 0 / record 0.
        (*next_pid) = {-1, -1};
        return "";
    }

    auto results = (File_Enumeration_Thread_Results*)group->worker_info[pid.thread_index].thread.context->userdata;

    // Names are stored as (offset, length) pairs relative to the arena base.
    u8* offset = (results->arena->memory_base + (*results->d_offsets)[pid.parent_index]);
    u32 length = (*results->d_lengths)[pid.parent_index];

    (*next_pid) = (*results->d_parent_indices)[pid.parent_index];
    return {(s64)length, offset};
}
// Builds the path of `dir_name` by walking the parent chain that starts at `pid`,
// then joining every collected name (outermost first) with a trailing backslash
// and finally appending `dir_name`.
// More roundabout than one would like: names are only reachable by following
// parent indices through the per-thread result arrays.
string directory_get_full_path (Thread_Group* group, Parent_Index pid, string dir_name) {
    push_allocator(GPAllocator()); // to copy from String_Builder

    // Scratch list of path segments, innermost first.
    Array<string> segments;
    segments.allocator = temp();

    // The starting record is always looked up once; after that, keep climbing
    // until a record reports -1 as its parent.
    Parent_Index cursor = pid;
    do {
        Parent_Index above = {};
        string segment = path_from_parent_index(group, cursor, &above);
        array_add(segments, segment);
        cursor = above;
    } while (cursor.parent_index != -1);

    // Emit the segments back-to-front so the outermost directory comes first.
    String_Builder* sb = new_string_builder(Arena_Reserve::Size_64K);
    s64 remaining = segments.count;
    while (remaining > 0) {
        remaining -= 1;
        append(sb, segments[remaining]);
        append(sb, "\\");
    }
    append(sb, dir_name);

    return builder_to_string(sb);
}
/*void update_results (Drive_Enumeration* de, Enumeration_Work* ew) {
// merge results and release resources!
// unfortunately this is a LOT of copying!
for_each(i, (*ew->d_offsets)) {
u8* string_ptr = (ew->thread_arena->memory_base + (*ew->d_offsets)[i]);
string name = {(*ew->d_lengths)[i], string_ptr};
array_add(*de->paths.name, name);
array_add(*de->paths.parent_indices, (*ew->d_parent_indices)[i]);
array_add(*de->paths.sizes, (*ew->d_sizes)[i]);
array_add(*de->paths.modtime, (*ew->d_modtime)[i]);
}
for_each(i, (*ew->offsets)) {
u8* string_ptr = (ew->thread_arena->memory_base + (*ew->offsets)[i]);
string name = {(*ew->lengths)[i], string_ptr};
array_add(*de->files.name, name);
array_add(*de->files.parent_indices, (*ew->parent_indices)[i]);
array_add(*de->files.sizes, (*ew->sizes)[i]);
array_add(*de->files.modtime, (*ew->modtime)[i]);
}
}*/
// Appends one directory or file entry to `results`.
//
//   results:      destination result set.
//   find_data:    Win32 find data for the entry (size and last-write time are read).
//   name:         entry name; stored as an offset from results->arena->memory_base,
//                 so it is expected to live inside that arena.
//   parent_index: value stored in the entry's parent-index column.
//   is_directory: selects the d_* columns (true) or the f_* columns (false).
void add_record (File_Enumeration_Thread_Results* results,
                 WIN32_FIND_DATAW* find_data,
                 string name,
                 Parent_Index parent_index,
                 bool is_directory) {
    u32 name_offset = (u32)(name.data - results->arena->memory_base);
    s16 name_length = (s16)name.count;
    u64 last_write  = FILETIME_to_ticks(find_data->ftLastWriteTime);

    if (is_directory) {
        // Directories carry no size column.
        array_add((*results->d_offsets), name_offset);
        array_add((*results->d_lengths), name_length);
        array_add((*results->d_parent_indices), parent_index); // #parent_index
        array_add((*results->d_modtime), last_write);
        return;
    }

    // Combine the two 32-bit halves reported by Win32 into one 64-bit size.
    u64 size_high = (u64)find_data->nFileSizeHigh << 32;
    u64 size_low  = (u64)find_data->nFileSizeLow & 0xFFFFFFFF;
    u64 byte_size = size_high | size_low;

    array_add((*results->f_offsets), name_offset);
    array_add((*results->f_lengths), name_length);
    array_add((*results->f_parent_indices), parent_index); // #parent_index
    array_add((*results->f_sizes), byte_size);
    array_add((*results->f_modtime), last_write);
}
// Thread-group work procedure: lists the entries of ONE directory (no recursion).
//
//   group:  the thread group this worker belongs to.
//   thread: the worker thread; its context->userdata holds the
//           File_Enumeration_Thread_Results, created on first use.
//   work:   an Enumeration_Work*. Each subdirectory found is appended to
//           work->next as a new Enumeration_Work for the master thread to queue.
//
// Every entry is recorded with the enumerated directory as its parent, and each
// child work item carries the index of the child's own directory record, so the
// parent chain can later be walked back to the root.
// Always returns Thread_Continue_Status::CONTINUE, including when the directory
// cannot be opened.
Thread_Continue_Status file_enumeration_thread_group_proc (Thread_Group* group, Thread* thread, void* work) {
    // Lazily create this worker's result storage the first time it runs.
    // #TODO: replace userdata with a struct that manages the thread-local data for this
    //        particular problem.
    if (!thread->context->userdata) {
        thread->context->userdata = New<File_Enumeration_Thread_Results>(GPAllocator());
        initialize((File_Enumeration_Thread_Results*)thread->context->userdata);
    }
    File_Enumeration_Thread_Results* results = (File_Enumeration_Thread_Results*)thread->context->userdata;
    Enumeration_Work* enum_work = (Enumeration_Work*)work;

    // Validate thread context?
    push_allocator(temp());
    auto_release_temp();
    // log("file_enumeration_thread_group_proc, thread index: %d", thread->index);

    // MAKE SURE PATH IS NULL TERMINATED! (it is passed to %s below)
    wstring wildcard_name = utf8_to_wide(format_string("%s\\*", enum_work->first_directory.data)); // #temp
    WIN32_FIND_DATAW find_data;
    HANDLE h = FindFirstFileExW((LPCWSTR)wildcard_name.data, FindExInfoBasic, &find_data,
                                FindExSearchNameMatch, nullptr, FIND_FIRST_EX_LARGE_FETCH);
    if (h == INVALID_HANDLE_VALUE) {
        // Directory could not be opened; nothing to record for it.
        return Thread_Continue_Status::CONTINUE;
    }

    s32 thread_index = get_thread_index(group, (s32)thread->index); // zero-indexed to thread group

    // `pi` identifies the record of the directory being enumerated. For non-root
    // work it was recorded by whichever worker found this directory.
    Parent_Index pi = enum_work->parent;

    push_arena(results->arena);

    if (enum_work->is_root) { // see add_record
        // A root has no record yet: add one here and make it the parent of
        // everything found below. Its own parent index is -1, ending the chain.
        string name = copy_string(enum_work->first_directory);
        u32 offset = (u32)(name.data - results->arena->memory_base);
        s32 current_index = (s32)(*results->d_offsets).count;
        pi = {thread_index, current_index};
        array_add((*results->d_offsets), offset);
        array_add((*results->d_lengths), (s16)name.count);
        Parent_Index root_pi = {thread_index, -1};
        array_add((*results->d_parent_indices), root_pi); // #parent_index
        array_add((*results->d_modtime), (u64)0);
        // results->dirs_enumerated += 1;
    }

    do {
        // Filter "." / ".." / empty names on the wide string, before converting:
        // add_record stores names as offsets from the results arena base, so a
        // converted name that is then discarded would just take up arena space.
        const wchar_t* raw_name = find_data.cFileName;
        bool is_dot_entry = (raw_name[0] == L'.') &&
                            (raw_name[1] == 0 || (raw_name[1] == L'.' && raw_name[2] == 0));
        if (raw_name[0] == 0 || is_dot_entry) continue;

        string name = wide_to_utf8((u16*)find_data.cFileName);
        if (name.count == 0) continue;

        bool is_directory = (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0;

        // The entry lives inside the directory being enumerated, so its parent is `pi`.
        add_record(results, &find_data, name, pi, is_directory);

        if (is_directory) {
            // add_record just appended this directory to OUR d_* arrays; that
            // slot is what the child work must use as the parent of its entries.
            Parent_Index child_pi = {thread_index, (s32)((*results->d_offsets).count - 1)};

            push_allocator(GPAllocator());
            auto new_work = New<Enumeration_Work>(false);
            // NOTE(review): this walks parent records that may belong to other
            // workers' result sets while those workers are running - confirm
            // that reading already-written entries is safe with ArenaArray.
            new_work->first_directory = directory_get_full_path(group, pi, name);
            new_work->parent = child_pi;
            new_work->is_root = false;
            new_work->next = {};
            array_add(enum_work->next, new_work);
        }
    } while (FindNextFileW(h, &find_data));

    FindClose(h);
    return Thread_Continue_Status::CONTINUE;
}
// Entry point of the master thread. Creates the worker thread group, queues one
// root work item per drive, then repeatedly collects completed work, queues the
// subdirectory work each completed item produced, and releases the item.
// Finishes once every queued item has been collected, then shuts the group down
// and flags the run as completed. Returns 0.
s64 multithreaded_file_enumeration_master_proc (Thread* thread) {
    auto task = thread_task(Drive_Enumeration);
    Thread_Group* file_enum_thread_group = New<Thread_Group>();
    s32 thread_count = os_cpu_physical_core_count();

    push_allocator(GPAllocator());
    thread_group_init(file_enum_thread_group, thread_count, file_enumeration_thread_group_proc, true);

    for_each(d, task->drives) {
        auto work = New<Enumeration_Work>(GPAllocator(), false); //replace with arena bootstrap?
        // Borrowed, not copied: the drive keeps ownership of its label.
        work->first_directory = task->drives[d]->label; // this includes the colon-slash, (e.g. `C:\`).
        work->parent = {-1, -1}; // #HACK: (s32)d
        work->is_root = true;
        work->next = {};
        add_work(file_enum_thread_group, work);
        task->work_added += 1;
    }

    start(file_enum_thread_group);

    while (true) {
        auto_release_temp();
        ArrayView<void*> cw = get_completed_work(file_enum_thread_group);
        task->work_completed += (s32)cw.count;

        for_each(i, cw) {
            auto ew = (Enumeration_Work*)cw[i];

            // Re-queue the subdirectories this item discovered. The child work
            // items themselves stay alive; only the array holding them is freed.
            for_each(w, ew->next) {
                auto new_work = ew->next[w];
                add_work(file_enum_thread_group, new_work);
            }
            task->work_added += (s32)ew->next.count;

            // A root item's first_directory is the drive's own label (see the
            // loop above), so freeing it here would free memory the drive still
            // refers to. Only non-root paths are owned by the work item.
            if (!ew->is_root) {
                string_free(ew->first_directory);
            }
            array_free(ew->next);
            internal_free(ew);
        }

        log("work completed: %d/%d",task->work_completed, task->work_added);

        // Children are queued (and counted) in the loop above, before this
        // check, so completed == added means nothing is left in flight.
        if (task->work_completed >= task->work_added) break;

        Sleep(1);
    }

    shutdown(file_enum_thread_group);
    task->thread_completed = true;
    return 0;
}
void os_run_file_enumeration_multithreaded () {
// Need some struct to track the state of this operation.
drive_enumeration = New<Drive_Enumeration>();
(*drive_enumeration) = {
os_get_available_drives(),
New<Thread>(),
os_cpu_physical_core_count(),
0, 0, 0, 0
};
// initialize(&drive_enumeration->paths);
// initialize(&drive_enumeration->files);
// We start 1 thread to run the thread group and track the threads
string thread_name = "Multithreaded Enumeration: Master Thread";
bool success = thread_init(drive_enumeration->master_thread,
multithreaded_file_enumeration_master_proc, thread_name);
Assert(success);
thread_start(drive_enumeration->master_thread, drive_enumeration);
drive_enumeration->thread_started = true;
}
// True once an enumeration run exists and its master thread has been started.
bool file_enum_multithreading_started () {
    return (drive_enumeration != nullptr) && drive_enumeration->thread_started;
}
// True while an enumeration run is in progress: the run state exists, the
// master thread has been started, and it has not yet flagged completion.
bool file_enum_multithreading_active () {
    if (drive_enumeration == nullptr) return false;

    // Completion is checked first and wins over the started flag.
    return !drive_enumeration->thread_completed && drive_enumeration->thread_started;
}