From 5d4491785379a11cb86743e9cf6611a089d15f59 Mon Sep 17 00:00:00 2001 From: Musa Mahmood Date: Wed, 10 Dec 2025 09:54:38 -0500 Subject: [PATCH] Fix major bug in GPAllocator_Proc ::RESIZE path. Fix major bug in Arena_Free_List.cpp --- lib/Base/Arena_Array.h | 4 +- lib/Base/Arena_Free_List.cpp | 2 - lib/Base/Base_Thread_Context.h | 5 +- lib/Base/General_Purpose_Allocator.cpp | 6 +- lib/Base/New_String.cpp | 52 ++++ lib/Base/String.cpp | 2 +- lib/Base/Thread_Group.cpp | 7 +- lib/Base/Threads.cpp | 5 +- lib/OS/OS_Filesystem.cpp | 87 +++++-- lib/OS/OS_Win32.cpp | 340 ++++++++++++++++++++++++- lib/OS/OS_Win32.h | 14 +- lib/OS/OS_Win32_NTFS.cpp | 61 +++-- lib_main.cpp | 4 +- src/Ex1.cpp | 88 ++++--- 14 files changed, 578 insertions(+), 99 deletions(-) create mode 100644 lib/Base/New_String.cpp diff --git a/lib/Base/Arena_Array.h b/lib/Base/Arena_Array.h index 488bbf1..a1755ce 100644 --- a/lib/Base/Arena_Array.h +++ b/lib/Base/Arena_Array.h @@ -65,8 +65,8 @@ template s64 memory_usage (ArenaArray& array) { return arena_usage_committed_bytes(array.arena); } -template void arena_array_free (ArenaArray& array) { - release_arena(array.arena, true); +template void arena_array_free (ArenaArray& array, bool delete_pages=true) { + release_arena(array.arena, delete_pages); array.arena = nullptr; #if BUILD_DEBUG poison_struct(&array); diff --git a/lib/Base/Arena_Free_List.cpp b/lib/Base/Arena_Free_List.cpp index bce6e8a..e3b15b0 100644 --- a/lib/Base/Arena_Free_List.cpp +++ b/lib/Base/Arena_Free_List.cpp @@ -53,8 +53,6 @@ Arena* next_arena (Arena_Reserve reserve_size) { arena_free_list->in_flight_count[reserve_index] += 1; - unlock(&arena_free_list->mutex); - Assert(arena != nullptr); return arena; } diff --git a/lib/Base/Base_Thread_Context.h b/lib/Base/Base_Thread_Context.h index 6a6cf52..2290b5a 100644 --- a/lib/Base/Base_Thread_Context.h +++ b/lib/Base/Base_Thread_Context.h @@ -1,5 +1,4 @@ // #hacky fwd declares -struct Thread; struct Error; struct Graphics; @@ -9,7 +8,7 @@ struct Thread_Context { Allocator allocator; s32 thread_idx; - u16 _padding0; + // u16 _padding0; u16 GPAllocator_alignment = 16; Logger logger = {nullptr, nullptr}; String_Builder* log_builder; @@ -27,6 +26,8 @@ struct Thread_Context { // Graphics stuff: Graphics* graphics; + + void* userdata; // for appending other arenas, etc. }; // C_LINKAGE thread_static TCTX* tctx_thread_local; diff --git a/lib/Base/General_Purpose_Allocator.cpp b/lib/Base/General_Purpose_Allocator.cpp index 97bd555..e5fa7c5 100644 --- a/lib/Base/General_Purpose_Allocator.cpp +++ b/lib/Base/General_Purpose_Allocator.cpp @@ -152,10 +152,8 @@ void* GPAllocator_Proc (Allocator_Mode mode, s64 requested_size, s64 old_size, v } break; case Allocator_Mode::RESIZE: { void* result = GPAllocator_Resize(old_size, old_memory, requested_size, alignment); - s64 size_to_copy = old_size < requested_size ? old_size : requested_size; - if (result && size_to_copy) { - memcpy(result, old_memory, size_to_copy); - } + // NOTE: The _aligned_realloc function already copies the old memory, so there's + // no need to copy the old memory block here. return result; } break; case Allocator_Mode::DEALLOCATE: { diff --git a/lib/Base/New_String.cpp b/lib/Base/New_String.cpp new file mode 100644 index 0000000..e940727 --- /dev/null +++ b/lib/Base/New_String.cpp @@ -0,0 +1,52 @@ +/* +#define SB_HELPER() \ + if (sb->allocated < max_array_size(*sb)) { \ + array_reserve(*sb, max_array_size(*sb)); \ + } \ + s64 buffer_size = sb->allocated - sb->count;\ + u8* current_point = &sb->data[sb->count] + +// This is not super flexible because we cannot do leading zeros or align... +template // Base-10 +string int_to_string(String_Builder* sb, T value, s32 width, bool is_signed) { + SB_HELPER(); + + string result; + bool negative = false; + u8 temporary_buffer[32]; + s32 position = 0; + u64 v = 0; + + if (is_signed && value < 0) { + negative = true; + // convert to unsigned magnitude safely + v = (u64)(-(s64)value); + } else { + v = (u64)value; + } + + // Special case 0: + if (v == 0) { + + } +} + + +force_inline void print_to_builder (String_Builder* sb, u8 v) { + SB_HELPER(); + return int_to_string(v, 8, is_signed=false); +} + +force_inline void print_to_builder (String_Builder* sb, string v) { + + sb->count += v.count; +} + + +// fallback: +template +force_inline string print_to_builder (String_Builder* sb, T&) { + return string("[unsupported type for to_builder conversion!]"); +} + +*/ \ No newline at end of file diff --git a/lib/Base/String.cpp b/lib/Base/String.cpp index fbd7218..d64da72 100644 --- a/lib/Base/String.cpp +++ b/lib/Base/String.cpp @@ -18,7 +18,7 @@ u8* to_c_string (string s) { } string copy_string (string s) { - Assert(s.count > 0); + // Assert(s.count > 0); if (s.count <= 0) return ""; string str = {}; diff --git a/lib/Base/Thread_Group.cpp b/lib/Base/Thread_Group.cpp index 8859188..1e5bf54 100644 --- a/lib/Base/Thread_Group.cpp +++ b/lib/Base/Thread_Group.cpp @@ -78,7 +78,7 @@ s64 thread_group_run (Thread* thread) { entry->thread_index = thread->index; entry->next = nullptr; - Thread_Continue_Status should_continue = Thread_Continue_Status::THREAD_CONTINUE; + Thread_Continue_Status should_continue = Thread_Continue_Status::CONTINUE; if (group->proc) { should_continue = group->proc(group, thread, entry->work); } @@ -86,7 +86,7 @@ s64 thread_group_run (Thread* thread) { // The work is done, add it to the completed list: add_work(&info->completed, entry); - if (should_continue == Thread_Continue_Status::THREAD_STOP) { + if (should_continue == Thread_Continue_Status::STOP) { break; } } @@ -213,7 +213,8 @@ bool shutdown (Thread_Group* group, s32 timeout_milliseconds = -1) { for (s64 i = 0; i < group->worker_info.count; i += 1) { Worker_Info* info = &group->worker_info[i]; - thread_deinit(&info->thread); + + thread_deinit(&info->thread, false); destroy(&info->available); destroy(&info->completed); array_free(info->work_steal_indices); diff --git a/lib/Base/Threads.cpp b/lib/Base/Threads.cpp index 4689147..253d9c7 100644 --- a/lib/Base/Threads.cpp +++ b/lib/Base/Threads.cpp @@ -6,7 +6,6 @@ struct Thread_Group; void init(Work_List* list); void destroy(Work_List* list); -typedef s64 (*Thread_Proc)(Thread* thread); s64 thread_group_run (Thread* thread); struct Thread { Thread_Context* context; @@ -54,8 +53,8 @@ struct Worker_Info { }; static_assert(sizeof(Worker_Info) % 64 == 0); // This MUST be padded to cache line! enum class Thread_Continue_Status: s32 { - THREAD_STOP = 0, - THREAD_CONTINUE = 1 + STOP = 0, + CONTINUE = 1 }; typedef Thread_Continue_Status (*Thread_Group_Proc)(Thread_Group* group, Thread* thread, void* work); struct Thread_Group { diff --git a/lib/OS/OS_Filesystem.cpp b/lib/OS/OS_Filesystem.cpp index 1d120a0..940d91a 100644 --- a/lib/OS/OS_Filesystem.cpp +++ b/lib/OS/OS_Filesystem.cpp @@ -43,7 +43,7 @@ constexpr s64 DFS_Preallocation_Count = 4194304; // 2^22 // template struct DFS_Array { Serializer* wstrings; // is a ArenaArray - // Serializer* strings; + Serializer* strings; ArenaArray* offsets; // offsets into strings->data ArenaArray* lengths; // this type may vary Not sure if I should make it a template argument. Seems yucky. @@ -60,29 +60,11 @@ struct DFS_Array { struct DFS_Value { wstring wpath; u64 modtime; - u64 size; + s64 size; s32 parent_index; string full_path; }; -// string get_full_path_from_index (Dense_FS* dfs, s32 parent_index, string file_name) { - -// } - -DFS_Value get_value (Dense_FS* dfs, DFS_Array* dfsa, s64 i) { - u32 path_offset = (*dfsa->offsets)[i]; - u16* path_wide_ptr = (u16*)((dfsa->wstrings->data) + path_offset); - wstring path_wide = {(*dfsa->lengths)[i], path_wide_ptr}; - DFS_Value dfsv; - dfsv.modtime = (*dfsa->modtimes)[i]; - dfsv.size = (*dfsa->sizes)[i]; - dfsv.parent_index = (*dfsa->parent_indices)[i]; - string path_utf8 = wide_to_utf8(path_wide.data, (s32)path_wide.count); - dfsv.full_path = path_utf8; - // dfsv.full_path = get_full_path_from_offset(dfs, dfsv.parent_index, path_utf8); - return dfsv; -} - struct Dense_FS { // Link to OS_Drive DFS_Array paths; DFS_Array files; @@ -92,6 +74,68 @@ struct Dense_FS { // Link to OS_Drive OS_Drive* drive; // backlink for reference. }; +// Need the drive letter too! +string get_full_path (string drive_label, Dense_FS* dfs, s32 first_parent_index, string file_name) { + s32 parent_index = first_parent_index; + + DFS_Array* paths = &dfs->paths; + + auto_release_temp(); + Array path_list_reverse; + path_list_reverse.allocator = temp(); + + array_add(path_list_reverse, file_name); + + while (parent_index > 0) { // -1 means we reached the drive letter (root) + u32 path_offset = (*paths->offsets)[parent_index]; + u8* path_data_ptr = (u8*)((paths->strings->data) + path_offset); + string next_dir = {(s64)(*paths->lengths)[parent_index], path_data_ptr}; + array_add(path_list_reverse, next_dir); + + parent_index = (*paths->parent_indices)[parent_index]; + } + + // Drive letter with colon: + Assert(drive_label.count >= 2); + string drive_letter_with_colon = {2, drive_label.data}; + + array_add(path_list_reverse, drive_letter_with_colon); + + // reset_string_builder(sb, true); + String_Builder* sb = new_string_builder(); + + for (s64 i = path_list_reverse.count-1; i >= 0; i -= 1) { + append(sb, path_list_reverse[i]); + if (i != 0) { append(sb, "/"); } + } + + // return copy_string(string_view(sb)); + return builder_to_string(sb); +} + +string get_full_path_from_index (OS_Drive* drive, DFS_Array* dfsa, s64 i) { + u32 path_offset = (*dfsa->offsets)[i]; + u8* path_data_ptr = (u8*)((dfsa->strings->data) + path_offset); + string file_name = {(*dfsa->lengths)[i], path_data_ptr}; + return get_full_path(drive->label, drive->data, (*dfsa->parent_indices)[i], file_name); +} + +DFS_Value get_value (Dense_FS* dfs, DFS_Array* dfsa, s64 i) { + u32 path_offset = (*dfsa->offsets)[i]; + u8* path_data_ptr = (u8*)((dfsa->strings->data) + path_offset); + // u16* path_wide_ptr = (u16*)((dfsa->wstrings->data) + path_offset); + // wstring path_wide = {(*dfsa->lengths)[i], path_wide_ptr}; + DFS_Value dfsv; + dfsv.modtime = (*dfsa->modtimes)[i]; + dfsv.size = (s64)(*dfsa->sizes)[i]; + dfsv.parent_index = (*dfsa->parent_indices)[i]; + // #TODO: I cna just use strings. + // string path_utf8 = wide_to_utf8(path_wide.data, (s32)path_wide.count); + // dfsv.full_path = get_full_path_from_offset(dfs, dfsv.parent_index, path_utf8); + dfsv.full_path = {(*dfsa->lengths)[i], path_data_ptr}; + return dfsv; +} + s64 item_count (DFS_Array* dfsa) { return dfsa->offsets->count; } @@ -99,7 +143,7 @@ s64 item_count (DFS_Array* dfsa) { void initialize (DFS_Array* dfsa) { Assert(dfsa != nullptr); dfsa->wstrings = new_serializer(Arena_Reserve::Size_2G); - // dfsa->strings = new_serializer(Arena_Reserve::Size_2G); + dfsa->strings = new_serializer(Arena_Reserve::Size_2G); dfsa->offsets = arena_array_new(DFS_Preallocation_Count, Arena_Reserve::Size_2G); dfsa->lengths = arena_array_new (DFS_Preallocation_Count, Arena_Reserve::Size_2G); @@ -114,6 +158,7 @@ void initialize (DFS_Array* dfsa) { void release (DFS_Array* dfsa) { free_serializer(dfsa->wstrings); + free_serializer(dfsa->strings); if (is_valid(dfsa->offsets)) { arena_array_free(*dfsa->offsets); } if (is_valid(dfsa->lengths)) { arena_array_free(*dfsa->lengths); } diff --git a/lib/OS/OS_Win32.cpp b/lib/OS/OS_Win32.cpp index 553c12f..df937e9 100644 --- a/lib/OS/OS_Win32.cpp +++ b/lib/OS/OS_Win32.cpp @@ -24,6 +24,13 @@ s64 GetUnixTimestampNanoseconds () { return unix_time_nanoseconds; } + +u64 FILETIME_to_ticks (FILETIME fileTime) { + u64 ticks = ((u64)fileTime.dwHighDateTime << (u64)32) + | (u64)fileTime.dwLowDateTime; // in 100ns ticks + return ticks; +} + #endif struct OS_System_Info { @@ -249,7 +256,7 @@ C_LINKAGE DWORD OS_Windows_Thread_Entry_Point (void* parameter) { // Individual Thread API #define thread_task(T) (T*)thread->data; -internal bool thread_init (Thread* thread, Thread_Proc proc, string thread_name="") { +internal bool thread_init (Thread* thread, Thread_Proc proc, string thread_name) { Assert(thread != nullptr && proc != nullptr); DWORD windows_thread_id = 0; @@ -292,7 +299,7 @@ internal bool thread_init (Thread* thread, Thread_Proc proc, string thread_name= return true; } -internal void thread_deinit (Thread* thread, bool zero_thread=false) { +internal void thread_deinit (Thread* thread,bool zero_thread) { // Move errors from thread to parent thread push_errors_to_parent_thread(thread->context); @@ -310,12 +317,12 @@ internal void thread_deinit (Thread* thread, bool zero_thread=false) { if (zero_thread) memset(thread, 0, sizeof(Thread)); } -internal void thread_start (Thread* thread, void* thread_data = nullptr) { +internal void thread_start (Thread* thread, void* thread_data) { if (thread_data) thread->data = thread_data; ResumeThread(thread->os_thread.windows_thread); } -internal bool thread_is_done (Thread* thread, s32 milliseconds=0) { +internal bool thread_is_done (Thread* thread, s32 milliseconds) { Assert(milliseconds >= -1); DWORD result = WaitForSingleObject(thread->os_thread.windows_thread, (DWORD)milliseconds); @@ -506,6 +513,8 @@ internal bool file_length (string file_path, s64* length) { if (!file_is_valid(f)) { return false; } bool success = file_length(f, length); + file_close(&f); + return success; } @@ -1106,4 +1115,325 @@ string os_get_machine_name () { // #TODO: #window_interaction (mouse/keyboard) // [ ] get_mouse_pointer_position -// [ ] ... What APIs do I need for Keyboard \ No newline at end of file +// [ ] ... What APIs do I need for Keyboard + +struct Enumeration_Work { + string first_directory; + s32 parent_index; + + Arena* thread_arena; // pointer to relevant tctx->arena + // Directories + ArenaArray* d_offsets; + ArenaArray* d_lengths; + ArenaArray* d_parent_indices; + ArenaArray* d_sizes; + ArenaArray* d_modtime; + // Files + ArenaArray* offsets; + ArenaArray* lengths; + ArenaArray* parent_indices; + ArenaArray* sizes; + ArenaArray* modtime; +}; + +struct Files_Combined_Results { + // ArenaArray full_path; + ArenaArray* name; + ArenaArray* parent_indices; + ArenaArray* sizes; + ArenaArray* modtime; +}; + +struct Drive_Enumeration { // master thread struct + Arena* arena; + ArrayView drives; + Thread* master_thread; + + s32 thread_count; + bool thread_started; + bool thread_completed; + + Files_Combined_Results paths; + Files_Combined_Results files; + + s32 work_added = 0; + s32 work_completed = 0; +}; + +void push_root (Drive_Enumeration* de, string label, s32 index) { + array_add(*de->paths.name, label); + array_add(*de->paths.parent_indices, index); + array_add(*de->paths.sizes, (u64)0); + array_add(*de->paths.modtime, (u64)0); +} + +global Drive_Enumeration* drive_enumeration; + +string directory_get_full_path (Drive_Enumeration* de, s64 index) { + push_allocator(GPAllocator()); // to copy from String_Builder + Files_Combined_Results* f = &de->paths; + string dir_name = (*f->name)[index]; + s32 parent_index = (*f->parent_indices)[index]; + s32 next_parent = (*f->parent_indices)[parent_index]; + + Array paths; + paths.allocator = temp(); + + array_add(paths, (*f->name)[parent_index]); + + while (parent_index != next_parent) { + parent_index = next_parent; + next_parent = (*f->parent_indices)[parent_index]; + array_add(paths, (*f->name)[parent_index]); + } + + // while (parent_index > -1) { // should be while(true) + // + // s32 next_parent = (*f->parent_indices)[parent_index]; + // if (parent_index == next_parent) break; + // s32 parent_index = next_parent; + // } + + // go in reverse order and add together string + String_Builder* sb = new_string_builder(Arena_Reserve::Size_64K); + for (s64 i = paths.count-1; i >= 0; i -= 1) { + append(sb, paths[i]); + append(sb, "\\"); + } + append(sb, dir_name); + + return builder_to_string(sb); +} + +void update_results (Drive_Enumeration* de, Enumeration_Work* ew) { + // merge results and release resources! + // unfortunately this is a LOT of copying! + for_each(i, (*ew->d_offsets)) { + u8* string_ptr = (ew->thread_arena->memory_base + (*ew->d_offsets)[i]); + string name = {(*ew->d_lengths)[i], string_ptr}; + array_add(*de->paths.name, name); + array_add(*de->paths.parent_indices, (*ew->d_parent_indices)[i]); + array_add(*de->paths.sizes, (*ew->d_sizes)[i]); + array_add(*de->paths.modtime, (*ew->d_modtime)[i]); + } + for_each(i, (*ew->offsets)) { + u8* string_ptr = (ew->thread_arena->memory_base + (*ew->offsets)[i]); + string name = {(*ew->lengths)[i], string_ptr}; + array_add(*de->files.name, name); + array_add(*de->files.parent_indices, (*ew->parent_indices)[i]); + array_add(*de->files.sizes, (*ew->sizes)[i]); + array_add(*de->files.modtime, (*ew->modtime)[i]); + } +} + +void add_record (Enumeration_Work* ew, WIN32_FIND_DATAW* find_data, string name, s32 parent_index=-1) { + u32 offset = (u32)(name.data - ew->thread_arena->memory_base); + bool is_directory = (find_data->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0; + u64 size = ((u64)find_data->nFileSizeHigh << 32) | ((u64)find_data->nFileSizeLow & 0xFFFFFFFF); + + if (is_directory) { + array_add((*ew->d_offsets), offset); + array_add((*ew->d_lengths), (s16)name.count); + array_add((*ew->d_parent_indices), parent_index); // #TODO #parent_index + array_add((*ew->d_sizes), size); + array_add((*ew->d_modtime), FILETIME_to_ticks(find_data->ftLastWriteTime)); + } else { + array_add((*ew->offsets), offset); + array_add((*ew->lengths), (s16)name.count); + array_add((*ew->parent_indices), parent_index); // #TODO #parent_index + array_add((*ew->sizes), size); + array_add((*ew->modtime), FILETIME_to_ticks(find_data->ftLastWriteTime)); + } +} + +Thread_Continue_Status file_enumeration_thread_group_proc (Thread_Group* group, Thread* thread, void* work) { + // 1. setup userdata as an Arena*: + Arena* result_arena; + if (!thread->context->userdata) { + result_arena = next_arena(Arena_Reserve::Size_64G); + thread->context->userdata = result_arena; + } else { + result_arena = (Arena*)thread->context->userdata; + } + + Enumeration_Work* enum_work = (Enumeration_Work*)work; + enum_work->thread_arena = (Arena*)thread->context->userdata; + + enum_work->d_offsets = arena_array_new(4096, Arena_Reserve::Size_2M); + enum_work->d_lengths = arena_array_new(4096, Arena_Reserve::Size_2M); + enum_work->d_parent_indices = arena_array_new(4096, Arena_Reserve::Size_2M); + enum_work->d_sizes = arena_array_new(4096, Arena_Reserve::Size_2M); + enum_work->d_modtime = arena_array_new(4096, Arena_Reserve::Size_2M); + + enum_work->offsets = arena_array_new(4096, Arena_Reserve::Size_2M); + enum_work->lengths = arena_array_new(4096, Arena_Reserve::Size_2M); + enum_work->parent_indices = arena_array_new(4096, Arena_Reserve::Size_2M); + enum_work->sizes = arena_array_new(4096, Arena_Reserve::Size_2M); + enum_work->modtime = arena_array_new(4096, Arena_Reserve::Size_2M); + + // Validate thread context? + push_allocator(temp()); + auto_release_temp(); + + // log("file_enumeration_thread_group_proc, thread index: %d", thread->index); + + // MAKE SURE PATH IS NULL TERMINATED! + wstring wildcard_name = utf8_to_wide(format_string("%s\\*", enum_work->first_directory.data)); // #temp + WIN32_FIND_DATAW find_data; + HANDLE h = FindFirstFileExW((LPCWSTR)wildcard_name.data, FindExInfoBasic, &find_data, + FindExSearchNameMatch, nullptr, FIND_FIRST_EX_LARGE_FETCH); + if (h == INVALID_HANDLE_VALUE) { + return Thread_Continue_Status::CONTINUE; + } + + while (true) { + push_arena(result_arena); + string name = wide_to_utf8((u16*)find_data.cFileName); // #NOT_TEMP + bool should_continue = (name.count == 0 || name == "." || name == ".."); + if (should_continue) { + bool success = FindNextFileW(h, &find_data); + if (!success) + break; + continue; + } + + add_record(enum_work, &find_data, name, enum_work->parent_index); + + bool success = FindNextFileW(h, &find_data); + if (!success) break; + } + + FindClose(h); + return Thread_Continue_Status::CONTINUE; +} + +s64 multithreaded_file_enumeration_master_proc (Thread* thread) { + auto task = thread_task(Drive_Enumeration); + + push_arena(task->arena); + + Thread_Group* file_enum_thread_group = New(); + + s32 thread_count = os_cpu_physical_core_count(); + + push_allocator(GPAllocator()); + thread_group_init(file_enum_thread_group, thread_count, file_enumeration_thread_group_proc, true); + + + for_each(d, task->drives) { + auto work = New(GPAllocator()); //replace with arena bootstrap? + work->first_directory = task->drives[d]->label; // this includes the colon-slash, (e.g. `C:\`). + work->parent_index = (s32)d; // #HACK? + // add label root to combined results, so we can look it up later! + push_root(task, work->first_directory, work->parent_index); + + add_work(file_enum_thread_group, work); + task->work_added += 1; + } + + start(file_enum_thread_group); + // set task completed. + + s64 path_index = task->drives.count; + + // #TODO: Get completed work! + while (task->work_completed < task->work_added) { + auto_release_temp(); + ArrayView cw = get_completed_work(file_enum_thread_group); + for_each(i, cw) { + auto ew = (Enumeration_Work*)cw[i]; + update_results(task, ew); + + arena_array_free(*ew->d_offsets, false); + arena_array_free(*ew->d_lengths, false); + arena_array_free(*ew->d_parent_indices, false); + arena_array_free(*ew->d_sizes, false); + arena_array_free(*ew->d_modtime, false); + arena_array_free(*ew->offsets, false); + arena_array_free(*ew->lengths, false); + arena_array_free(*ew->parent_indices, false); + arena_array_free(*ew->sizes, false); + arena_array_free(*ew->modtime, false); + + string_free(ew->first_directory); + internal_free(ew); + } + task->work_completed += (s32)cw.count; + + // For each new directory: + // s64 dirs_to_enumerate = task->paths.name->count - path_index; + for (s64 i = path_index; i < task->paths.name->count; i += 1) { + auto work = New(GPAllocator()); + work->first_directory = directory_get_full_path(task, i);// need full name here! + work->parent_index = (s32)i; + + add_work(file_enum_thread_group, work); + + task->work_added += 1; + } + path_index = task->paths.name->count; + + Sleep(1); + log("work completed: %d/%d",task->work_completed, task->work_added); + } + + shutdown(file_enum_thread_group); + + task->thread_completed = true; + return 0; +} + +void initialize (Files_Combined_Results* fcr) { + fcr->name = arena_array_new(4194304, Arena_Reserve::Size_2G); // 2GB @ 16-byte strings => 134.2M entries. 64 might be better here for really large file collections! + fcr->parent_indices = arena_array_new(4194304, Arena_Reserve::Size_2G); + fcr->sizes = arena_array_new(4194304, Arena_Reserve::Size_2G); + fcr->modtime = arena_array_new(4194304, Arena_Reserve::Size_2G); +} + +void run_multithreaded_enumeration_thread () { + // Need some struct to track the state of this operation. + Arena* arena = next_arena(Arena_Reserve::Size_64K); + push_arena(arena); + + drive_enumeration = New(); + (*drive_enumeration) = { + arena, + os_get_available_drives(), + New(), + os_cpu_physical_core_count(), + 0, false, false, {}, {}, + 0, 0 + }; + + initialize(&drive_enumeration->paths); + initialize(&drive_enumeration->files); + + // We start 1 thread to run the thread group and track the threads + string thread_name = "Multithreaded Enumeration: Master Thread"; + bool success = thread_init(drive_enumeration->master_thread, + multithreaded_file_enumeration_master_proc, thread_name); + Assert(success); + thread_start(drive_enumeration->master_thread, drive_enumeration); + drive_enumeration->thread_started = true; +} + +bool file_enum_multithreading_started () { + if (drive_enumeration == nullptr) return false; + return drive_enumeration->thread_started; +} + +bool file_enum_multithreading_active () { + if (drive_enumeration == nullptr) return false; + if (drive_enumeration->thread_completed) { + return false; + } + if (drive_enumeration->thread_started) { + return true; + } + return false; +} + +// if (drive_enumeration != nullptr) { +// // Check if task is completed, clean up thread. +// discard arena and zero drive_enumeration. +// } \ No newline at end of file diff --git a/lib/OS/OS_Win32.h b/lib/OS/OS_Win32.h index 3e1240d..9d34778 100644 --- a/lib/OS/OS_Win32.h +++ b/lib/OS/OS_Win32.h @@ -22,6 +22,14 @@ internal void wait (Condition_Variable* cv, Mutex* mutex, s32 wait_time_ms = -1) internal void wake (Condition_Variable* cv); internal void wake_all (Condition_Variable* cv); +struct Thread; // #hack forward declares to get this to compile. +typedef s64 (*Thread_Proc)(Thread* thread); + +internal bool thread_init (Thread* thread, Thread_Proc proc, string thread_name=""); +internal void thread_deinit (Thread* thread, bool zero_thread=false); +internal void thread_start (Thread* thread, void* thread_data = nullptr); +internal bool thread_is_done (Thread* thread, s32 milliseconds=0); + typedef u32 OS_Error_Code; internal string get_error_string (OS_Error_Code error_code); @@ -199,8 +207,4 @@ Window_Info* get_main_window_pointer (); string os_get_machine_name (); -// struct File_Contents { -// File file = {}; -// ArrayView file_data = {}; -// bool read_success = false; -// }; + diff --git a/lib/OS/OS_Win32_NTFS.cpp b/lib/OS/OS_Win32_NTFS.cpp index 6b012e1..e7567f2 100644 --- a/lib/OS/OS_Win32_NTFS.cpp +++ b/lib/OS/OS_Win32_NTFS.cpp @@ -87,7 +87,7 @@ struct NTFS_NonResidentAttributeHeader : NTFS_AttributeHeader { u64 attributeAllocated; // allocatedSize u64 attributeSize; // dataSize u64 streamDataSize; // initializedSize - // u64 compressedSize; + u64 compressedSize; }; struct NTFS_RunHeader { @@ -127,11 +127,14 @@ void add_record (Dense_FS* dfs, NTFS_File* file) { } // UTF-8 (string) version - // string s = wide_to_utf8(file->name_data, file->name_count); - // u32 offset = AddString_NoCount(array->strings, s.data, file->name_count); + string s = wide_to_utf8(file->name_data, file->name_count); + u32 offset = AddString_NoCount(array->strings, s.data, (u8)s.count); + + // I need the full path for this lol. + // file_length(s, (s64*)&file->file_size); // UTF-16LE (wstring) version - u32 offset = AddString_NoCount(array->wstrings, (u8*)file->name_data, file->name_count * sizeof(u16)); + // u32 offset = AddString_NoCount(array->wstrings, (u8*)file->name_data, file->name_count * sizeof(u16)); array_add(*array->parent_ids, file->parent_id); array_add(*array->record_ids, file->record_id); @@ -293,13 +296,27 @@ Error* NTFS_MFT_read_raw (OS_Drive* drive) { file.name_count = fileNameAttribute->fileNameLength; file.name_data = (u16*)fileNameAttribute->fileName; file.file_modtime = (u64)fileNameAttribute->modificationTime; - // We need to get size from the data attribute } } - if (attribute->attributeType == 0x80) { // $DATA - NTFS_NonResidentAttributeHeader* nonresident_attribute = (NTFS_NonResidentAttributeHeader*)attribute; - file.file_size = nonresident_attribute->attributeSize; - } + /* #NOTE: File size doesn't work at all, so just use slower WinAPI for now :( + if (attribute->attributeType == 0x80 && attribute->nameLength == 0) { // $DATA + // #TODO: Check if file is compressed then access compressedSize + bool is_compressed = (attribute->flags & 0x0800) == 0x0800; + if ((bool)attribute->nonResident) { + NTFS_NonResidentAttributeHeader* nonresident_attribute = (NTFS_NonResidentAttributeHeader*)attribute; + if (is_compressed) { + file.file_size = nonresident_attribute->compressedSize; + } else { + file.file_size = nonresident_attribute->attributeSize; // keep bottom 48-bits + } + } else { + NTFS_ResidentAttributeHeader* res = (NTFS_ResidentAttributeHeader*)attribute; + file.file_size = res->attributeLength; + } + + Assert(file.file_size < GB(64)); + if (file.is_directory) { file.file_size = 0; } + }*/ if (attribute->attributeType == 0xFFFFFFFF) { add_record(drive->data, &file); // See Dense_FS drive->data @@ -336,8 +353,12 @@ Error* NTFS_MFT_read_raw (OS_Drive* drive) { u32 parent_id = (*drive->data->paths.parent_ids)[i]; bool parent_exists = 0; s32 previous_index = find_previous_index(drive->data, parent_id, &parent_exists); - (*drive->data->paths.parent_indices)[i] = previous_index; // -1 if failed. fail_count += (s64)(!parent_exists); + if (!parent_exists) { + (*drive->data->paths.parent_indices)[i] = -1; // -1 if failed + } else { + (*drive->data->paths.parent_indices)[i] = previous_index; + } } // Link files: @@ -347,10 +368,16 @@ Error* NTFS_MFT_read_raw (OS_Drive* drive) { u32 parent_id = (*drive->data->files.parent_ids)[i]; bool parent_exists = 0; s32 previous_index = find_previous_index(drive->data, parent_id, &parent_exists); - (*drive->data->files.parent_indices)[i] = previous_index; // -1 if failed. fail_count += (s64)(!parent_exists); + if (!parent_exists) { + (*drive->data->files.parent_indices)[i] = -1; + } else { + (*drive->data->files.parent_indices)[i] = previous_index; + } } + // For all files and directories with a parent, find the parent(s) and get the file size!. + if (fail_count) { log_warning("[%s] Failed to find parent for %lld items", drive_path.data, fail_count); } @@ -474,7 +501,8 @@ bool Deserialize_Win32_Drives (string file_path) { Dense_FS_initialize(drive->data); { // (Dense_FS):paths DFS_Array paths = drive->data->paths; - ReadToArenaArray(d, paths.wstrings); + ReadToArenaArray(d, paths.strings); + // ReadToArenaArray(d, paths.wstrings); ReadToArenaArray(d, paths.offsets); ReadToArenaArray(d, paths.lengths); ReadToArenaArray(d, paths.modtimes); @@ -483,7 +511,8 @@ bool Deserialize_Win32_Drives (string file_path) { } { // (Dense_FS):files DFS_Array files = drive->data->files; - ReadToArenaArray(d, files.wstrings); + ReadToArenaArray(d, files.strings); + // ReadToArenaArray(d, files.wstrings); ReadToArenaArray(d, files.offsets); ReadToArenaArray(d, files.lengths); ReadToArenaArray(d, files.modtimes); @@ -527,7 +556,8 @@ bool Serialize_Win32_Drives (ArrayView drives, string file_path) { { // (Dense_FS):paths DFS_Array paths = drive->data->paths; // Note these are all prefixed with their respective lengths. - AddArray(s, to_view(*paths.wstrings)); + AddArray(s, to_view(*paths.strings)); + // AddArray(s, to_view(*paths.wstrings)); AddArray(s, to_view(*paths.offsets)); AddArray(s, to_view(*paths.lengths)); AddArray(s, to_view(*paths.modtimes)); @@ -536,7 +566,8 @@ bool Serialize_Win32_Drives (ArrayView drives, string file_path) { } { // (Dense_FS):files DFS_Array files = drive->data->files; - AddArray(s, to_view(*files.wstrings)); + AddArray(s, to_view(*files.strings)); + // AddArray(s, to_view(*files.wstrings)); AddArray(s, to_view(*files.offsets)); AddArray(s, to_view(*files.lengths)); AddArray(s, to_view(*files.modtimes)); diff --git a/lib_main.cpp b/lib_main.cpp index 774ae18..ce07948 100644 --- a/lib_main.cpp +++ b/lib_main.cpp @@ -48,13 +48,13 @@ #include "lib/Base/Threads.cpp" #include "lib/OS/OS_Filesystem.cpp" +#include "lib/Base/Thread_Group.cpp" + #if OS_WINDOWS # include "lib/OS/OS_Win32.cpp" # include "lib/OS/OS_Win32_NTFS.cpp" #endif -#include "lib/Base/Thread_Group.cpp" - #include "lib/Graphics.cpp" // #if OS_LINUX.. diff --git a/src/Ex1.cpp b/src/Ex1.cpp index 2c0fa85..ee31965 100644 --- a/src/Ex1.cpp +++ b/src/Ex1.cpp @@ -1,12 +1,10 @@ struct ExplorerUI { u8 search_input[64]; u8 secondary_input[64]; - }; struct Explorer { // A bunch of flags? - // Icon cache? // Array frame_textures; }; @@ -106,10 +104,17 @@ void Ex1_show_ntfs_workspace () { using namespace ImGui; } for_each(d, ntfs_workspace.drives) { if (ntfs_workspace.supplementary[d].radio_button == 0) { // files - Dense_FS* dfs = ntfs_workspace.drives[d]->data; - DFS_Array* dfsa = &ntfs_workspace.drives[d]->data->files; - DFS_Value v = get_value(dfs, dfsa, ntfs_workspace.supplementary[d].index); - Text("Filename: %s, parent_id: %d", v.full_path.data, v.parent_index); + OS_Drive* drive = ntfs_workspace.drives[d]; + Dense_FS* dfs = drive->data; + DFS_Array* dfsa = &drive->data->files; + s64 file_index = ntfs_workspace.supplementary[d].index; + DFS_Value v = get_value(dfs, dfsa, file_index); + // #TODO NOTE: v.full_path is NOT the full path #rename + Text("Filename: %s, parent_id: %d", copy_string(v.full_path).data, v.parent_index); + string full_path = get_full_path_from_index(drive, dfsa, file_index); + Text("Full path: %s", full_path.data); + bool success = file_length(full_path, &v.size); // temp, obviously we don't wanna call this every frame lol + Text(" > size: %lld B", v.size); Text(" > size: %s", format_bytes(v.size).data); // Text(" > modtime: %s", idk how to convert FILETIME to calendar time } else { @@ -170,10 +175,23 @@ void Ex1_Control_Panel () { using namespace ImGui; Deserialize_Win32_Drives(file_path); } + if (file_enum_multithreading_started()) { + if (thread_is_done(drive_enumeration->master_thread)) { + push_allocator(GPAllocator()); + // Thread* thread = drive_enumeration->master_thread; + // auto task = thread_task(Drive_Enumeration); + // Nothing to free? + thread_deinit(drive_enumeration->master_thread, true); + } + } + if (drives.count > 0 && !all_drives_enumerated && Button("Enumerate all NTFS drives")) { // && ex1_ntfs.initialized // if drive count exceeds the number of threads, we need to group them so each thread // can enumerate multiple drives. // We need to distribute the drives across our available threads: + { run_multithreaded_enumeration_thread(); + return; + } push_allocator(GPAllocator()); Array> drive_split; drive_split.allocator = temp(); // this is only needed for this frame @@ -286,34 +304,36 @@ void ImGui_Debug_Panel () { using namespace ImGui; } // #cpuid // Text("[cpus] physical: %d, logical: %d, primary: %d, secondary: %d", os_cpu_physical_core_count(), os_cpu_logical_core_count(), os_cpu_primary_core_count(), os_cpu_secondary_core_count()); - SeparatorText("Arena In-Use List"); - for (u8 i = 0; i < Arena_Reserve_Count; i += 1) { -#if ARENA_DEBUG - auto t = format_cstring( - " [%s] in_use: %d, committed_bytes: %s", - format_bytes(Arena_Sizes[i], 0).data, - arena_free_list->in_flight_count[i], - format_bytes(committed_bytes(arena_free_list->in_flight[i])).data - ); -#else - auto t = format_cstring( - " [%s] in_use: %d, committed_bytes: %s", - format_bytes(Arena_Sizes[i], 0).data, - arena_free_list->in_flight_count[i], - "disabled in release mode" - ); -#endif - Text(t); - } - SeparatorText("Arena Free List"); - for (u8 i = 0; i < Arena_Reserve_Count; i += 1) { - auto t = format_cstring( - " [%s] free: %d, committed_bytes: %s", - format_bytes(Arena_Sizes[i], 0).data, - (s32)arena_free_list->free_table[i].count, - format_bytes(committed_bytes(arena_free_list->free_table[i])).data - ); - Text(t); + { SeparatorText("Arena In-Use List"); + lock_guard(&arena_free_list->mutex); + for (u8 i = 0; i < Arena_Reserve_Count; i += 1) { + #if ARENA_DEBUG + auto t = format_cstring( + " [%s] in_use: %d, committed_bytes: %s", + format_bytes(Arena_Sizes[i], 0).data, + arena_free_list->in_flight_count[i], + format_bytes(committed_bytes(arena_free_list->in_flight[i])).data + ); + #else + auto t = format_cstring( + " [%s] in_use: %d, committed_bytes: %s", + format_bytes(Arena_Sizes[i], 0).data, + arena_free_list->in_flight_count[i], + "disabled in release mode" + ); + #endif + Text(t); + } + SeparatorText("Arena Free List"); + for (u8 i = 0; i < Arena_Reserve_Count; i += 1) { + auto t = format_cstring( + " [%s] free: %d, committed_bytes: %s", + format_bytes(Arena_Sizes[i], 0).data, + (s32)arena_free_list->free_table[i].count, + format_bytes(committed_bytes(arena_free_list->free_table[i])).data + ); + Text(t); + } } SeparatorText("Child Threads"); SeparatorText("Errors");