From e0f5631a055a5ea16bfb79f9df44163448812323 Mon Sep 17 00:00:00 2001 From: Musa Mahmood Date: Mon, 8 Dec 2025 10:50:59 -0500 Subject: [PATCH] Serialize `Win32_Drive` `Dense_FS` data to file --- lib/Base/Arena_Array.h | 7 +++ lib/Base/Serializer.cpp | 14 ------ lib/Base/Serializer.h | 24 ++++++++-- lib/Base/String.cpp | 19 +++++++- lib/Base/String.h | 1 + lib/OS/OS_Filesystem.cpp | 50 ++++++++++++++++----- lib/OS/OS_Win32.cpp | 16 +++++++ lib/OS/OS_Win32.h | 7 +-- lib/OS/OS_Win32_NTFS.cpp | 95 ++++++++++++++++++++++++++++++++++++---- lib_main.cpp | 3 +- src/Ex1.cpp | 36 ++++++++++++++- 11 files changed, 228 insertions(+), 44 deletions(-) delete mode 100644 lib/Base/Serializer.cpp diff --git a/lib/Base/Arena_Array.h b/lib/Base/Arena_Array.h index 5c55de2..3d94ada 100644 --- a/lib/Base/Arena_Array.h +++ b/lib/Base/Arena_Array.h @@ -50,6 +50,12 @@ template T* array_start (ArenaArray& array) { return (T*)(array.arena->memory_base + ARRAY_ARENA_START_OFFSET); } +template bool is_valid (ArenaArray* array) { + if (array == nullptr) return false; + if (array->arena == nullptr) return false; + return is_valid(array->arena); +} + template bool is_empty (ArenaArray& array) { return ((array.count == 0) || !is_valid(array.arena)); } @@ -61,6 +67,7 @@ template s64 memory_usage (ArenaArray& array) { template void arena_array_free (ArenaArray& array) { release_arena(array.arena, true); + array.arena = nullptr; #if BUILD_DEBUG poison_struct(&array); #endif diff --git a/lib/Base/Serializer.cpp b/lib/Base/Serializer.cpp deleted file mode 100644 index 49977b1..0000000 --- a/lib/Base/Serializer.cpp +++ /dev/null @@ -1,14 +0,0 @@ -force_inline Serializer* new_serializer (Arena_Reserve new_reserve) { - return arena_array_new(1, new_reserve); -} - -force_inline void reset_serializer (Serializer* serializer) { - array_poison_range(*serializer, 0, serializer->count); - reset_keeping_memory(*serializer); -} - -force_inline void free_serializer (Serializer* serializer) { - arena_array_free(*serializer); -} - -// #redundant to_view () #note, just call to_view (ArenaArray&) diff --git a/lib/Base/Serializer.h b/lib/Base/Serializer.h index 3767186..8e39f92 100644 --- a/lib/Base/Serializer.h +++ b/lib/Base/Serializer.h @@ -1,10 +1,26 @@ typedef ArenaArray Serializer; -force_inline Serializer* new_serializer (Arena_Reserve new_reserve=Arena_Reserve::Size_2G); -force_inline void reset_serializer (Serializer* serializer); -force_inline void free_serializer (Serializer* serializer); +force_inline Serializer* new_serializer (Arena_Reserve new_reserve) { + return arena_array_new(1, new_reserve); +} -// force_inline ArrayView to_view (Serializer* serializer); #redundant, just call to_view (ArenaArray&) +force_inline void reset_serializer (Serializer* serializer) { +#if BUILD_DEBUG + array_poison_range(*serializer, 0, serializer->count); +#endif + reset_keeping_memory(*serializer); +} + +force_inline void free_serializer (Serializer* serializer) { + arena_array_free(*serializer); +} + +// force_inline ArrayView to_view (Serializer* serializer); +// #redundant, just call to_view (ArenaArray&) + +force_inline s64 count_bytes (Serializer* serializer) { + return serializer->count; +} template force_inline void Add (Serializer* serializer, T item) { u8* current_point = &serializer->data[serializer->count]; diff --git a/lib/Base/String.cpp b/lib/Base/String.cpp index 639ce81..fbd7218 100644 --- a/lib/Base/String.cpp +++ b/lib/Base/String.cpp @@ -132,7 +132,24 @@ wstring utf8_to_wide (string source) { return name_u16s; } -string format_string (char* format, ...) { +string format_string_temp (char* format, ...) { // #sprint + push_allocator(temp()); + constexpr s64 BUFFER_SIZE = 4096; + + string str = {}; + + str.data = NewArray(BUFFER_SIZE); + + va_list args; + va_start(args, format); + // Note that this *is* null-terminated for compatibility. + str.count = (s64)vsnprintf((char*)str.data, (size_t)BUFFER_SIZE, format, args); + va_end(args); + + return str; +} + +string format_string (char* format, ...) { // #sprint constexpr s64 BUFFER_SIZE = 4096; string str = {}; diff --git a/lib/Base/String.h b/lib/Base/String.h index 4a1f0bc..0991807 100644 --- a/lib/Base/String.h +++ b/lib/Base/String.h @@ -85,6 +85,7 @@ bool strings_match (string first_string, string second_string); string wide_to_utf8 (u16* source, s32 length=-1); wstring utf8_to_wide (string source); +string format_string_temp (char* format, ...); string format_string (char* format, ...); string format_string_no_context (char* format, ...); diff --git a/lib/OS/OS_Filesystem.cpp b/lib/OS/OS_Filesystem.cpp index 53cbf01..d513beb 100644 --- a/lib/OS/OS_Filesystem.cpp +++ b/lib/OS/OS_Filesystem.cpp @@ -40,7 +40,8 @@ constexpr s64 DFS_Preallocation_Count = 4194304; // 2^22 // template struct DFS_Array { - Serializer* strings; + Serializer* wstrings; + // Serializer* strings; ArenaArray* offsets; // offsets into strings->data ArenaArray* lengths; // this type may vary Not sure if I should make it a template argument. Seems yucky. @@ -49,15 +50,9 @@ struct DFS_Array { ArenaArray* parent_indices; - // s64 index; // current index when inserting; - // #Temporary arrays for linking files/dirs to their parent directory, if present. ArenaArray* record_ids; ArenaArray* parent_ids; - - // #TODO: Sort indices (should these be trees?) - // ArenaArray indices_sorted_by_modtime; - // ArenaArray indices_sorted_by_size; }; s64 item_count (DFS_Array* dfsa) { @@ -66,7 +61,8 @@ s64 item_count (DFS_Array* dfsa) { void initialize (DFS_Array* dfsa) { Assert(dfsa != nullptr); - dfsa->strings = new_serializer(Arena_Reserve::Size_2G); + dfsa->wstrings = new_serializer(Arena_Reserve::Size_2G); + // dfsa->strings = new_serializer(Arena_Reserve::Size_2G); dfsa->offsets = arena_array_new(DFS_Preallocation_Count, Arena_Reserve::Size_2G); dfsa->lengths = arena_array_new (DFS_Preallocation_Count, Arena_Reserve::Size_2G); @@ -77,17 +73,45 @@ void initialize (DFS_Array* dfsa) { dfsa->parent_ids = arena_array_new(DFS_Preallocation_Count, Arena_Reserve::Size_2G); dfsa->parent_indices = arena_array_new(DFS_Preallocation_Count, Arena_Reserve::Size_2G); - // dfsa->index = 0; +} + +void release (DFS_Array* dfsa) { + free_serializer(dfsa->wstrings); + + if (is_valid(dfsa->offsets)) { arena_array_free(*dfsa->offsets); } + if (is_valid(dfsa->lengths)) { arena_array_free(*dfsa->lengths); } + if (is_valid(dfsa->modtimes)) { arena_array_free(*dfsa->modtimes); } + if (is_valid(dfsa->sizes)) { arena_array_free(*dfsa->sizes); } + + if (is_valid(dfsa->record_ids)) { arena_array_free(*dfsa->record_ids); } + if (is_valid(dfsa->parent_ids)) { arena_array_free(*dfsa->parent_ids); } + + if (is_valid(dfsa->parent_indices)) { arena_array_free(*dfsa->parent_indices); } + + zero_struct(dfsa); } struct Dense_FS { // Link to OS_Drive - OS_Drive* drive; // backlink for reference. DFS_Array paths; DFS_Array files; ArenaTable path_table; // . + + OS_Drive* drive; // backlink for reference. }; +force_inline void release (Dense_FS* dfs) { + Timed_Block_Print("release: Dense_FS*"); + dfs->drive = nullptr; // just a link + + release(&dfs->paths); + release(&dfs->files); + + if (table_is_valid(&dfs->path_table)) { + table_release(&dfs->path_table); + } +} + void initialize (Dense_FS* dfs, OS_Drive* drive) { Assert(drive != nullptr); Assert(dfs != nullptr); // Is there a less stupid way of doing this? @@ -118,4 +142,10 @@ void cleanup_after_enumeration(Dense_FS* dfs) { arena_array_free(*dfs->paths.parent_ids); arena_array_free(*dfs->files.record_ids); arena_array_free(*dfs->files.parent_ids); + + // Unfortunately, we have to do this whenever we free, because we're using raw pointers. + dfs->paths.record_ids = nullptr; + dfs->paths.parent_ids = nullptr; + dfs->files.record_ids = nullptr; + dfs->files.parent_ids = nullptr; } diff --git a/lib/OS/OS_Win32.cpp b/lib/OS/OS_Win32.cpp index 73566a1..68d2b6e 100644 --- a/lib/OS/OS_Win32.cpp +++ b/lib/OS/OS_Win32.cpp @@ -573,6 +573,10 @@ internal bool file_write (File* file, void* data, s64 length) { return result; } +force_inline bool file_write (File* file, ArrayView view) { + return file_write(file, view.data, view.count); +} + internal bool write_entire_file (string file_path, void* file_data, s64 count) { File f = file_open(file_path, true, false); if (!file_is_valid(f)) return false; @@ -1074,6 +1078,18 @@ string Win32_drive_letter (string any_path) { return copy_string({1, any_path.data}); } +string os_get_machine_name () { + constexpr u8 WIN32_MAX_COMPUTER_LENGTH_NAME = 31; + u16 buffer[WIN32_MAX_COMPUTER_LENGTH_NAME + 1]; + + u32 count = WIN32_MAX_COMPUTER_LENGTH_NAME + 1; + if (GetComputerNameW((LPWSTR)buffer, (LPDWORD)&count)) { + return wide_to_utf8(buffer); + } + + return ""; +} + // #TODO: #window_creation #window_manipulation // [ ] resize_window // [ ] position_window diff --git a/lib/OS/OS_Win32.h b/lib/OS/OS_Win32.h index 7a02522..6c902a8 100644 --- a/lib/OS/OS_Win32.h +++ b/lib/OS/OS_Win32.h @@ -177,12 +177,11 @@ struct Win32_Drive { u32 max_component_length; u32 file_system_flags; bool is_present; - // Not sure if this should be here... - // f64 last_seen_alive_timestamp; - Dense_FS* data; + s64 bytes_accessed; s64 file_count; f32 time_to_enumerate; + Dense_FS* data; }; typedef Win32_Drive OS_Drive; @@ -196,6 +195,8 @@ bool os_create_window (string new_window_name, Window_Type parent=nullptr, bool Window_Info get_main_window (); Window_Info* get_main_window_pointer (); +string os_get_machine_name (); + // struct File_Contents { // File file = {}; // ArrayView file_data = {}; diff --git a/lib/OS/OS_Win32_NTFS.cpp b/lib/OS/OS_Win32_NTFS.cpp index 73a42cd..8eb35ef 100644 --- a/lib/OS/OS_Win32_NTFS.cpp +++ b/lib/OS/OS_Win32_NTFS.cpp @@ -114,10 +114,8 @@ struct NTFS_MFT_Internal { ArrayView mft_file; ArrayView mft_buffer; HANDLE handle; -#if BUILD_DEBUG s64 bytes_accessed; s64 file_count; -#endif }; void add_record (Dense_FS* dfs, NTFS_File* file) { @@ -128,10 +126,13 @@ void add_record (Dense_FS* dfs, NTFS_File* file) { array = &dfs->files; } - string s = wide_to_utf8(file->name_data, file->name_count); + // UTF-8 (string) version + // string s = wide_to_utf8(file->name_data, file->name_count); + // u32 offset = AddString_NoCount(array->strings, s.data, file->name_count); + + // UTF-16LE (wstring) version + u32 offset = AddString_NoCount(array->wstrings, (u8*)file->name_data, file->name_count * sizeof(u16)); - u32 offset = AddString_NoCount(array->strings, s.data, file->name_count); - // #TODO: Add other items to arrays: array_add(*array->parent_ids, file->parent_id); array_add(*array->record_ids, file->record_id); array_add(*array->lengths, file->name_count); @@ -317,14 +318,13 @@ Error* NTFS_MFT_read_raw (OS_Drive* drive) { log_none("Found %lld files on drive %s (bytes_accessed: %s)", mft->file_count, drive_path.data, format_bytes(mft->bytes_accessed).data); - drive->file_count = mft->file_count; - drive->bytes_accessed = mft->bytes_accessed; - drive->time_to_enumerate = (f32)(GetUnixTimestamp() - start_time); - // #TODO: Generate parent_indices from record_id and parent_id Timed_Block_Print("NTFS_MFT_read_raw: generate parent_indices"); // 1. Setup hash table: s64 path_count = item_count(&drive->data->paths); + + // #TODO: Before we start inserting stuff into the table we should ensure we have enough space + // for everything. See table_ensure_space :: for (s64 i = 0; i < path_count; i += 1) { table_set(&drive->data->path_table, (*drive->data->paths.record_ids)[i], (s32)i); } @@ -356,6 +356,12 @@ Error* NTFS_MFT_read_raw (OS_Drive* drive) { } cleanup_after_enumeration(drive->data); + + drive->file_count = mft->file_count; + drive->bytes_accessed = mft->bytes_accessed; + drive->time_to_enumerate = (f32)(GetUnixTimestamp() - start_time); + + log_none("[%s] SUCCESS: total time to enumerate %.3f seconds", drive_path.data, drive->time_to_enumerate); return NO_ERROR; } @@ -382,3 +388,74 @@ s64 ntfs_enumeration_thread_proc (Thread* thread) { return 0; } + +void os_clear_drive_data () { + ArrayView drives = os_get_available_drives(); + + for_each(d, drives) { + OS_Drive* drive = drives[d]; + + release(drive->data); + + drive->data = nullptr; + } +} + +constexpr u32 Win32_Drive_Magic_Number = 0x41b5c7a9; + +bool Serialize_Win32_Drives (ArrayView drives, string file_path) { + Timed_Block_Print("Serialize_Win32_Drives"); + File f = file_open(file_path, true, false, true); + if (!file_is_valid(f)) return false; + + Serializer* s = new_serializer(Arena_Reserve::Size_64G); + // #TODO #Serialization Unfortunately, there's a lot of needless copying here + // it would be a lot nicer if we could just write-file in place. idk how to do that though ;_; + // Serialize header + for_each(d, drives) { + Win32_Drive* drive = drives[d]; + // First, serialize the drive header: + Add(s, (s32)Win32_Drive_Magic_Number); + Add(s, (s32)d); + AddString16(s, drive->label); + AddString16(s, drive->volume_name); + Add(s, drive->type); + Add(s, drive->file_system); + Add(s, drive->full_size); + Add(s, drive->free_space); + Add(s, drive->serial_number); + Add(s, drive->max_component_length); + Add(s, drive->file_system_flags); + // Write to file and reset + + // (Dense_FS) + Assert(drive->data); + { // (Dense_FS):paths + DFS_Array paths = drive->data->paths; + // Note these are all prefixed with their respective lengths. + AddArray(s, to_view(*paths.wstrings)); + AddArray(s, to_view(*paths.offsets)); + AddArray(s, to_view(*paths.lengths)); + AddArray(s, to_view(*paths.modtimes)); + AddArray(s, to_view(*paths.sizes)); + AddArray(s, to_view(*paths.parent_indices)); + } + { // (Dense_FS):files + DFS_Array files = drive->data->files; + AddArray(s, to_view(*files.wstrings)); + AddArray(s, to_view(*files.offsets)); + AddArray(s, to_view(*files.lengths)); + AddArray(s, to_view(*files.modtimes)); + AddArray(s, to_view(*files.sizes)); + AddArray(s, to_view(*files.parent_indices)); + } + + file_write(&f, to_view(*s)); + reset_serializer(s); + } + + file_close(&f); + free_serializer(s); + + return true; +} \ No newline at end of file diff --git a/lib_main.cpp b/lib_main.cpp index 6a102c6..774ae18 100644 --- a/lib_main.cpp +++ b/lib_main.cpp @@ -26,10 +26,8 @@ #endif #include "lib/UI/Layout.h" #include "lib/Base/Logger.h" -#include "lib/Base/String.cpp" #include "lib/Base/Serializer.h" -#include "lib/Base/Serializer.cpp" #include "lib/Base/Base_Thread_Context.h" #include "lib/Base/Expandable_Arena.h" @@ -37,6 +35,7 @@ #include "lib/Base/Arena_Free_List.cpp" #include "lib/Base/Arena.cpp" +#include "lib/Base/String.cpp" #include "lib/Base/Base_Thread_Context.cpp" #include "lib/Base/ErrorType.cpp" diff --git a/src/Ex1.cpp b/src/Ex1.cpp index 5ab535c..d404a42 100644 --- a/src/Ex1.cpp +++ b/src/Ex1.cpp @@ -106,7 +106,20 @@ void Ex1_Control_Panel () { using namespace ImGui; // } } - if (drives.count > 0 && Button("Enumerate all NTFS drives")) { // && ex1_ntfs.initialized + s32 drives_enumerated = 0; + for_each(d, drives) { + OS_Drive* drive = drives[d]; + if (!drives[d]->time_to_enumerate) { continue; } + if (!drives[d]->data) { continue; } + + if (drives[d]->data->paths.offsets->count > 0) { + drives_enumerated += 1; + } + } + bool all_drives_enumerated = !ex1_ntfs.threads_in_flight.count + && (drives_enumerated == drives.count); + + if (drives.count > 0 && !all_drives_enumerated && Button("Enumerate all NTFS drives")) { // && ex1_ntfs.initialized // if drive count exceeds the number of threads, we need to group them so each thread // can enumerate multiple drives. // We need to distribute the drives across our available threads: @@ -197,6 +210,18 @@ void Ex1_Control_Panel () { using namespace ImGui; reset_struct(&ex1_ntfs); } + // How do I tell when all files are enumerated? + // check drives[i]->data.paths.wstrings.count count? + if (all_drives_enumerated && Button("Save drive data")) { + string file_path = format_string("%s_DriveData.bin", os_get_machine_name().data); + bool success = Serialize_Win32_Drives(drives, file_path); + if (!success) { log_error("Failed to save Win32_Drive data"); } + } + + if (all_drives_enumerated && Button("Clear all drive data")) { + os_clear_drive_data(); + } + End(); } @@ -214,12 +239,21 @@ void ImGui_Debug_Panel () { using namespace ImGui; // Text("[cpus] physical: %d, logical: %d, primary: %d, secondary: %d", os_cpu_physical_core_count(), os_cpu_logical_core_count(), os_cpu_primary_core_count(), os_cpu_secondary_core_count()); SeparatorText("Arena In-Use List"); for (u8 i = 0; i < Arena_Reserve_Count; i += 1) { +#if ARENA_DEBUG auto t = format_cstring( " [%s] in_use: %d, committed_bytes: %s", format_bytes(Arena_Sizes[i], 0).data, arena_free_list->in_flight_count[i], format_bytes(committed_bytes(arena_free_list->in_flight[i])).data ); +#else + auto t = format_cstring( + " [%s] in_use: %d, committed_bytes: %s", + format_bytes(Arena_Sizes[i], 0).data, + arena_free_list->in_flight_count[i], + "disabled in release mode" + ); +#endif Text(t); } SeparatorText("Arena Free List");