Serialize Win32_Drive Dense_FS data to file

This commit is contained in:
Musa Mahmood 2025-12-08 10:50:59 -05:00
parent 85c0709504
commit e0f5631a05
11 changed files with 228 additions and 44 deletions

View File

@ -50,6 +50,12 @@ template <typename T> T* array_start (ArenaArray<T>& array) {
return (T*)(array.arena->memory_base + ARRAY_ARENA_START_OFFSET);
}
// Pointer-taking validity check: false for a null array pointer or a null arena
// pointer, otherwise defers to is_valid() on the array's arena.
template <typename T> bool is_valid (ArenaArray<T>* array) {
    const bool has_arena = (array != nullptr) && (array->arena != nullptr);
    return has_arena ? is_valid(array->arena) : false;
}
// An array is considered empty when it holds zero items, or when its arena
// does not pass is_valid().
template <typename T> bool is_empty (ArenaArray<T>& array) {
    if (array.count == 0) return true;
    return !is_valid(array.arena);
}
@ -61,6 +67,7 @@ template <typename T> s64 memory_usage (ArenaArray<T>& array) {
template <typename T> void arena_array_free (ArenaArray<T>& array) {
release_arena(array.arena, true);
array.arena = nullptr;
#if BUILD_DEBUG
poison_struct(&array);
#endif

View File

@ -1,14 +0,0 @@
force_inline Serializer* new_serializer (Arena_Reserve new_reserve) {
return arena_array_new<u8>(1, new_reserve);
}
force_inline void reset_serializer (Serializer* serializer) {
array_poison_range(*serializer, 0, serializer->count);
reset_keeping_memory(*serializer);
}
force_inline void free_serializer (Serializer* serializer) {
arena_array_free(*serializer);
}
// #redundant to_view () #note, just call to_view (ArenaArray<T>&)

View File

@ -1,10 +1,26 @@
typedef ArenaArray<u8> Serializer;
force_inline Serializer* new_serializer (Arena_Reserve new_reserve=Arena_Reserve::Size_2G);
force_inline void reset_serializer (Serializer* serializer);
force_inline void free_serializer (Serializer* serializer);
// Creates a Serializer (an ArenaArray<u8>) through arena_array_new, forwarding
// the requested reserve size. The caller hands it back via free_serializer().
force_inline Serializer* new_serializer (Arena_Reserve new_reserve) {
    Serializer* serializer = arena_array_new<u8>(1, new_reserve);
    return serializer;
}
// force_inline ArrayView<u8> to_view (Serializer* serializer); #redundant, just call to_view (ArenaArray<T>&)
// Resets the serializer via reset_keeping_memory() so it can be reused.
// Debug builds first run array_poison_range over [0, count).
// NOTE(review): presumably the poisoning is there to expose stale reads — confirm.
force_inline void reset_serializer (Serializer* serializer) {
    Serializer& bytes = *serializer;
#if BUILD_DEBUG
    array_poison_range(bytes, 0, bytes.count);
#endif
    reset_keeping_memory(bytes);
}
// Frees the serializer through arena_array_free().
// The pointer is dereferenced unconditionally, so it must not be null.
force_inline void free_serializer (Serializer* serializer) {
    Serializer& bytes = *serializer;
    arena_array_free(bytes);
}
// force_inline ArrayView<u8> to_view (Serializer* serializer);
// #redundant, just call to_view (ArenaArray<T>&)
// Number of bytes currently held by the serializer (its `count` field).
force_inline s64 count_bytes (Serializer* serializer) {
    const s64 byte_count = serializer->count;
    return byte_count;
}
template <typename T> force_inline void Add (Serializer* serializer, T item) {
u8* current_point = &serializer->data[serializer->count];

View File

@ -132,7 +132,24 @@ wstring utf8_to_wide (string source) {
return name_u16s;
}
string format_string (char* format, ...) {
// printf-style formatting into a fixed-size buffer allocated after pushing
// temp() as the allocator.
// The result is truncated to BUFFER_SIZE - 1 bytes if the formatted output
// would be longer; `count` never exceeds what is actually stored in `data`.
// On a formatting error an empty (count == 0), null-terminated string is returned.
string format_string_temp (char* format, ...) { // #sprint
    push_allocator(temp());

    constexpr s64 BUFFER_SIZE = 4096;

    string str = {};
    str.data = NewArray<u8>(BUFFER_SIZE);

    va_list args;
    va_start(args, format);
    // Note that this *is* null-terminated for compatibility.
    s64 written = (s64)vsnprintf((char*)str.data, (size_t)BUFFER_SIZE, format, args);
    va_end(args);

    // vsnprintf reports the length the complete output *would* have had (or a
    // negative value on error), not the number of bytes it stored. Clamp so that
    // `count` always describes bytes that really live inside the buffer.
    if (written < 0) {
        str.data[0] = 0; // buffer contents are unspecified after an error
        written = 0;
    } else if (written >= BUFFER_SIZE) {
        written = BUFFER_SIZE - 1; // output was truncated; the last byte is the terminator
    }
    str.count = written;

    return str;
}
string format_string (char* format, ...) { // #sprint
constexpr s64 BUFFER_SIZE = 4096;
string str = {};

View File

@ -85,6 +85,7 @@ bool strings_match (string first_string, string second_string);
string wide_to_utf8 (u16* source, s32 length=-1);
wstring utf8_to_wide (string source);
string format_string_temp (char* format, ...);
string format_string (char* format, ...);
string format_string_no_context (char* format, ...);

View File

@ -40,7 +40,8 @@ constexpr s64 DFS_Preallocation_Count = 4194304; // 2^22
// template <typename Length_Type>
struct DFS_Array {
Serializer* strings;
Serializer* wstrings;
// Serializer* strings;
ArenaArray<u32>* offsets; // offsets into strings->data
ArenaArray<u8>* lengths; // this type may vary <hmmm> Not sure if I should make it a template argument. Seems yucky.
@ -49,15 +50,9 @@ struct DFS_Array {
ArenaArray<s32>* parent_indices;
// s64 index; // current index when inserting;
// #Temporary arrays for linking files/dirs to their parent directory, if present.
ArenaArray<u32>* record_ids;
ArenaArray<u32>* parent_ids;
// #TODO: Sort indices (should these be trees?)
// ArenaArray<s32> indices_sorted_by_modtime;
// ArenaArray<s32> indices_sorted_by_size;
};
s64 item_count (DFS_Array* dfsa) {
@ -66,7 +61,8 @@ s64 item_count (DFS_Array* dfsa) {
void initialize (DFS_Array* dfsa) {
Assert(dfsa != nullptr);
dfsa->strings = new_serializer(Arena_Reserve::Size_2G);
dfsa->wstrings = new_serializer(Arena_Reserve::Size_2G);
// dfsa->strings = new_serializer(Arena_Reserve::Size_2G);
dfsa->offsets = arena_array_new<u32>(DFS_Preallocation_Count, Arena_Reserve::Size_2G);
dfsa->lengths = arena_array_new<u8> (DFS_Preallocation_Count, Arena_Reserve::Size_2G);
@ -77,17 +73,45 @@ void initialize (DFS_Array* dfsa) {
dfsa->parent_ids = arena_array_new<u32>(DFS_Preallocation_Count, Arena_Reserve::Size_2G);
dfsa->parent_indices = arena_array_new<s32>(DFS_Preallocation_Count, Arena_Reserve::Size_2G);
// dfsa->index = 0;
}
// Frees every arena-backed array owned by `dfsa` and zeroes the struct.
// Safe to call on a null pointer, on a zeroed/never-initialized DFS_Array, and
// more than once: each member is only freed if is_valid() accepts it.
void release (DFS_Array* dfsa) {
    if (dfsa == nullptr) return;

    // free_serializer dereferences its argument unconditionally, so wstrings
    // needs the same validity guard as the other arrays (it is null after a
    // previous release, because the struct is zeroed below).
    if (is_valid(dfsa->wstrings)) { free_serializer(dfsa->wstrings); }
    if (is_valid(dfsa->offsets)) { arena_array_free(*dfsa->offsets); }
    if (is_valid(dfsa->lengths)) { arena_array_free(*dfsa->lengths); }
    if (is_valid(dfsa->modtimes)) { arena_array_free(*dfsa->modtimes); }
    if (is_valid(dfsa->sizes)) { arena_array_free(*dfsa->sizes); }
    if (is_valid(dfsa->record_ids)) { arena_array_free(*dfsa->record_ids); }
    if (is_valid(dfsa->parent_ids)) { arena_array_free(*dfsa->parent_ids); }
    if (is_valid(dfsa->parent_indices)) { arena_array_free(*dfsa->parent_indices); }

    // Clear all member pointers so stale raw pointers cannot be reused.
    zero_struct(dfsa);
}
struct Dense_FS { // Link to OS_Drive
OS_Drive* drive; // backlink for reference.
DFS_Array paths;
DFS_Array files;
ArenaTable<u32, s32> path_table; // <entry_id, array_offset>.
OS_Drive* drive; // backlink for reference.
};
// Tears down a Dense_FS: drops the drive backlink (not owned), releases both
// DFS_Arrays, then releases the path table if it is valid.
// `dfs` is dereferenced unconditionally and must not be null.
force_inline void release (Dense_FS* dfs) {
    Timed_Block_Print("release: Dense_FS*");

    dfs->drive = nullptr; // just a link

    release(&dfs->paths);
    release(&dfs->files);

    if (!table_is_valid(&dfs->path_table)) { return; }
    table_release(&dfs->path_table);
}
void initialize (Dense_FS* dfs, OS_Drive* drive) {
Assert(drive != nullptr); Assert(dfs != nullptr);
// Is there a less stupid way of doing this?
@ -118,4 +142,10 @@ void cleanup_after_enumeration(Dense_FS* dfs) {
arena_array_free(*dfs->paths.parent_ids);
arena_array_free(*dfs->files.record_ids);
arena_array_free(*dfs->files.parent_ids);
// Unfortunately, we have to do this whenever we free, because we're using raw pointers.
dfs->paths.record_ids = nullptr;
dfs->paths.parent_ids = nullptr;
dfs->files.record_ids = nullptr;
dfs->files.parent_ids = nullptr;
}

View File

@ -573,6 +573,10 @@ internal bool file_write (File* file, void* data, s64 length) {
return result;
}
// Convenience overload: writes the bytes described by `view` by forwarding its
// data pointer and count to the (File*, void*, s64) overload, and returns that
// overload's result.
force_inline bool file_write (File* file, ArrayView<u8> view) {
    void* bytes = view.data;
    s64 byte_count = view.count;
    return file_write(file, bytes, byte_count);
}
internal bool write_entire_file (string file_path, void* file_data, s64 count) {
File f = file_open(file_path, true, false);
if (!file_is_valid(f)) return false;
@ -1074,6 +1078,18 @@ string Win32_drive_letter (string any_path) {
return copy_string({1, any_path.data});
}
// Queries the computer name with GetComputerNameW and returns it converted to
// UTF-8 via wide_to_utf8. Returns an empty string if the query fails.
string os_get_machine_name () {
    constexpr u8 WIN32_MAX_COMPUTER_LENGTH_NAME = 31;
    constexpr u32 BUFFER_CAPACITY = WIN32_MAX_COMPUTER_LENGTH_NAME + 1; // +1 for the terminator

    u16 buffer[BUFFER_CAPACITY];
    u32 count = BUFFER_CAPACITY; // in: buffer capacity in characters

    if (!GetComputerNameW((LPWSTR)buffer, (LPDWORD)&count)) {
        return "";
    }

    // No explicit length is passed, so wide_to_utf8 uses its default length (-1).
    return wide_to_utf8(buffer);
}
// #TODO: #window_creation #window_manipulation
// [ ] resize_window
// [ ] position_window

View File

@ -177,12 +177,11 @@ struct Win32_Drive {
u32 max_component_length;
u32 file_system_flags;
bool is_present;
// Not sure if this should be here...
// f64 last_seen_alive_timestamp;
Dense_FS* data;
s64 bytes_accessed;
s64 file_count;
f32 time_to_enumerate;
Dense_FS* data;
};
typedef Win32_Drive OS_Drive;
@ -196,6 +195,8 @@ bool os_create_window (string new_window_name, Window_Type parent=nullptr, bool
Window_Info get_main_window ();
Window_Info* get_main_window_pointer ();
string os_get_machine_name ();
// struct File_Contents {
// File file = {};
// ArrayView<u8> file_data = {};

View File

@ -114,10 +114,8 @@ struct NTFS_MFT_Internal {
ArrayView<u8> mft_file;
ArrayView<u8> mft_buffer;
HANDLE handle;
#if BUILD_DEBUG
s64 bytes_accessed;
s64 file_count;
#endif
};
void add_record (Dense_FS* dfs, NTFS_File* file) {
@ -128,10 +126,13 @@ void add_record (Dense_FS* dfs, NTFS_File* file) {
array = &dfs->files;
}
string s = wide_to_utf8(file->name_data, file->name_count);
// UTF-8 (string) version
// string s = wide_to_utf8(file->name_data, file->name_count);
// u32 offset = AddString_NoCount(array->strings, s.data, file->name_count);
// UTF-16LE (wstring) version
u32 offset = AddString_NoCount(array->wstrings, (u8*)file->name_data, file->name_count * sizeof(u16));
u32 offset = AddString_NoCount(array->strings, s.data, file->name_count);
// #TODO: Add other items to arrays:
array_add(*array->parent_ids, file->parent_id);
array_add(*array->record_ids, file->record_id);
array_add(*array->lengths, file->name_count);
@ -317,14 +318,13 @@ Error* NTFS_MFT_read_raw (OS_Drive* drive) {
log_none("Found %lld files on drive %s (bytes_accessed: %s)", mft->file_count, drive_path.data, format_bytes(mft->bytes_accessed).data);
drive->file_count = mft->file_count;
drive->bytes_accessed = mft->bytes_accessed;
drive->time_to_enumerate = (f32)(GetUnixTimestamp() - start_time);
// #TODO: Generate parent_indices from record_id and parent_id
Timed_Block_Print("NTFS_MFT_read_raw: generate parent_indices");
// 1. Setup hash table:
s64 path_count = item_count(&drive->data->paths);
// #TODO: Before we start inserting stuff into the table we should ensure we have enough space
// for everything. See table_ensure_space ::
for (s64 i = 0; i < path_count; i += 1) {
table_set(&drive->data->path_table, (*drive->data->paths.record_ids)[i], (s32)i);
}
@ -356,6 +356,12 @@ Error* NTFS_MFT_read_raw (OS_Drive* drive) {
}
cleanup_after_enumeration(drive->data);
drive->file_count = mft->file_count;
drive->bytes_accessed = mft->bytes_accessed;
drive->time_to_enumerate = (f32)(GetUnixTimestamp() - start_time);
log_none("[%s] SUCCESS: total time to enumerate %.3f seconds", drive_path.data, drive->time_to_enumerate);
return NO_ERROR;
}
@ -382,3 +388,74 @@ s64 ntfs_enumeration_thread_proc (Thread* thread) {
return 0;
}
// Releases the Dense_FS data of every available drive and clears each drive's
// `data` pointer. Drives that have no data attached are skipped.
// NOTE(review): only the contents are released here; if the Dense_FS struct
// itself is separately allocated, confirm who frees that storage.
void os_clear_drive_data () {
    ArrayView<OS_Drive*> drives = os_get_available_drives();

    for_each(d, drives) {
        OS_Drive* drive = drives[d];
        // release(Dense_FS*) dereferences its argument, so a drive that was never
        // enumerated (or was already cleared) must not be passed through.
        if (drive == nullptr || drive->data == nullptr) { continue; }

        release(drive->data);
        drive->data = nullptr;
    }
}
constexpr u32 Win32_Drive_Magic_Number = 0x41b5c7a9;
// Writes every drive in `drives` to `file_path`, one record per drive:
//   magic number, drive index, label, volume name, type, file system,
//   full size, free space, serial number, max component length, flags,
//   then the `paths` arrays followed by the `files` arrays
//   (wstrings, offsets, lengths, modtimes, sizes, parent_indices).
// Each drive is staged in a Serializer, written to the file, and the
// Serializer is reset before the next drive.
//
// Returns false if the file cannot be opened, if a drive has no Dense_FS data,
// or if any write fails. On failure the file may contain a partial prefix of
// the output; no further drives are written after the first failure.
bool Serialize_Win32_Drives (ArrayView<Win32_Drive*> drives, string file_path) {
    Timed_Block_Print("Serialize_Win32_Drives");

    File f = file_open(file_path, true, false, true);
    if (!file_is_valid(f)) return false;

    Serializer* s = new_serializer(Arena_Reserve::Size_64G);
    bool success = true;

    // #TODO #Serialization Unfortunately, there's a lot of needless copying here
    // it would be a lot nicer if we could just write-file in place. idk how to do that though ;_;
    for_each(d, drives) {
        // After the first failure, skip the remaining drives instead of writing
        // records after a gap.
        if (!success) { continue; }

        Win32_Drive* drive = drives[d];

        // A drive without enumerated data is a caller bug (hence the Assert), but
        // it must not turn into a null dereference when asserts are compiled out.
        Assert(drive->data);
        if (!drive->data) { success = false; continue; }

        // Drive header:
        Add(s, (s32)Win32_Drive_Magic_Number);
        Add(s, (s32)d);
        AddString16(s, drive->label);
        AddString16(s, drive->volume_name);
        Add(s, drive->type);
        Add(s, drive->file_system);
        Add(s, drive->full_size);
        Add(s, drive->free_space);
        Add(s, drive->serial_number);
        Add(s, drive->max_component_length);
        Add(s, drive->file_system_flags);

        { // (Dense_FS):paths
            DFS_Array& paths = drive->data->paths;
            // Note these are all prefixed with their respective lengths.
            AddArray(s, to_view(*paths.wstrings));
            AddArray(s, to_view(*paths.offsets));
            AddArray(s, to_view(*paths.lengths));
            AddArray(s, to_view(*paths.modtimes));
            AddArray(s, to_view(*paths.sizes));
            AddArray(s, to_view(*paths.parent_indices));
        }

        { // (Dense_FS):files
            DFS_Array& files = drive->data->files;
            AddArray(s, to_view(*files.wstrings));
            AddArray(s, to_view(*files.offsets));
            AddArray(s, to_view(*files.lengths));
            AddArray(s, to_view(*files.modtimes));
            AddArray(s, to_view(*files.sizes));
            AddArray(s, to_view(*files.parent_indices));
        }

        // Write this drive's record to the file, then reuse the serializer.
        if (!file_write(&f, to_view(*s))) { success = false; }
        reset_serializer(s);
    }

    file_close(&f);
    free_serializer(s);

    return success;
}

View File

@ -26,10 +26,8 @@
#endif
#include "lib/UI/Layout.h"
#include "lib/Base/Logger.h"
#include "lib/Base/String.cpp"
#include "lib/Base/Serializer.h"
#include "lib/Base/Serializer.cpp"
#include "lib/Base/Base_Thread_Context.h"
#include "lib/Base/Expandable_Arena.h"
@ -37,6 +35,7 @@
#include "lib/Base/Arena_Free_List.cpp"
#include "lib/Base/Arena.cpp"
#include "lib/Base/String.cpp"
#include "lib/Base/Base_Thread_Context.cpp"
#include "lib/Base/ErrorType.cpp"

View File

@ -106,7 +106,20 @@ void Ex1_Control_Panel () { using namespace ImGui;
// }
}
if (drives.count > 0 && Button("Enumerate all NTFS drives")) { // && ex1_ntfs.initialized
s32 drives_enumerated = 0;
for_each(d, drives) {
OS_Drive* drive = drives[d];
if (!drives[d]->time_to_enumerate) { continue; }
if (!drives[d]->data) { continue; }
if (drives[d]->data->paths.offsets->count > 0) {
drives_enumerated += 1;
}
}
bool all_drives_enumerated = !ex1_ntfs.threads_in_flight.count
&& (drives_enumerated == drives.count);
if (drives.count > 0 && !all_drives_enumerated && Button("Enumerate all NTFS drives")) { // && ex1_ntfs.initialized
// if drive count exceeds the number of threads, we need to group them so each thread
// can enumerate multiple drives.
// We need to distribute the drives across our available threads:
@ -197,6 +210,18 @@ void Ex1_Control_Panel () { using namespace ImGui;
reset_struct(&ex1_ntfs);
}
// How do I tell when all files are enumerated?
// check drives[i]->data.paths.wstrings.count count?
if (all_drives_enumerated && Button("Save drive data")) {
string file_path = format_string("%s_DriveData.bin", os_get_machine_name().data);
bool success = Serialize_Win32_Drives(drives, file_path);
if (!success) { log_error("Failed to save Win32_Drive data"); }
}
if (all_drives_enumerated && Button("Clear all drive data")) {
os_clear_drive_data();
}
End();
}
@ -214,12 +239,21 @@ void ImGui_Debug_Panel () { using namespace ImGui;
// Text("[cpus] physical: %d, logical: %d, primary: %d, secondary: %d", os_cpu_physical_core_count(), os_cpu_logical_core_count(), os_cpu_primary_core_count(), os_cpu_secondary_core_count());
SeparatorText("Arena In-Use List");
for (u8 i = 0; i < Arena_Reserve_Count; i += 1) {
#if ARENA_DEBUG
auto t = format_cstring(
" [%s] in_use: %d, committed_bytes: %s",
format_bytes(Arena_Sizes[i], 0).data,
arena_free_list->in_flight_count[i],
format_bytes(committed_bytes(arena_free_list->in_flight[i])).data
);
#else
auto t = format_cstring(
" [%s] in_use: %d, committed_bytes: %s",
format_bytes(Arena_Sizes[i], 0).data,
arena_free_list->in_flight_count[i],
"disabled in release mode"
);
#endif
Text(t);
}
SeparatorText("Arena Free List");