Add file deserialization, add block view of each record.

This commit is contained in:
Musa Mahmood 2025-12-08 21:37:20 -05:00
parent e0f5631a05
commit 19e0d2d1c3
11 changed files with 267 additions and 51 deletions

View File

@ -11,10 +11,9 @@ void* arena_allocator_proc (Allocator_Mode mode, s64 requested_size, s64 old_siz
return arena_alloc(arena, requested_size);
} break;
case Allocator_Mode::RESIZE: {
Assert(false); // DO NOT USE RESIZE WITH ARENAS! :ArenaResizing
// Or maybeeee...
// Resize should check if current_point matches the end of the old allocation?
// and resize accordingly + pass back same pointer.
// Assert(false); // DO NOT USE RESIZE WITH ARENAS! :ArenaResizing
// #TODO: Resize should check if current_point matches the end of the old allocation?
// and resize accordingly + pass back same pointer. Otherwise just make a new allocation and copy.
void* result = arena_alloc(arena, requested_size);
s64 size_to_copy = old_size < requested_size ? old_size : requested_size;
if (result && size_to_copy) {

View File

@ -106,6 +106,13 @@ template <typename T> ArrayView<T> to_view (ArenaArray<T>& array, s64 start_offs
return av;
}
// Appends every element of `view` to the end of `array` with a raw memcpy.
//   array: destination; its count grows by view.count.
//   view:  source items; only read.
// The elements are copied bytewise, so T is expected to be trivially copyable.
// NOTE(review): the `false` passed to array_resize is forwarded unchanged from the
// original code; its meaning is not visible here (presumably "do not initialize") - confirm.
template <typename T> void copy_from_view (ArenaArray<T>* array, ArrayView<T> view) {
    s64 old_count = array->count;
    s64 new_count = old_count + view.count;
    array_resize(*array, new_count, false);

    if (view.count > 0) {
        // Take the destination only after resizing, so the pointer stays valid even if
        // array_resize changed array->data. Skipping empty views also avoids handing
        // memcpy a source pointer that may be null.
        T* start = array->data + old_count;
        memcpy(start, view.data, view.count * sizeof(T));
    }
}
template <typename T> void array_add (ArenaArray<T>& array, ArrayView<T> items) {
T* current_point = &array.data[array.count];
s64 final_count = array.count + items.count;

View File

@ -164,17 +164,16 @@ T pop (Array<T>& src) {
return result;
}
// template <typename T, typename U>
// void array_add (Array<T>& src, U new_item) {
// static_assert(sizeof(U) <= sizeof(T));
// auto new_count = src.count + 1;
// array_maybe_grow(src);
template <typename T>
T* array_add (Array<T>& src) {
array_maybe_grow(src);
// T new_item_casted = (T)new_item;
T* result = &src.data[src.count];
(*result) = T(); // initialize
// src.count += 1;
// memcpy(&src[src.count-1], &new_item_casted, sizeof(T));
// }
src.count += 1;
return result;
}
template <typename T>
void array_add (Array<T>& src, T new_item) {

View File

@ -92,7 +92,7 @@ template <typename T> force_inline void Read (Deserializer* ds, T* item) {
template <typename T> force_inline void ReadArrayView (Deserializer* ds, ArrayView<T>& view, s64 view_count) {
view.count = view_count;
view.data = &ds->data[ds->cursor];
view.data = (T*)&ds->data[ds->cursor];
ds->cursor += (view_count * sizeof(T));
}
@ -115,6 +115,13 @@ template <typename T> force_inline ArrayView<T> ReadSizedArray (Deserializer* ds
return array;
}
// Reads one length-prefixed array from the deserializer and appends its items to `aa`.
//   ds: source stream; the reads below move its cursor past the count and the items.
//   aa: destination array; items are appended via copy_from_view.
template <typename T> void ReadToArenaArray (Deserializer* ds, ArenaArray<T>* aa) {
    ArrayView<T> items;

    // The element count is stored first, using the same type as ArrayView::count.
    Read(ds, &items.count);

    // View the item bytes in place inside the deserializer buffer...
    ReadArrayView(ds, items, items.count);

    // ...then append them to the destination array.
    copy_from_view(aa, items);
}
force_inline void ReadStringView (Deserializer* ds, string& sv, s64 view_count) {
sv.count = view_count;
sv.data = &ds->data[ds->cursor];

View File

@ -61,6 +61,11 @@ struct wstring {
count = length_bytes;
}
// Wraps an existing u16 buffer: stores the given pointer and count as-is.
// Nothing is copied or allocated here.
// NOTE(review): the unit of `_count` (bytes vs. u16 characters) is not established
// by this constructor - confirm against the callers.
wstring (s64 _count, u16* _data) {
    this->data  = _data;
    this->count = _count;
}
bool operator ! () {
Assert(count >= 0);
return (data == nullptr || count == 0);

View File

@ -23,7 +23,9 @@
// Returns offset
force_inline u32 AddString_NoCount (Serializer* serializer, u8* data, u8 count) { // #TODO: , bool null_terminate=false
u8* current_point = &serializer->data[serializer->count];
u32 original_count = (u32)serializer->count;
u8* current_point = &serializer->data[original_count];
s64 final_count = serializer->allocated + (count * sizeof(u8));
if (serializer->allocated < final_count) {
@ -33,14 +35,14 @@ force_inline u32 AddString_NoCount (Serializer* serializer, u8* data, u8 count)
memcpy(current_point, data, count * sizeof(u8));
serializer->count += count * sizeof(u8);
return (u32)serializer->count;
return original_count;
}
constexpr s64 DFS_Preallocation_Count = 4194304; // 2^22
// template <typename Length_Type>
struct DFS_Array {
Serializer* wstrings;
Serializer* wstrings; // is a ArenaArray<u8>
// Serializer* strings;
ArenaArray<u32>* offsets; // offsets into strings->data
@ -55,6 +57,41 @@ struct DFS_Array {
ArenaArray<u32>* parent_ids;
};
// By-value view of a single record of a DFS_Array; see get_value for how the
// fields are populated.
struct DFS_Value {
wstring wpath; // Name as u16 characters.
u64 modtime; // Taken from DFS_Array::modtimes.
u64 size; // Taken from DFS_Array::sizes.
s32 parent_index; // Taken from DFS_Array::parent_indices.
string full_path; // get_value currently stores the UTF-8 conversion of the record's own name here.
};
// string get_full_path_from_index (Dense_FS* dfs, s32 parent_index, string file_name) {
// }
// Builds a by-value view (DFS_Value) of record `i` of `dfsa`.
//   dfs:  owning Dense_FS; currently unused (kept for the planned full-path lookup below).
//   dfsa: the array set (paths or files) to read from.
//   i:    record index; it is not range-checked here, so the caller must pass a valid index.
// Returns the record's wide name, modtime, size, parent index and a UTF-8 copy of the name.
// NOTE(review): the string produced by wide_to_utf8 presumably comes from the current
// allocator - confirm its lifetime at the call sites.
DFS_Value get_value (Dense_FS* dfs, DFS_Array* dfsa, s64 i) {
    // offsets[i] is a byte offset into the wstrings buffer where this record's name starts.
    u32 path_offset = (*dfsa->offsets)[i];
    u16* path_wide_ptr = (u16*)((dfsa->wstrings->data) + path_offset);
    wstring path_wide = {(*dfsa->lengths)[i], path_wide_ptr};

    DFS_Value dfsv;
    dfsv.wpath = path_wide; // Previously never assigned, leaving wpath unset for callers.
    dfsv.modtime = (*dfsa->modtimes)[i];
    dfsv.size = (*dfsa->sizes)[i];
    dfsv.parent_index = (*dfsa->parent_indices)[i];

    string path_utf8 = wide_to_utf8(path_wide.data, (s32)path_wide.count);
    dfsv.full_path = path_utf8;
    // dfsv.full_path = get_full_path_from_offset(dfs, dfsv.parent_index, path_utf8);

    return dfsv;
}
// Per-drive file-system data, split into two DFS_Array sets (paths and files).
struct Dense_FS { // Link to OS_Drive
DFS_Array paths; // Records for paths.
DFS_Array files; // Records for files.
ArenaTable<u32, s32> path_table; // <entry_id, array_offset>.
OS_Drive* drive; // backlink for reference.
};
// Number of records held by `dfsa`, taken from the length of its offsets array.
s64 item_count (DFS_Array* dfsa) {
    auto* offsets = dfsa->offsets;
    return offsets->count;
}
@ -91,15 +128,6 @@ void release (DFS_Array* dfsa) {
zero_struct(dfsa);
}
struct Dense_FS { // Link to OS_Drive
DFS_Array paths;
DFS_Array files;
ArenaTable<u32, s32> path_table; // <entry_id, array_offset>.
OS_Drive* drive; // backlink for reference.
};
force_inline void release (Dense_FS* dfs) {
Timed_Block_Print("release: Dense_FS*");
dfs->drive = nullptr; // just a link
@ -112,11 +140,7 @@ force_inline void release (Dense_FS* dfs) {
}
}
void initialize (Dense_FS* dfs, OS_Drive* drive) {
Assert(drive != nullptr); Assert(dfs != nullptr);
// Is there a less stupid way of doing this?
dfs->drive = drive;
drive->data = dfs;
internal void Dense_FS_initialize (Dense_FS* dfs) {
initialize(&dfs->paths);
initialize(&dfs->files);
@ -126,6 +150,15 @@ void initialize (Dense_FS* dfs, OS_Drive* drive) {
dfs->path_table.compare_function = u32_keys_match;
}
// Links `dfs` and `drive` to each other, then initializes the Dense_FS itself.
//   dfs:   must be non-null; receives a backlink to `drive`.
//   drive: must be non-null; its `data` is pointed at `dfs`.
void initialize (Dense_FS* dfs, OS_Drive* drive) {
    Assert(drive != nullptr);
    Assert(dfs != nullptr);

    // Is there a less stupid way of doing this?
    drive->data = dfs;   // drive -> dfs
    dfs->drive  = drive; // dfs -> drive (backlink)

    Dense_FS_initialize(dfs);
}
s32 find_previous_index (Dense_FS* dfs, u32 record_id, bool* success) {
s32 result = -1;

View File

@ -611,6 +611,14 @@ win32_wnd_proc (HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam) {
return result;
}
// Returns true when the OS reports attributes for `file_path`.
// The UTF-8 path is converted to wide characters for the W variant of the Win32 call;
// the conversion runs with the temp allocator pushed.
// NOTE(review): GetFileAttributesW also succeeds for directories, so this answers
// "does the path exist" rather than "is it a regular file" - confirm that is intended.
internal bool file_exists (string file_path) {
    push_allocator(temp());

    auto wide_path = utf8_to_wide(file_path);
    DWORD attributes = GetFileAttributesW((LPCWSTR)wide_path.data);

    if (attributes == INVALID_FILE_ATTRIBUTES) {
        return false;
    }
    return true;
}
internal BOOL
monitor_enum_proc (HMONITOR hMonitor, HDC hdc, RECT* rect, LPARAM data) {
Monitor monitor = {};

View File

@ -41,6 +41,8 @@ internal bool file_write (File* file, void* data, s64 length);
internal bool write_entire_file (string file_path, void* file_data, s64 count);
internal bool write_entire_file (string file_path, ArrayView<u8> file_data);
internal bool file_exists (string file_path);
// file_write
// write_entire_file...

View File

@ -135,10 +135,10 @@ void add_record (Dense_FS* dfs, NTFS_File* file) {
array_add(*array->parent_ids, file->parent_id);
array_add(*array->record_ids, file->record_id);
array_add(*array->lengths, file->name_count);
array_add(*array->offsets, (u32)offset);
array_add(*array->modtimes, file->file_modtime);
array_add(*array->sizes, file->file_size);
array_add(*array->lengths, file->name_count);
array_add(*array->offsets, (u32)offset);
array_add(*array->modtimes, file->file_modtime);
array_add(*array->sizes, file->file_size);
}
NTFS_MFT_Internal* new_ntfs_mft_internal () { // call with temp
@ -294,10 +294,6 @@ Error* NTFS_MFT_read_raw (OS_Drive* drive) {
file.name_data = (u16*)fileNameAttribute->fileName;
file.file_modtime = (u64)fileNameAttribute->modificationTime;
// We need to get size from the data attribute
add_record(drive->data, &file);
// See Dense_FS drive->data
mft->file_count += 1;
}
}
if (attribute->attributeType == 0x80) { // $DATA
@ -305,6 +301,10 @@ Error* NTFS_MFT_read_raw (OS_Drive* drive) {
file.file_size = nonresident_attribute->attributeSize;
}
if (attribute->attributeType == 0xFFFFFFFF) {
add_record(drive->data, &file);
// See Dense_FS drive->data
mft->file_count += 1;
break;
}
@ -403,6 +403,98 @@ void os_clear_drive_data () {
constexpr u32 Win32_Drive_Magic_Number = 0x41b5c7a9;
// Per-drive UI state for the NTFS workspace view (one entry per loaded drive).
struct NTFS_Drive {
s32 radio_button; // Selection written by the "paths" (1) / "files" (0) radio buttons.
s32 index; // Record index chosen with the per-drive slider.
};
// #TEMPORARY STRUCTURE FOR EXPERIMENTATION.
// Holds the drives loaded by Deserialize_Win32_Drives plus per-drive UI state.
struct NTFS_Workspace {
Array<OS_Drive*> drives; // One entry is added per drive read from the file.
Array<NTFS_Drive> supplementary; // Resized to the drive count; indexed in parallel with `drives`.
Arena* arena; // Arena obtained in Deserialize_Win32_Drives and pushed while loading.
// s32 results_to_show;
};
global NTFS_Workspace ntfs_workspace;
// True once the global workspace holds at least one drive.
bool ntfs_workspace_files_loaded () {
    return ntfs_workspace.drives.count != 0;
}
// Loads drive data from `file_path` into the global ntfs_workspace.
//
// Layout consumed, in order:
//   u32 magic number, s32 drive count, then for each drive:
//     s32 index, label, volume name, type, file_system, full_size, free_space,
//     serial_number, max_component_length, file_system_flags,
//     then the `paths` arrays and the `files` arrays (each array is length-prefixed):
//     wstrings, offsets, lengths, modtimes, sizes, parent_indices.
//
// Returns false when the file could not be read (empty result), when the magic
// number does not match, or when the stored drive count is negative; these checks
// happen before the workspace is touched. Returns true otherwise.
bool Deserialize_Win32_Drives (string file_path) {
    Timed_Block_Print("Deserialize_Win32_Drives");

    // The whole file is read into temp storage, which is released when this function returns.
    push_allocator(temp());
    auto_release_temp();

    Deserializer deserializer = read_entire_file(file_path, true);
    if (deserializer.count == 0) return false;

    auto d = &deserializer;
    auto drive_table = get_drive_table(); // NOTE(review): currently unused in this function.

    u32 magic_number = 0; s32 drive_count = 0;
    Read(d, &magic_number);
    // Reject foreign or corrupt files explicitly; an Assert alone would let them be
    // parsed in builds where assertions are disabled.
    if (magic_number != Win32_Drive_Magic_Number) {
        log("[Deserialize_Win32_Drives] unexpected magic number: 0x%x", magic_number);
        return false;
    }

    Read(d, &drive_count);
    if (drive_count < 0) {
        log("[Deserialize_Win32_Drives] invalid drive_count: %d", drive_count);
        return false;
    }

    ntfs_workspace.arena = next_arena(Arena_Reserve::Size_64G);
    push_arena(ntfs_workspace.arena);

    Assert(ntfs_workspace.drives.count == 0);
    array_resize(ntfs_workspace.supplementary, drive_count);
    // ntfs_workspace.drives.allocator = GPAllocator();

    log("[Deserialize_Win32_Drives] drive_count: %d", drive_count);

    for (s32 i = 0; i < drive_count; i += 1) {
        // look up disk based on drive_label
        s32 index = 0;
        Read(d, &index); Assert(i == index);

        string drive_label = {};
        ReadString16(d, drive_label);

        OS_Drive** drive_ptr = array_add(ntfs_workspace.drives);
        (*drive_ptr) = New<OS_Drive>();
        OS_Drive* drive = *drive_ptr;

        drive->label = copy_string(drive_label);

        // Copy the volume name just like the label: the string that was read refers to
        // the temp file buffer, which does not outlive this function.
        string volume_name = {};
        ReadString16(d, volume_name);
        drive->volume_name = copy_string(volume_name);

        Read(d, &drive->type);
        Read(d, &drive->file_system);
        Read(d, &drive->full_size);
        Read(d, &drive->free_space);
        Read(d, &drive->serial_number);
        Read(d, &drive->max_component_length);
        Read(d, &drive->file_system_flags);

        drive->data = New<Dense_FS>();
        Dense_FS_initialize(drive->data);

        { // (Dense_FS):paths
            DFS_Array paths = drive->data->paths; // copy of the array pointers; targets are shared.
            ReadToArenaArray(d, paths.wstrings);
            ReadToArenaArray(d, paths.offsets);
            ReadToArenaArray(d, paths.lengths);
            ReadToArenaArray(d, paths.modtimes);
            ReadToArenaArray(d, paths.sizes);
            ReadToArenaArray(d, paths.parent_indices);
        }

        { // (Dense_FS):files
            DFS_Array files = drive->data->files; // copy of the array pointers; targets are shared.
            ReadToArenaArray(d, files.wstrings);
            ReadToArenaArray(d, files.offsets);
            ReadToArenaArray(d, files.lengths);
            ReadToArenaArray(d, files.modtimes);
            ReadToArenaArray(d, files.sizes);
            ReadToArenaArray(d, files.parent_indices);
        }
    }

    return true;
}
bool Serialize_Win32_Drives (ArrayView<Win32_Drive*> drives, string file_path) {
Timed_Block_Print("Serialize_Win32_Drives");
File f = file_open(file_path, true, false, true);
@ -411,11 +503,13 @@ bool Serialize_Win32_Drives (ArrayView<Win32_Drive*> drives, string file_path) {
Serializer* s = new_serializer(Arena_Reserve::Size_64G);
// #TODO #Serialization Unfortunately, there's a lot of needless copying here
// it would be a lot nicer if we could just write-file in place. idk how to do that though ;_;
// Serialize header
// Serialize drive count;
Add(s, (u32)Win32_Drive_Magic_Number);
Add(s, (s32)drives.count);
for_each(d, drives) {
Win32_Drive* drive = drives[d];
// First, serialize the drive header:
Add(s, (s32)Win32_Drive_Magic_Number);
Add(s, (s32)d);
AddString16(s, drive->label);
AddString16(s, drive->volume_name);
@ -458,4 +552,5 @@ bool Serialize_Win32_Drives (ArrayView<Win32_Drive*> drives, string file_path) {
free_serializer(s);
return true;
}
}

View File

@ -78,13 +78,59 @@ bool Ex1_check_key_combinations() {
return false;
}
// Draws the NTFS workspace UI inside the current ImGui window:
//   1. one summary line per drive (label, path count, file count),
//   2. per drive: "paths"/"files" radio buttons and a record-index slider,
//   3. per drive: details of the selected file record (only when "files" is selected).
// Label strings are built with the temp allocator pushed.
void Ex1_show_ntfs_workspace () { using namespace ImGui;
    push_allocator(temp());

    for_each(d, ntfs_workspace.drives) {
        OS_Drive* drive = ntfs_workspace.drives[d];
        Text("%d. %s paths: %lld, files: %lld",
            d, drive->label.data,
            drive->data->paths.offsets->count,
            drive->data->files.offsets->count);
    }
    // SliderInt("Results to Show", &ntfs_workspace.results_to_show, 0, 50);

    for_each(d, ntfs_workspace.drives) {
        OS_Drive* drive = ntfs_workspace.drives[d];
        // #TODO: Radio button for choosing between paths, files
        char* rb1 = format_cstring("paths##%s", drive->label.data);
        RadioButton(rb1, &ntfs_workspace.supplementary[d].radio_button, 1);
        SameLine();
        char* rb2 = format_cstring("files##%s", drive->label.data);
        RadioButton(rb2, &ntfs_workspace.supplementary[d].radio_button, 0);
        SameLine();

        s32 max_count = (s32)drive->data->paths.offsets->count;
        if (ntfs_workspace.supplementary[d].radio_button == 0) {
            max_count = (s32)drive->data->files.offsets->count;
        }

        // Slider bounds are inclusive, so the largest selectable value must be the
        // last valid index (count - 1), not the count itself.
        s32 max_index = (max_count > 0) ? (max_count - 1) : 0;

        char* slider_label = format_cstring("%s index", drive->label.data);
        if (SliderInt(slider_label, &ntfs_workspace.supplementary[d].index, 0, max_index)) { }
    }

    for_each(d, ntfs_workspace.drives) {
        if (ntfs_workspace.supplementary[d].radio_button == 0) { // files
            Dense_FS* dfs = ntfs_workspace.drives[d]->data;
            DFS_Array* dfsa = &ntfs_workspace.drives[d]->data->files;

            // The stored index can be out of range (empty array, or left over from the
            // other radio selection), so only look the record up when it is valid.
            s64 record_count = dfsa->offsets->count;
            s32 record_index = ntfs_workspace.supplementary[d].index;
            if (record_index >= 0 && record_index < record_count) {
                DFS_Value v = get_value(dfs, dfsa, record_index);
                Text("Filename: %s, parent_id: %d", v.full_path.data, v.parent_index);
                Text(" > size: %s", format_bytes(v.size).data);
                // Text(" > modtime: %s", idk how to convert FILETIME to calendar time
            }
        } else {
            // DFS_Array* dfsa = &ntfs_workspace.drives[d]->data->paths;
        }
    }
}
void Ex1_Control_Panel () { using namespace ImGui;
Table<string, OS_Drive*>* drive_table = get_drive_table();
Begin("Control Panel");
if (Button("Debug break")) { debug_break(); }
if (Button("Discover drives") || !table_is_valid(drive_table)) {
Win32_Discover_Drives();
push_imgui_window("Control Panel");
// if (Button("Debug break")) { debug_break(); }
if (/*Button("Discover drives") ||*/!table_is_valid(drive_table)) { Win32_Discover_Drives(); }
// Text("ntfs_workspace_files_loaded: %s", ntfs_workspace_files_loaded()? "true": "false");
if (ntfs_workspace_files_loaded()) {
Ex1_show_ntfs_workspace();
return;
}
Text("drive_table is valid: %d", table_is_valid(drive_table));
@ -94,7 +140,7 @@ void Ex1_Control_Panel () { using namespace ImGui;
for_each(i, drives) {
OS_Drive* drive = drives[i];
Text(" > [%d] drive letter: %s (is_present: %d)", drives.count + 1, drive->label.data, drive->is_present);
Text(" > [%d] drive letter: %s (is_present: %d)", i + 1, drive->label.data, drive->is_present);
if (drive->time_to_enumerate != 0) {
SameLine();
Text("Enumerated in %.2f seconds", drive->time_to_enumerate);
@ -119,6 +165,11 @@ void Ex1_Control_Panel () { using namespace ImGui;
bool all_drives_enumerated = !ex1_ntfs.threads_in_flight.count
&& (drives_enumerated == drives.count);
string file_path = format_string_temp("%s_DriveData.bin", os_get_machine_name().data);
if (drives.count > 0 && !all_drives_enumerated && file_exists(file_path) && Button("Load from file (this machine)")) {
Deserialize_Win32_Drives(file_path);
}
if (drives.count > 0 && !all_drives_enumerated && Button("Enumerate all NTFS drives")) { // && ex1_ntfs.initialized
// if drive count exceeds the number of threads, we need to group them so each thread
// can enumerate multiple drives.
@ -213,7 +264,7 @@ void Ex1_Control_Panel () { using namespace ImGui;
// How do I tell when all files are enumerated?
// check drives[i]->data.paths.wstrings.count count?
if (all_drives_enumerated && Button("Save drive data")) {
string file_path = format_string("%s_DriveData.bin", os_get_machine_name().data);
string file_path = format_string_temp("%s_DriveData.bin", os_get_machine_name().data);
bool success = Serialize_Win32_Drives(drives, file_path);
if (!success) { log_error("Failed to save Win32_Drive data"); }
}
@ -221,8 +272,6 @@ void Ex1_Control_Panel () { using namespace ImGui;
if (all_drives_enumerated && Button("Clear all drive data")) {
os_clear_drive_data();
}
End();
}
void ImGui_Debug_Panel () { using namespace ImGui;

View File

@ -350,4 +350,16 @@ void ImGui_Show_Font_Info () {
if (ImGui::Button("Increase Font Size")) { imgui_default_font.current_size = clamp<s64>(imgui_default_font.current_size + 1, 0, 5); }
}
ImGui::End();
}
}
// Opens an ImGui window for the rest of the enclosing scope:
// declares a uniquely named guard object whose constructor calls ImGui::Begin and
// whose destructor calls ImGui::End, so early returns still close the window.
#define push_imgui_window(title) \
    Push_ImGui_Window Concat(_push_imgui_window_guard, __LINE__)(title)

// Scope guard pairing ImGui::Begin with ImGui::End.
struct Push_ImGui_Window {
    // NOTE(review): name.data is handed to ImGui as a C string, so it is assumed to be
    // null-terminated - confirm for non-literal titles.
    Push_ImGui_Window (string name) {
        ImGui::Begin((char*)name.data);
    }

    // Copying the guard would run ImGui::End once per copy, so copies are disallowed.
    Push_ImGui_Window (const Push_ImGui_Window&) = delete;
    Push_ImGui_Window& operator= (const Push_ImGui_Window&) = delete;

    ~Push_ImGui_Window () {
        ImGui::End();
    }
};