diff --git a/lib/Base/Arena_Array.h b/lib/Base/Arena_Array.h index bdb68aa..5c55de2 100644 --- a/lib/Base/Arena_Array.h +++ b/lib/Base/Arena_Array.h @@ -133,7 +133,7 @@ template force_inline void maybe_grow (ArenaArray& array) { if (array.count >= array.allocated) { s64 reserve = 2 * array.allocated; // if reserve < 8 reserve = 8; // no point doing this because we allocate by page, and we're never realloc'ing - reserve_internal(array, reserve, sizeof(T)); + reserve_internal((ArenaArray&)array, reserve, sizeof(T)); } } diff --git a/lib/Base/Arena_Hash_Table.h b/lib/Base/Arena_Hash_Table.h index 96a9af3..9c38eb5 100644 --- a/lib/Base/Arena_Hash_Table.h +++ b/lib/Base/Arena_Hash_Table.h @@ -33,6 +33,7 @@ template bool table_is_valid (ArenaTable* table) return true; } +// table_release template void table_init (ArenaTable* table, s64 slots_to_allocate=64, Arena_Reserve new_reserve=Arena_Reserve::Size_64M) { s64 n = Next_Power_Of_Two(slots_to_allocate); diff --git a/lib/Base/ErrorType.cpp b/lib/Base/ErrorType.cpp index 39a98dd..a94fe18 100644 --- a/lib/Base/ErrorType.cpp +++ b/lib/Base/ErrorType.cpp @@ -153,6 +153,7 @@ void push_error_no_context (Thread_Context* tctx, Error* new_error) { if (new_error == nullptr) return; Error* current_error = tctx->current_error; if (current_error) { + new_error->previous_error = current_error; current_error->next_error = new_error; } else { tctx->first_error = new_error; diff --git a/lib/Base/Hash_Functions.h b/lib/Base/Hash_Functions.h index 9d6cbbd..13bef54 100644 --- a/lib/Base/Hash_Functions.h +++ b/lib/Base/Hash_Functions.h @@ -56,6 +56,13 @@ u32 string_hash_function_fnv1a (void* key, s64 size) { return (u32)(hash_u64 ^ (hash_u64 >> 32)); } +bool u32_keys_match (void* key1, void* key2) { + u32 key1_u32 = *(u32*)key1; + u32 key2_u32 = *(u32*)key2; + + return key1_u32 == key2_u32; +} + bool u64_keys_match (void* key1, void* key2) { u64 key1_u64 = *(u64*)key1; u64 key2_u64 = *(u64*)key2; diff --git a/lib/OS/OS_Filesystem.cpp b/lib/OS/OS_Filesystem.cpp index f2dedd9..53cbf01 100644 --- a/lib/OS/OS_Filesystem.cpp +++ b/lib/OS/OS_Filesystem.cpp @@ -22,7 +22,7 @@ // // Returns offset -force_inline s32 AddString_NoCount (Serializer* serializer, u8* data, u8 count) { // #TODO: , bool null_terminate=false +force_inline u32 AddString_NoCount (Serializer* serializer, u8* data, u8 count) { // #TODO: , bool null_terminate=false u8* current_point = &serializer->data[serializer->count]; s64 final_count = serializer->allocated + (count * sizeof(u8)); @@ -32,6 +32,8 @@ force_inline s32 AddString_NoCount (Serializer* serializer, u8* data, u8 count) memcpy(current_point, data, count * sizeof(u8)); serializer->count += count * sizeof(u8); + + return (u32)serializer->count; } constexpr s64 DFS_Preallocation_Count = 4194304; // 2^22 @@ -40,15 +42,16 @@ constexpr s64 DFS_Preallocation_Count = 4194304; // 2^22 struct DFS_Array { Serializer* strings; - ArenaArray* offsets; // offsets into string_arena + ArenaArray* offsets; // offsets into strings->data ArenaArray* lengths; // this type may vary Not sure if I should make it a template argument. Seems yucky. ArenaArray* modtimes; ArenaArray* sizes; + ArenaArray* parent_indices; - s64 index; // current index when inserting; + // s64 index; // current index when inserting; - // #Temporary arrays for linking??? + // #Temporary arrays for linking files/dirs to their parent directory, if present. ArenaArray* record_ids; ArenaArray* parent_ids; @@ -57,6 +60,10 @@ struct DFS_Array { // ArenaArray indices_sorted_by_size; }; +s64 item_count (DFS_Array* dfsa) { + return dfsa->offsets->count; +} + void initialize (DFS_Array* dfsa) { Assert(dfsa != nullptr); dfsa->strings = new_serializer(Arena_Reserve::Size_2G); @@ -65,9 +72,12 @@ void initialize (DFS_Array* dfsa) { dfsa->lengths = arena_array_new (DFS_Preallocation_Count, Arena_Reserve::Size_2G); dfsa->modtimes = arena_array_new(DFS_Preallocation_Count, Arena_Reserve::Size_2G); dfsa->sizes = arena_array_new(DFS_Preallocation_Count, Arena_Reserve::Size_2G); - dfsa->parent_indices = arena_array_new(DFS_Preallocation_Count, Arena_Reserve::Size_2G); - dfsa->index = 0; + dfsa->record_ids = arena_array_new(DFS_Preallocation_Count, Arena_Reserve::Size_2G); + dfsa->parent_ids = arena_array_new(DFS_Preallocation_Count, Arena_Reserve::Size_2G); + + dfsa->parent_indices = arena_array_new(DFS_Preallocation_Count, Arena_Reserve::Size_2G); + // dfsa->index = 0; } struct Dense_FS { // Link to OS_Drive @@ -79,9 +89,33 @@ struct Dense_FS { // Link to OS_Drive }; void initialize (Dense_FS* dfs, OS_Drive* drive) { - Assert(drive != nullptr); + Assert(drive != nullptr); Assert(dfs != nullptr); + // Is there a less stupid way of doing this? dfs->drive = drive; + drive->data = dfs; initialize(&dfs->paths); initialize(&dfs->files); - table_init(&dfs->path_table, DFS_Preallocation_Count); -} \ No newline at end of file + + table_init(&dfs->path_table, 1048576); // 2^20 + // dfs->path_table.hash_function = table_hash_function_fnv1a; // default. + dfs->path_table.hash_function = sdbm_hash; + dfs->path_table.compare_function = u32_keys_match; +} + +s32 find_previous_index (Dense_FS* dfs, u32 record_id, bool* success) { + s32 result = -1; + + (*success) = table_find(&dfs->path_table, record_id, &result); + + return result; +} + +void cleanup_after_enumeration(Dense_FS* dfs) { + table_release(&dfs->path_table); + reset_struct(&dfs->path_table); + + arena_array_free(*dfs->paths.record_ids); + arena_array_free(*dfs->paths.parent_ids); + arena_array_free(*dfs->files.record_ids); + arena_array_free(*dfs->files.parent_ids); +} diff --git a/lib/OS/OS_Win32_NTFS.cpp b/lib/OS/OS_Win32_NTFS.cpp index 08cba71..73a42cd 100644 --- a/lib/OS/OS_Win32_NTFS.cpp +++ b/lib/OS/OS_Win32_NTFS.cpp @@ -97,14 +97,13 @@ struct NTFS_RunHeader { #pragma pack(pop) struct NTFS_File { - u32 parent_id; - u32 record_id; - bool is_directory; - u8 name_count; + u32 parent_id; + u32 record_id; u16* name_data; - u64 file_modtime; // FILETIME? - u64 file_size; - string name_utf8; + u64 file_modtime; // FILETIME + u64 file_size; + u8 name_count; + bool is_directory; }; constexpr s64 NTFS_MFT_File_Record_Size = 1024; // File Entry Block @@ -121,6 +120,26 @@ struct NTFS_MFT_Internal { #endif }; +void add_record (Dense_FS* dfs, NTFS_File* file) { + DFS_Array* array; + if (file->is_directory) { + array = &dfs->paths; + } else { + array = &dfs->files; + } + + string s = wide_to_utf8(file->name_data, file->name_count); + + u32 offset = AddString_NoCount(array->strings, s.data, file->name_count); + // #TODO: Add other items to arrays: + array_add(*array->parent_ids, file->parent_id); + array_add(*array->record_ids, file->record_id); + array_add(*array->lengths, file->name_count); + array_add(*array->offsets, (u32)offset); + array_add(*array->modtimes, file->file_modtime); + array_add(*array->sizes, file->file_size); +} + NTFS_MFT_Internal* new_ntfs_mft_internal () { // call with temp NTFS_MFT_Internal* mft = New(true); mft->mft_file = ArrayView(NTFS_MFT_File_Record_Size); @@ -152,7 +171,7 @@ Error* NTFS_MFT_read_raw (OS_Drive* drive) { string drive_path = drive->label; Assert(context_allocator() != temp()); // pointless as we're releasing temp end-of-scope - Allocator primary_allocator = context_allocator(); + // Allocator primary_allocator = context_allocator(); auto_release_temp(); push_allocator(temp()); @@ -170,7 +189,7 @@ Error* NTFS_MFT_read_raw (OS_Drive* drive) { NTFS_MFT_Internal* mft = new_ntfs_mft_internal(); mft->handle = file_handle; - push_allocator(primary_allocator); + // push_allocator(primary_allocator); bool success; NTFS_BootSector boot_sector; @@ -206,8 +225,8 @@ Error* NTFS_MFT_read_raw (OS_Drive* drive) { Assert(data_attribute != nullptr); // #dense_fs_alloc - Dense_FS* dfs = New(); - initialize(dfs, drive); + drive->data = New(GPAllocator()); + initialize(drive->data, drive); NTFS_RunHeader* dataRun = (NTFS_RunHeader*)((u8*)data_attribute + data_attribute->dataRunsOffset); u64 cluster_number = 0, records_processed = 0; @@ -269,14 +288,13 @@ Error* NTFS_MFT_read_raw (OS_Drive* drive) { if (fileNameAttribute->namespaceType != 2 && !fileNameAttribute->nonResident) { file.parent_id = (u32)fileNameAttribute->parentRecordNumber; // truncate file.record_id = fileRecord->recordNumber; + file.is_directory = fileRecord->isDirectory; file.name_count = fileNameAttribute->fileNameLength; file.name_data = (u16*)fileNameAttribute->fileName; - // file.name_utf8 = wide_to_utf8(file.name_data, file.name_count); // @Allocates file.file_modtime = (u64)fileNameAttribute->modificationTime; - file.is_directory = fileRecord->isDirectory; // We need to get size from the data attribute - // #TODO: #continue from here! + add_record(drive->data, &file); // See Dense_FS drive->data mft->file_count += 1; } @@ -297,12 +315,48 @@ Error* NTFS_MFT_read_raw (OS_Drive* drive) { CloseHandle(file_handle); - log_none("Found %lld files (bytes_accessed: %s)", mft->file_count, format_bytes(mft->bytes_accessed).data); + log_none("Found %lld files on drive %s (bytes_accessed: %s)", mft->file_count, drive_path.data, format_bytes(mft->bytes_accessed).data); drive->file_count = mft->file_count; drive->bytes_accessed = mft->bytes_accessed; drive->time_to_enumerate = (f32)(GetUnixTimestamp() - start_time); + // #TODO: Generate parent_indices from record_id and parent_id + Timed_Block_Print("NTFS_MFT_read_raw: generate parent_indices"); + // 1. Setup hash table: + s64 path_count = item_count(&drive->data->paths); + for (s64 i = 0; i < path_count; i += 1) { + table_set(&drive->data->path_table, (*drive->data->paths.record_ids)[i], (s32)i); + } + + // Link directories: + array_resize(*drive->data->paths.parent_indices, path_count, /*init*/false); + s64 fail_count = 0; + for (s64 i = 0; i < path_count; i += 1) { + u32 parent_id = (*drive->data->paths.parent_ids)[i]; + bool parent_exists = 0; + s32 previous_index = find_previous_index(drive->data, parent_id, &parent_exists); + (*drive->data->paths.parent_indices)[i] = previous_index; // -1 if failed. + fail_count += (s64)(!parent_exists); + } + + // Link files: + s64 file_count = item_count(&drive->data->files); + array_resize(*drive->data->files.parent_indices, file_count, false); + for (s64 i = 0; i < file_count; i += 1) { + u32 parent_id = (*drive->data->files.parent_ids)[i]; + bool parent_exists = 0; + s32 previous_index = find_previous_index(drive->data, parent_id, &parent_exists); + (*drive->data->files.parent_indices)[i] = previous_index; // -1 if failed. + fail_count += (s64)(!parent_exists); + } + + if (fail_count) { + log_warning("[%s] Failed to find parent for %lld items", drive_path.data, fail_count); + } + + cleanup_after_enumeration(drive->data); + return NO_ERROR; } diff --git a/src/Ex1.cpp b/src/Ex1.cpp index 0a52ad0..5ab535c 100644 --- a/src/Ex1.cpp +++ b/src/Ex1.cpp @@ -166,16 +166,6 @@ void Ex1_Control_Panel () { using namespace ImGui; } } - if (ex1_ntfs.threads_started && !ex1_ntfs.threads_in_flight.count) { - // All threads are complete, we're free to clean up remaining memory - push_allocator(GPAllocator()); - array_free(ex1_ntfs.threads); - array_free(ex1_ntfs.threads_in_flight); - - // Instead maybe we should just memset this to zero. - reset_struct(&ex1_ntfs); - } - if (ex1_ntfs.threads_in_flight.count) { Text("Threads in flight: %d", ex1_ntfs.threads_in_flight.count); @@ -196,6 +186,17 @@ void Ex1_Control_Panel () { using namespace ImGui; } } } + + if (ex1_ntfs.threads_started && !ex1_ntfs.threads_in_flight.count) { + // All threads are complete, we're free to clean up remaining memory + push_allocator(GPAllocator()); + array_free(ex1_ntfs.threads); + array_free(ex1_ntfs.threads_in_flight); + + // Instead maybe we should just memset this to zero. + reset_struct(&ex1_ntfs); + } + End(); }