Fixed hash table lookups, some error insertions in push_error_no_context

This commit is contained in:
Musa Mahmood 2025-12-07 22:43:32 -05:00
parent fbcfed79b8
commit 85c0709504
7 changed files with 133 additions and 35 deletions

View File

@ -133,7 +133,7 @@ template <typename T> force_inline void maybe_grow (ArenaArray<T>& array) {
if (array.count >= array.allocated) {
s64 reserve = 2 * array.allocated;
// if reserve < 8 reserve = 8; // no point doing this because we allocate by page, and we're never realloc'ing
reserve_internal(array, reserve, sizeof(T));
reserve_internal((ArenaArray<u8>&)array, reserve, sizeof(T));
}
}

View File

@ -33,6 +33,7 @@ template <typename T, typename U> bool table_is_valid (ArenaTable<T, U>* table)
return true;
}
// table_release
template <typename T, typename U> void table_init (ArenaTable<T, U>* table, s64 slots_to_allocate=64, Arena_Reserve new_reserve=Arena_Reserve::Size_64M) {
s64 n = Next_Power_Of_Two(slots_to_allocate);

View File

@ -153,6 +153,7 @@ void push_error_no_context (Thread_Context* tctx, Error* new_error) {
if (new_error == nullptr) return;
Error* current_error = tctx->current_error;
if (current_error) {
new_error->previous_error = current_error;
current_error->next_error = new_error;
} else {
tctx->first_error = new_error;

View File

@ -56,6 +56,13 @@ u32 string_hash_function_fnv1a (void* key, s64 size) {
return (u32)(hash_u64 ^ (hash_u64 >> 32));
}
bool u32_keys_match (void* key1, void* key2) {
u32 key1_u32 = *(u32*)key1;
u32 key2_u32 = *(u32*)key2;
return key1_u32 == key2_u32;
}
bool u64_keys_match (void* key1, void* key2) {
u64 key1_u64 = *(u64*)key1;
u64 key2_u64 = *(u64*)key2;

View File

@ -22,7 +22,7 @@
//
// Returns offset
force_inline s32 AddString_NoCount (Serializer* serializer, u8* data, u8 count) { // #TODO: , bool null_terminate=false
force_inline u32 AddString_NoCount (Serializer* serializer, u8* data, u8 count) { // #TODO: , bool null_terminate=false
u8* current_point = &serializer->data[serializer->count];
s64 final_count = serializer->allocated + (count * sizeof(u8));
@ -32,6 +32,8 @@ force_inline s32 AddString_NoCount (Serializer* serializer, u8* data, u8 count)
memcpy(current_point, data, count * sizeof(u8));
serializer->count += count * sizeof(u8);
return (u32)serializer->count;
}
// Count handed to arena_array_new for every DFS_Array column: 2^22 = 4194304.
constexpr s64 DFS_Preallocation_Count = (s64)1 << 22;
@ -40,15 +42,16 @@ constexpr s64 DFS_Preallocation_Count = 4194304; // 2^22
struct DFS_Array {
Serializer* strings;
ArenaArray<u32>* offsets; // offsets into string_arena
ArenaArray<u32>* offsets; // offsets into strings->data
ArenaArray<u8>* lengths; // this type may vary <hmmm> Not sure if I should make it a template argument. Seems yucky.
ArenaArray<u64>* modtimes;
ArenaArray<u64>* sizes;
ArenaArray<s32>* parent_indices;
s64 index; // current index when inserting;
// s64 index; // current index when inserting;
// #Temporary arrays for linking???
// #Temporary arrays for linking files/dirs to their parent directory, if present.
ArenaArray<u32>* record_ids;
ArenaArray<u32>* parent_ids;
@ -57,6 +60,10 @@ struct DFS_Array {
// ArenaArray<s32> indices_sorted_by_size;
};
// Returns how many items `dfsa` currently holds, read from the count of its
// `offsets` array.
// NOTE(review): dfsa and dfsa->offsets are dereferenced unchecked — presumably
// this is only called after initialize(dfsa); confirm.
s64 item_count (DFS_Array* dfsa) {
    ArenaArray<u32>& offsets = *dfsa->offsets;
    return offsets.count;
}
void initialize (DFS_Array* dfsa) {
Assert(dfsa != nullptr);
dfsa->strings = new_serializer(Arena_Reserve::Size_2G);
@ -65,9 +72,12 @@ void initialize (DFS_Array* dfsa) {
dfsa->lengths = arena_array_new<u8> (DFS_Preallocation_Count, Arena_Reserve::Size_2G);
dfsa->modtimes = arena_array_new<u64>(DFS_Preallocation_Count, Arena_Reserve::Size_2G);
dfsa->sizes = arena_array_new<u64>(DFS_Preallocation_Count, Arena_Reserve::Size_2G);
dfsa->parent_indices = arena_array_new<s32>(DFS_Preallocation_Count, Arena_Reserve::Size_2G);
dfsa->index = 0;
dfsa->record_ids = arena_array_new<u32>(DFS_Preallocation_Count, Arena_Reserve::Size_2G);
dfsa->parent_ids = arena_array_new<u32>(DFS_Preallocation_Count, Arena_Reserve::Size_2G);
dfsa->parent_indices = arena_array_new<s32>(DFS_Preallocation_Count, Arena_Reserve::Size_2G);
// dfsa->index = 0;
}
struct Dense_FS { // Link to OS_Drive
@ -79,9 +89,33 @@ struct Dense_FS { // Link to OS_Drive
};
void initialize (Dense_FS* dfs, OS_Drive* drive) {
Assert(drive != nullptr);
Assert(drive != nullptr); Assert(dfs != nullptr);
// Is there a less stupid way of doing this?
dfs->drive = drive;
drive->data = dfs;
initialize(&dfs->paths);
initialize(&dfs->files);
table_init(&dfs->path_table, DFS_Preallocation_Count);
}
table_init(&dfs->path_table, 1048576); // 2^20
// dfs->path_table.hash_function = table_hash_function_fnv1a; // default.
dfs->path_table.hash_function = sdbm_hash;
dfs->path_table.compare_function = u32_keys_match;
}
// Looks `record_id` up in dfs->path_table and returns the s32 stored for it.
//   dfs       - owner of the path_table that is queried.
//   record_id - key to look up.
//   success   - out: receives whatever table_find returned.
// The local result starts at -1 and is returned as-is if table_find does not
// overwrite it.
// NOTE(review): callers treat -1 as "not found"; that relies on table_find
// leaving the output untouched on a miss — confirm.
s32 find_previous_index (Dense_FS* dfs, u32 record_id, bool* success) {
    s32 found_index = -1;
    bool was_found = table_find(&dfs->path_table, record_id, &found_index);
    *success = was_found;
    return found_index;
}
// Tears down the state that is only used while linking records to parents:
// releases and resets dfs->path_table, then frees the record_ids and
// parent_ids arrays of both dfs->paths and dfs->files (in that order).
// NOTE(review): the record_ids / parent_ids pointers themselves are not
// cleared here — presumably nothing reads them afterwards; confirm.
void cleanup_after_enumeration(Dense_FS* dfs) {
    auto* table = &dfs->path_table;
    table_release(table);
    reset_struct(table);

    DFS_Array* sets[] = { &dfs->paths, &dfs->files };
    for (DFS_Array* set : sets) {
        arena_array_free(*set->record_ids);
        arena_array_free(*set->parent_ids);
    }
}

View File

@ -97,14 +97,13 @@ struct NTFS_RunHeader {
#pragma pack(pop)
struct NTFS_File {
u32 parent_id;
u32 record_id;
bool is_directory;
u8 name_count;
u32 parent_id;
u32 record_id;
u16* name_data;
u64 file_modtime; // FILETIME?
u64 file_size;
string name_utf8;
u64 file_modtime; // FILETIME
u64 file_size;
u8 name_count;
bool is_directory;
};
// Element count used when creating the mft_file ArrayView<u8> buffer in new_ntfs_mft_internal.
// NOTE(review): name suggests this is the size in bytes of one MFT file record — confirm against the boot sector values.
constexpr s64 NTFS_MFT_File_Record_Size = 1024; // File Entry Block
@ -121,6 +120,26 @@ struct NTFS_MFT_Internal {
#endif
};
// Appends one NTFS record to the column arrays of a Dense_FS.
//   dfs  - destination; directories are stored in dfs->paths, all other
//          records in dfs->files.
//   file - record to add. name_data / name_count describe the UTF-16 name.
// The name is converted to UTF-8 and appended to the set's string serializer;
// parent_id, record_id, name length, string offset, modtime and size are then
// pushed onto the parallel arrays, one element each. parent_indices is not
// written here.
void add_record (Dense_FS* dfs, NTFS_File* file) {
    DFS_Array* array = file->is_directory ? &dfs->paths : &dfs->files;

    string s = wide_to_utf8(file->name_data, file->name_count);

    // file->name_count is the number of UTF-16 code units; the UTF-8 encoding
    // is longer for any non-ASCII name, so the byte count must come from the
    // converted string. AddString_NoCount and the lengths array both carry the
    // length as u8, so clamp to 255 bytes.
    // NOTE(review): assumes `string` exposes its byte length as `.count` — confirm.
    s64 utf8_count = s.count;
    if (utf8_count < 0) utf8_count = 0;
    if (utf8_count > 0xFF) {
        utf8_count = 0xFF;
        // Keep the cut on a code-point boundary: back up while the first
        // dropped byte is a UTF-8 continuation byte (10xxxxxx).
        while (utf8_count > 0 && (s.data[utf8_count] & 0xC0) == 0x80) utf8_count -= 1;
    }
    u8 name_bytes = (u8)utf8_count;

    // NOTE(review): the visible tail of AddString_NoCount returns
    // serializer->count after it has been advanced, i.e. the end of the
    // appended bytes — confirm that readers of `offsets` expect that.
    u32 offset = AddString_NoCount(array->strings, s.data, name_bytes);

    // #TODO: Add other items to arrays:
    array_add(*array->parent_ids, file->parent_id);
    array_add(*array->record_ids, file->record_id);
    array_add(*array->lengths,    name_bytes);
    array_add(*array->offsets,    offset);
    array_add(*array->modtimes,   file->file_modtime);
    array_add(*array->sizes,      file->file_size);
}
NTFS_MFT_Internal* new_ntfs_mft_internal () { // call with temp
NTFS_MFT_Internal* mft = New<NTFS_MFT_Internal>(true);
mft->mft_file = ArrayView<u8>(NTFS_MFT_File_Record_Size);
@ -152,7 +171,7 @@ Error* NTFS_MFT_read_raw (OS_Drive* drive) {
string drive_path = drive->label;
Assert(context_allocator() != temp()); // pointless as we're releasing temp end-of-scope
Allocator primary_allocator = context_allocator();
// Allocator primary_allocator = context_allocator();
auto_release_temp();
push_allocator(temp());
@ -170,7 +189,7 @@ Error* NTFS_MFT_read_raw (OS_Drive* drive) {
NTFS_MFT_Internal* mft = new_ntfs_mft_internal();
mft->handle = file_handle;
push_allocator(primary_allocator);
// push_allocator(primary_allocator);
bool success;
NTFS_BootSector boot_sector;
@ -206,8 +225,8 @@ Error* NTFS_MFT_read_raw (OS_Drive* drive) {
Assert(data_attribute != nullptr);
// #dense_fs_alloc
Dense_FS* dfs = New<Dense_FS>();
initialize(dfs, drive);
drive->data = New<Dense_FS>(GPAllocator());
initialize(drive->data, drive);
NTFS_RunHeader* dataRun = (NTFS_RunHeader*)((u8*)data_attribute + data_attribute->dataRunsOffset);
u64 cluster_number = 0, records_processed = 0;
@ -269,14 +288,13 @@ Error* NTFS_MFT_read_raw (OS_Drive* drive) {
if (fileNameAttribute->namespaceType != 2 && !fileNameAttribute->nonResident) {
file.parent_id = (u32)fileNameAttribute->parentRecordNumber; // truncate
file.record_id = fileRecord->recordNumber;
file.is_directory = fileRecord->isDirectory;
file.name_count = fileNameAttribute->fileNameLength;
file.name_data = (u16*)fileNameAttribute->fileName;
// file.name_utf8 = wide_to_utf8(file.name_data, file.name_count); // @Allocates
file.file_modtime = (u64)fileNameAttribute->modificationTime;
file.is_directory = fileRecord->isDirectory;
// We need to get size from the data attribute
// #TODO: #continue from here!
add_record(drive->data, &file);
// See Dense_FS drive->data
mft->file_count += 1;
}
@ -297,12 +315,48 @@ Error* NTFS_MFT_read_raw (OS_Drive* drive) {
CloseHandle(file_handle);
log_none("Found %lld files (bytes_accessed: %s)", mft->file_count, format_bytes(mft->bytes_accessed).data);
log_none("Found %lld files on drive %s (bytes_accessed: %s)", mft->file_count, drive_path.data, format_bytes(mft->bytes_accessed).data);
drive->file_count = mft->file_count;
drive->bytes_accessed = mft->bytes_accessed;
drive->time_to_enumerate = (f32)(GetUnixTimestamp() - start_time);
// #TODO: Generate parent_indices from record_id and parent_id
Timed_Block_Print("NTFS_MFT_read_raw: generate parent_indices");
// 1. Setup hash table:
s64 path_count = item_count(&drive->data->paths);
for (s64 i = 0; i < path_count; i += 1) {
table_set(&drive->data->path_table, (*drive->data->paths.record_ids)[i], (s32)i);
}
// Link directories:
array_resize(*drive->data->paths.parent_indices, path_count, /*init*/false);
s64 fail_count = 0;
for (s64 i = 0; i < path_count; i += 1) {
u32 parent_id = (*drive->data->paths.parent_ids)[i];
bool parent_exists = 0;
s32 previous_index = find_previous_index(drive->data, parent_id, &parent_exists);
(*drive->data->paths.parent_indices)[i] = previous_index; // -1 if failed.
fail_count += (s64)(!parent_exists);
}
// Link files:
s64 file_count = item_count(&drive->data->files);
array_resize(*drive->data->files.parent_indices, file_count, false);
for (s64 i = 0; i < file_count; i += 1) {
u32 parent_id = (*drive->data->files.parent_ids)[i];
bool parent_exists = 0;
s32 previous_index = find_previous_index(drive->data, parent_id, &parent_exists);
(*drive->data->files.parent_indices)[i] = previous_index; // -1 if failed.
fail_count += (s64)(!parent_exists);
}
if (fail_count) {
log_warning("[%s] Failed to find parent for %lld items", drive_path.data, fail_count);
}
cleanup_after_enumeration(drive->data);
return NO_ERROR;
}

View File

@ -166,16 +166,6 @@ void Ex1_Control_Panel () { using namespace ImGui;
}
}
if (ex1_ntfs.threads_started && !ex1_ntfs.threads_in_flight.count) {
// All threads are complete, we're free to clean up remaining memory
push_allocator(GPAllocator());
array_free(ex1_ntfs.threads);
array_free(ex1_ntfs.threads_in_flight);
// Instead maybe we should just memset this to zero.
reset_struct(&ex1_ntfs);
}
if (ex1_ntfs.threads_in_flight.count) {
Text("Threads in flight: %d", ex1_ntfs.threads_in_flight.count);
@ -196,6 +186,17 @@ void Ex1_Control_Panel () { using namespace ImGui;
}
}
}
if (ex1_ntfs.threads_started && !ex1_ntfs.threads_in_flight.count) {
// All threads are complete, we're free to clean up remaining memory
push_allocator(GPAllocator());
array_free(ex1_ntfs.threads);
array_free(ex1_ntfs.threads_in_flight);
// Instead maybe we should just memset this to zero.
reset_struct(&ex1_ntfs);
}
End();
}