// So NTFS (and most systems) sort the tree by default in lexicographical descending order. // For lookups, this often isn't that useful if you only know substrings of the path. // struct BTNode { // u16 key_count; // BTNode* keys; // u16 allocated; // }; // struct B_Tree { // BTNode* root; // Allocator allocator; // }; // A compact collection of data with sorting indices // Maybe we can make B+ trees for sorting according to // size and modtime. // It really doesn't make sense to store data in memory as a B-tree except // if we need ordered insertions and deletes. // // Returns offset force_inline u32 AddString_NoCount (Serializer* serializer, u8* data, u8 count) { // #TODO: , bool null_terminate=false u32 original_count = (u32)serializer->count; u8* current_point = &serializer->data[original_count]; s64 final_count = serializer->allocated + (count * sizeof(u8)); if (serializer->allocated < final_count) { array_reserve(*serializer, final_count); } memcpy(current_point, data, count * sizeof(u8)); serializer->count += count * sizeof(u8); return original_count; } constexpr s64 DFS_Preallocation_Count = 4194304; // 2^22 // template struct DFS_Array { Serializer* wstrings; // is a ArenaArray Serializer* strings; ArenaArray* offsets; // offsets into strings->data ArenaArray* lengths; // this type may vary Not sure if I should make it a template argument. Seems yucky. ArenaArray* modtimes; ArenaArray* sizes; ArenaArray* parent_indices; // #Temporary arrays for linking files/dirs to their parent directory, if present. ArenaArray* record_ids; ArenaArray* parent_ids; }; struct DFS_Value { wstring wpath; u64 modtime; s64 size; s32 parent_index; string full_path; }; struct Dense_FS { // Link to OS_Drive DFS_Array paths; DFS_Array files; ArenaTable path_table; // . OS_Drive* drive; // backlink for reference. }; // Need the drive letter too! string get_full_path (string drive_label, Dense_FS* dfs, s32 first_parent_index, string file_name) { s32 parent_index = first_parent_index; DFS_Array* paths = &dfs->paths; auto_release_temp(); Array path_list_reverse; path_list_reverse.allocator = temp(); array_add(path_list_reverse, file_name); while (parent_index > 0) { // -1 means we reached the drive letter (root) u32 path_offset = (*paths->offsets)[parent_index]; u8* path_data_ptr = (u8*)((paths->strings->data) + path_offset); string next_dir = {(s64)(*paths->lengths)[parent_index], path_data_ptr}; array_add(path_list_reverse, next_dir); parent_index = (*paths->parent_indices)[parent_index]; } // Drive letter with colon: Assert(drive_label.count >= 2); string drive_letter_with_colon = {2, drive_label.data}; array_add(path_list_reverse, drive_letter_with_colon); // reset_string_builder(sb, true); String_Builder* sb = new_string_builder(); for (s64 i = path_list_reverse.count-1; i >= 0; i -= 1) { append(sb, path_list_reverse[i]); if (i != 0) { append(sb, "/"); } } // return copy_string(string_view(sb)); return builder_to_string(sb); } string get_full_path_from_index (OS_Drive* drive, DFS_Array* dfsa, s64 i) { u32 path_offset = (*dfsa->offsets)[i]; u8* path_data_ptr = (u8*)((dfsa->strings->data) + path_offset); string file_name = {(*dfsa->lengths)[i], path_data_ptr}; return get_full_path(drive->label, drive->data, (*dfsa->parent_indices)[i], file_name); } DFS_Value get_value (Dense_FS* dfs, DFS_Array* dfsa, s64 i) { u32 path_offset = (*dfsa->offsets)[i]; u8* path_data_ptr = (u8*)((dfsa->strings->data) + path_offset); // u16* path_wide_ptr = (u16*)((dfsa->wstrings->data) + path_offset); // wstring path_wide = {(*dfsa->lengths)[i], path_wide_ptr}; DFS_Value dfsv; dfsv.modtime = (*dfsa->modtimes)[i]; dfsv.size = (s64)(*dfsa->sizes)[i]; dfsv.parent_index = (*dfsa->parent_indices)[i]; // #TODO: I cna just use strings. // string path_utf8 = wide_to_utf8(path_wide.data, (s32)path_wide.count); // dfsv.full_path = get_full_path_from_offset(dfs, dfsv.parent_index, path_utf8); dfsv.full_path = {(*dfsa->lengths)[i], path_data_ptr}; return dfsv; } s64 item_count (DFS_Array* dfsa) { return dfsa->offsets->count; } void initialize (DFS_Array* dfsa) { Assert(dfsa != nullptr); dfsa->wstrings = new_serializer(Arena_Reserve::Size_2G); dfsa->strings = new_serializer(Arena_Reserve::Size_2G); dfsa->offsets = arena_array_new(DFS_Preallocation_Count, Arena_Reserve::Size_2G); dfsa->lengths = arena_array_new (DFS_Preallocation_Count, Arena_Reserve::Size_2G); dfsa->modtimes = arena_array_new(DFS_Preallocation_Count, Arena_Reserve::Size_2G); dfsa->sizes = arena_array_new(DFS_Preallocation_Count, Arena_Reserve::Size_2G); dfsa->record_ids = arena_array_new(DFS_Preallocation_Count, Arena_Reserve::Size_2G); dfsa->parent_ids = arena_array_new(DFS_Preallocation_Count, Arena_Reserve::Size_2G); dfsa->parent_indices = arena_array_new(DFS_Preallocation_Count, Arena_Reserve::Size_2G); } void release (DFS_Array* dfsa) { free_serializer(dfsa->wstrings); free_serializer(dfsa->strings); if (is_valid(dfsa->offsets)) { arena_array_free(*dfsa->offsets); } if (is_valid(dfsa->lengths)) { arena_array_free(*dfsa->lengths); } if (is_valid(dfsa->modtimes)) { arena_array_free(*dfsa->modtimes); } if (is_valid(dfsa->sizes)) { arena_array_free(*dfsa->sizes); } if (is_valid(dfsa->record_ids)) { arena_array_free(*dfsa->record_ids); } if (is_valid(dfsa->parent_ids)) { arena_array_free(*dfsa->parent_ids); } if (is_valid(dfsa->parent_indices)) { arena_array_free(*dfsa->parent_indices); } zero_struct(dfsa); } force_inline void release (Dense_FS* dfs) { Timed_Block_Print("release: Dense_FS*"); dfs->drive = nullptr; // just a link release(&dfs->paths); release(&dfs->files); if (table_is_valid(&dfs->path_table)) { table_release(&dfs->path_table); } } internal void Dense_FS_initialize (Dense_FS* dfs) { initialize(&dfs->paths); initialize(&dfs->files); table_init(&dfs->path_table, 1048576); // 2^20 // dfs->path_table.hash_function = table_hash_function_fnv1a; // default. dfs->path_table.hash_function = sdbm_hash; dfs->path_table.compare_function = u32_keys_match; } void initialize (Dense_FS* dfs, OS_Drive* drive) { Assert(drive != nullptr); Assert(dfs != nullptr); // Is there a less stupid way of doing this? dfs->drive = drive; drive->data = dfs; Dense_FS_initialize(dfs); } s32 find_previous_index (Dense_FS* dfs, u32 record_id, bool* success) { s32 result = -1; (*success) = table_find(&dfs->path_table, record_id, &result); return result; } void cleanup_after_enumeration(Dense_FS* dfs) { table_release(&dfs->path_table); reset_struct(&dfs->path_table); arena_array_free(*dfs->paths.record_ids); arena_array_free(*dfs->paths.parent_ids); arena_array_free(*dfs->files.record_ids); arena_array_free(*dfs->files.parent_ids); // Unfortunately, we have to do this whenever we free, because we're using raw pointers. dfs->paths.record_ids = nullptr; dfs->paths.parent_ids = nullptr; dfs->files.record_ids = nullptr; dfs->files.parent_ids = nullptr; }