230 lines
7.3 KiB
C++
230 lines
7.3 KiB
C++
// So NTFS (and most systems) sort the tree by default in lexicographical descending order.
|
|
// For lookups, this often isn't that useful if you only know substrings of the path.
|
|
|
|
|
|
// struct BTNode {
|
|
// u16 key_count;
|
|
// BTNode* keys;
|
|
// u16 allocated;
|
|
// };
|
|
|
|
// struct B_Tree {
|
|
// BTNode* root;
|
|
// Allocator allocator;
|
|
// };
|
|
|
|
// A compact collection of data with sorting indices
|
|
// Maybe we can make B+ trees for sorting according to
|
|
// size and modtime.
|
|
|
|
// It really doesn't make sense to store data in memory as a B-tree except
|
|
// if we need ordered insertions and deletes.
|
|
//
|
|
|
|
// Returns offset
|
|
force_inline u32 AddString_NoCount (Serializer* serializer, u8* data, u8 count) { // #TODO: , bool null_terminate=false
|
|
u32 original_count = (u32)serializer->count;
|
|
u8* current_point = &serializer->data[original_count];
|
|
|
|
s64 final_count = serializer->allocated + (count * sizeof(u8));
|
|
|
|
if (serializer->allocated < final_count) {
|
|
array_reserve(*serializer, final_count);
|
|
}
|
|
|
|
memcpy(current_point, data, count * sizeof(u8));
|
|
serializer->count += count * sizeof(u8);
|
|
|
|
return original_count;
|
|
}
|
|
|
|
// Passed as the first argument to arena_array_new for every DFS_Array column in initialize().
constexpr s64 DFS_Preallocation_Count = (s64)1 << 22; // 4194304
|
|
|
|
// template <typename Length_Type>
|
|
struct DFS_Array {
|
|
Serializer* wstrings; // is a ArenaArray<u8>
|
|
Serializer* strings;
|
|
|
|
ArenaArray<u32>* offsets; // offsets into strings->data
|
|
ArenaArray<u8>* lengths; // this type may vary <hmmm> Not sure if I should make it a template argument. Seems yucky.
|
|
ArenaArray<u64>* modtimes;
|
|
ArenaArray<u64>* sizes;
|
|
|
|
ArenaArray<s32>* parent_indices;
|
|
|
|
// #Temporary arrays for linking files/dirs to their parent directory, if present.
|
|
ArenaArray<u32>* record_ids;
|
|
ArenaArray<u32>* parent_ids;
|
|
};
|
|
|
|
struct DFS_Value {
|
|
wstring wpath;
|
|
u64 modtime;
|
|
s64 size;
|
|
s32 parent_index;
|
|
string full_path;
|
|
};
|
|
|
|
struct Dense_FS { // Link to OS_Drive
|
|
DFS_Array paths;
|
|
DFS_Array files;
|
|
|
|
ArenaTable<u32, s32> path_table; // <entry_id, array_offset>.
|
|
|
|
OS_Drive* drive; // backlink for reference.
|
|
};
|
|
|
|
// Need the drive letter too!
|
|
string get_full_path (string drive_label, Dense_FS* dfs, s32 first_parent_index, string file_name) {
|
|
s32 parent_index = first_parent_index;
|
|
|
|
DFS_Array* paths = &dfs->paths;
|
|
|
|
auto_release_temp();
|
|
Array<string> path_list_reverse;
|
|
path_list_reverse.allocator = temp();
|
|
|
|
array_add(path_list_reverse, file_name);
|
|
|
|
while (parent_index > 0) { // -1 means we reached the drive letter (root)
|
|
u32 path_offset = (*paths->offsets)[parent_index];
|
|
u8* path_data_ptr = (u8*)((paths->strings->data) + path_offset);
|
|
string next_dir = {(s64)(*paths->lengths)[parent_index], path_data_ptr};
|
|
array_add(path_list_reverse, next_dir);
|
|
|
|
parent_index = (*paths->parent_indices)[parent_index];
|
|
}
|
|
|
|
// Drive letter with colon:
|
|
Assert(drive_label.count >= 2);
|
|
string drive_letter_with_colon = {2, drive_label.data};
|
|
|
|
array_add(path_list_reverse, drive_letter_with_colon);
|
|
|
|
// reset_string_builder(sb, true);
|
|
String_Builder* sb = new_string_builder();
|
|
|
|
for (s64 i = path_list_reverse.count-1; i >= 0; i -= 1) {
|
|
append(sb, path_list_reverse[i]);
|
|
if (i != 0) { append(sb, "/"); }
|
|
}
|
|
|
|
// return copy_string(string_view(sb));
|
|
return builder_to_string(sb);
|
|
}
|
|
|
|
// Resolves entry `i` of `dfsa` to a full path on `drive`, using drive->label for the
// drive prefix and drive->data as the Dense_FS whose directory table is walked.
string get_full_path_from_index (OS_Drive* drive, DFS_Array* dfsa, s64 i) {
    u32 name_offset = (*dfsa->offsets)[i];
    u8* name_bytes  = (u8*)((dfsa->strings->data) + name_offset);

    // View over the entry's own name inside the shared string storage (no copy).
    string file_name = {(*dfsa->lengths)[i], name_bytes};
    s32 parent = (*dfsa->parent_indices)[i];

    return get_full_path(drive->label, drive->data, parent, file_name);
}
|
|
|
|
// Gathers the columns of entry `i` in `dfsa` into a DFS_Value.
// `dfs` is currently unused. `wpath` is not populated and is returned value-initialized.
// `full_path` receives only the entry's own name (a view into dfsa->strings->data), not
// the joined path; use get_full_path / get_full_path_from_index for that.
DFS_Value get_value (Dense_FS* dfs, DFS_Array* dfsa, s64 i) {
    (void)dfs; // kept for interface compatibility

    u32 path_offset = (*dfsa->offsets)[i];
    u8* path_data_ptr = (u8*)((dfsa->strings->data) + path_offset);

    // Value-initialize so that members not assigned below (wpath) are never left indeterminate.
    DFS_Value dfsv = {};
    dfsv.modtime = (*dfsa->modtimes)[i];
    dfsv.size = (s64)(*dfsa->sizes)[i];
    dfsv.parent_index = (*dfsa->parent_indices)[i];

    // #TODO: I can just use strings; the wide-string storage (dfsa->wstrings) is not read here.
    dfsv.full_path = {(*dfsa->lengths)[i], path_data_ptr};

    return dfsv;
}
|
|
|
|
// Number of entries held by `dfsa`, taken from the count of its offsets column.
s64 item_count (DFS_Array* dfsa) {
    s64 entries = dfsa->offsets->count;
    return entries;
}
|
|
|
|
// Allocates every column of `dfsa`: two serializers for name storage and one arena array
// per column, each created with DFS_Preallocation_Count and Arena_Reserve::Size_2G.
void initialize (DFS_Array* dfsa) {
    Assert(dfsa != nullptr);

    const s64  initial_count = DFS_Preallocation_Count;
    const auto reserve       = Arena_Reserve::Size_2G;

    // Name storage.
    dfsa->wstrings = new_serializer(reserve);
    dfsa->strings  = new_serializer(reserve);

    // Per-entry columns.
    dfsa->offsets  = arena_array_new<u32>(initial_count, reserve);
    dfsa->lengths  = arena_array_new<u8> (initial_count, reserve);
    dfsa->modtimes = arena_array_new<u64>(initial_count, reserve);
    dfsa->sizes    = arena_array_new<u64>(initial_count, reserve);

    // Temporary id columns (freed again by cleanup_after_enumeration).
    dfsa->record_ids = arena_array_new<u32>(initial_count, reserve);
    dfsa->parent_ids = arena_array_new<u32>(initial_count, reserve);

    dfsa->parent_indices = arena_array_new<s32>(initial_count, reserve);
}
|
|
|
|
// Frees all storage owned by `dfsa` and zeroes the struct afterwards.
// The serializers are freed unconditionally; each arena array is freed only if is_valid
// accepts it, which covers the id columns already cleared by cleanup_after_enumeration.
void release (DFS_Array* dfsa) {
    free_serializer(dfsa->wstrings);
    free_serializer(dfsa->strings);

    if (is_valid(dfsa->offsets)) {
        arena_array_free(*dfsa->offsets);
    }
    if (is_valid(dfsa->lengths)) {
        arena_array_free(*dfsa->lengths);
    }
    if (is_valid(dfsa->modtimes)) {
        arena_array_free(*dfsa->modtimes);
    }
    if (is_valid(dfsa->sizes)) {
        arena_array_free(*dfsa->sizes);
    }

    if (is_valid(dfsa->record_ids)) {
        arena_array_free(*dfsa->record_ids);
    }
    if (is_valid(dfsa->parent_ids)) {
        arena_array_free(*dfsa->parent_ids);
    }

    if (is_valid(dfsa->parent_indices)) {
        arena_array_free(*dfsa->parent_indices);
    }

    // Raw pointers are used throughout, so clear them all to avoid dangling references.
    zero_struct(dfsa);
}
|
|
|
|
// Tears down a Dense_FS: drops the drive backlink, releases both DFS_Arrays, and releases
// the path table if it is still valid. The whole call is timed via Timed_Block_Print.
// NOTE(review): drive->data (set in initialize) is not cleared here — confirm the caller
// resets it.
force_inline void release (Dense_FS* dfs) {
    Timed_Block_Print("release: Dense_FS*");

    // The drive is not owned; it is just a link.
    dfs->drive = nullptr;

    DFS_Array* directories = &dfs->paths;
    DFS_Array* file_entries = &dfs->files;
    release(directories);
    release(file_entries);

    auto* table = &dfs->path_table;
    if (table_is_valid(table)) {
        table_release(table);
    }
}
|
|
|
|
// Allocates both DFS_Arrays of `dfs` and sets up its path table with 2^20 as the size
// argument, sdbm_hash as the hash function and u32_keys_match as the key comparison.
internal void Dense_FS_initialize (Dense_FS* dfs) {
    initialize(&dfs->paths);
    initialize(&dfs->files);

    auto* table = &dfs->path_table;
    table_init(table, 1 << 20); // 1048576

    // Overrides the table's default hash (table_hash_function_fnv1a, per the original note).
    table->hash_function    = sdbm_hash;
    table->compare_function = u32_keys_match;
}
|
|
|
|
// Links `dfs` and `drive` to each other (dfs->drive and drive->data), then allocates the
// Dense_FS storage. Both pointers must be non-null.
void initialize (Dense_FS* dfs, OS_Drive* drive) {
    Assert(drive != nullptr);
    Assert(dfs != nullptr);

    // Is there a less stupid way of doing this?
    // Two-way link: the drive can reach its listing and the listing can reach its drive.
    dfs->drive  = drive;
    drive->data = dfs;

    Dense_FS_initialize(dfs);
}
|
|
|
|
// Looks up `record_id` in dfs->path_table.
// Writes the result of table_find to *success and returns the value it produced; the
// returned index starts out as -1, so that is what comes back if table_find leaves it untouched.
s32 find_previous_index (Dense_FS* dfs, u32 record_id, bool* success) {
    s32 found_index = -1;

    *success = table_find(&dfs->path_table, record_id, &found_index);

    return found_index;
}
|
|
|
|
// Drops the data that is only needed while enumerating: the record-id lookup table and the
// temporary record_ids / parent_ids columns of both DFS_Arrays.
// Safe to call more than once: the table is released only while valid (same guard as
// release(Dense_FS*)), and an id column is freed only if its pointer is still set.
void cleanup_after_enumeration(Dense_FS* dfs) {
    if (table_is_valid(&dfs->path_table)) {
        table_release(&dfs->path_table);
    }
    reset_struct(&dfs->path_table);

    if (dfs->paths.record_ids != nullptr) { arena_array_free(*dfs->paths.record_ids); }
    if (dfs->paths.parent_ids != nullptr) { arena_array_free(*dfs->paths.parent_ids); }
    if (dfs->files.record_ids != nullptr) { arena_array_free(*dfs->files.record_ids); }
    if (dfs->files.parent_ids != nullptr) { arena_array_free(*dfs->files.parent_ids); }

    // Unfortunately, we have to do this whenever we free, because we're using raw pointers.
    dfs->paths.record_ids = nullptr;
    dfs->paths.parent_ids = nullptr;
    dfs->files.record_ids = nullptr;
    dfs->files.parent_ids = nullptr;
}
|