Parsing NTFS MFT with file size.

This commit is contained in:
Musa Mahmood 2025-12-04 23:44:52 -05:00
parent 3a22156ed3
commit 849b8210e0
13 changed files with 443 additions and 68 deletions

View File

@ -25,7 +25,15 @@ struct Allocator {
void* data;
bool operator ! () {
return (proc == nullptr);
return (proc == nullptr);
}
// Two allocators are equal when both their proc and their data pointer match.
bool operator==(const Allocator& other) const {
    if (proc != other.proc) return false;
    return data == other.data;
}
// Negation of operator==.
bool operator!=(const Allocator& other) const {
    const bool same = (*this == other);
    return !same;
}
};

View File

@ -207,3 +207,54 @@ s64 arena_usage_committed_bytes (Arena* arena) { return (s64)(arena->first_uncom
Allocator allocator (Arena* arena) {
return { arena_allocator_proc, arena };
}
struct Auto_Reset {
Arena* arena;
u8* starting_point;
Auto_Reset(Arena* arena) {
Assert(is_valid(arena));
this->arena = arena;
this->starting_point = arena->current_point;
}
// #TODO: Implement with ExpandableArena
// Auto_Reset(ExpandableArena* arena_ex) {
// Auto_Reset((Arena*)arena_ex);
// }
~Auto_Reset() {
arena->current_point = starting_point;
}
};
// Scope guard that rewinds an arena to the point it was at when the guard was
// created, and gives memory back when the guard is destroyed.
//  - Arena*:           current_point is restored and committed pages are freed
//                      down to arena->initial_commit_page_count.
//  - ExpandableArena*: the rewind is delegated to arena_reset_to(), using the
//                      arena that was `current` at construction time.
struct Auto_Release {
    bool             is_expandable;
    ExpandableArena* arena_ex;       // nullptr when guarding a plain Arena
    Arena*           arena;          // arena whose current_point was captured
    u8*              starting_point; // value of arena->current_point at construction

    Auto_Release(ExpandableArena* arena_ex) {
        // Validate before touching any member of arena_ex.
        Assert(is_valid(arena_ex));
        this->arena_ex       = arena_ex;
        this->arena          = arena_ex->current;
        this->starting_point = arena_ex->current->current_point;
        this->is_expandable  = true;
    }

    Auto_Release(Arena* arena) {
        // Validate before dereferencing; the assert used to run after
        // arena->current_point had already been read.
        Assert(is_valid(arena));
        this->arena_ex       = nullptr;
        this->arena          = arena;
        this->starting_point = arena->current_point;
        this->is_expandable  = false;
    }

    ~Auto_Release() {
        if (is_expandable) {
            arena_reset_to(arena_ex, arena, starting_point);
        } else {
            arena->current_point = starting_point;
            free_pages_down_to(arena, arena->initial_commit_page_count);
        }
    }
};

View File

@ -114,41 +114,6 @@ void arena_delete (Arena* arena);
Arena_Reserve next_reserve_size (s64 size);
// Scope guard: records an Arena's current_point when constructed and writes
// that value back into the arena when the guard is destroyed.
struct Auto_Reset {
    Arena* arena;
    u8*    starting_point;

    Auto_Reset(Arena* arena) {
        Assert(is_valid(arena));
        this->arena = arena;
        this->starting_point = arena->current_point;
    }

    // Delegating constructor. Writing `Auto_Reset((Arena*)arena_ex);` inside the
    // body only creates (and immediately destroys) a temporary guard and leaves
    // this object's members uninitialized.
    // NOTE(review): the cast assumes an ExpandableArena can be used as an Arena;
    // confirm against the ExpandableArena definition.
    Auto_Reset(ExpandableArena* arena_ex) : Auto_Reset((Arena*)arena_ex) {}

    ~Auto_Reset() {
        arena->current_point = starting_point;
    }
};
// Scope guard: on destruction restores the arena's current_point to its value
// at construction and frees committed pages down to the arena's
// initial_commit_page_count.
struct Auto_Release {
    Arena* arena;
    u8*    starting_point;

    Auto_Release(Arena* arena) {
        // Validate before dereferencing; the assert used to run after
        // arena->current_point had already been read.
        Assert(is_valid(arena));
        this->arena = arena;
        this->starting_point = arena->current_point;
    }

    ~Auto_Release() {
        arena->current_point = starting_point;
        free_pages_down_to(arena, arena->initial_commit_page_count);
    }
};
struct Push_Alignment { // #rename to Arena_Push_Alignment?
Arena* arena;
u16 original_alignment;

View File

@ -187,11 +187,10 @@ force_inline s64 Next_Power_Of_Two(s64 v) {
Push_Arena Concat(_push_arena_guard_, __LINE__)(x)
#define push_expandable_arena(x) \
Push_Expandable_Arena Concat(_push_ex_arena_guard_, __LINE__)(x)
#define auto_release_temp() \
auto_release(temp());
#define auto_release(x) \
Auto_Release Concat(_auto_release_guard_, __LINE__)(x)
#define auto_release_temp() \
auto_release(thread_context()->temp);
#define thread_context() thread_local_context
#define temp() allocator(thread_context()->temp)

View File

@ -56,7 +56,7 @@ void* expandable_arena_alloc (ExpandableArena* arena_ex, s64 byte_count) {
if (result_end > arena->first_uncommitted_page) {
if (result_end > arena_address_limit(arena)) {
// Pick an appropriate reserve size that will fit this allocation.
Arena_Reserve new_min_reserve = next_reserve_size(byte_count);
Arena_Reserve new_min_reserve = next_reserve_size(byte_count + sizeof(Arena));
if (arena->reserve_size > new_min_reserve) {
new_min_reserve = arena->reserve_size;
}
@ -73,7 +73,7 @@ void* expandable_arena_alloc (ExpandableArena* arena_ex, s64 byte_count) {
result = Align<u8*>(new_arena->current_point, new_arena->alignment);
result_end = result + byte_count;
if (result_end > arena_address_limit(new_arena)) {
if (result_end > new_arena->first_uncommitted_page) {
extend_committed_pages(new_arena, result_end);
}
} else {
@ -94,6 +94,21 @@ Allocator allocator (ExpandableArena* arena_ex) {
return { expandable_arena_allocator_proc, arena_ex };
}
// Rewinds an expandable arena to a previously captured position.
//   arena_ex       - the expandable arena to rewind
//   last_arena     - the arena that was arena_ex->current when the position was captured
//   starting_point - last_arena->current_point at that time
// Every arena stored after last_arena in arena_ex->next_arenas is released and
// removed from the list; last_arena becomes current again and its
// current_point is restored.
// #TODO: pages committed in last_arena past starting_point are not freed here.
void arena_reset_to (ExpandableArena* arena_ex, Arena* last_arena, u8* starting_point) {
    // Walk backwards from the end of the list, down to and including index 0,
    // so last_arena is found even when it is the first entry.
    for (s64 i = arena_ex->next_arenas.count-1; i >= 0; i -= 1) {
        Arena* arena = arena_ex->next_arenas[i];
        if (arena == last_arena) break;
        release_arena(arena);
        array_unordered_remove_by_index(arena_ex->next_arenas, i);
    }
    // Always restore the captured position. last_arena may not be in
    // next_arenas at all; skipping this would leave `current` pointing at an
    // arena that was just released, and would not rewind when nothing grew.
    arena_ex->current = last_arena;
    last_arena->current_point = starting_point;
}
void arena_reset (ExpandableArena* arena_ex, bool free_extra_pages) {
if (!is_valid(arena_ex)) return;

View File

@ -24,5 +24,6 @@ bool is_valid (ExpandableArena* arena);
void* expandable_arena_alloc (ExpandableArena* arena_ex, s64 byte_count);
u8* expandable_arena_start (ExpandableArena* arena_ex);
Allocator allocator (ExpandableArena* arena_ex);
void arena_reset_to (ExpandableArena* arena_ex, Arena* last_arena, u8* starting_point);
void arena_reset (ExpandableArena* arena_ex, bool free_extra_pages=true);
force_inline void arena_delete (ExpandableArena* arena_ex);

View File

@ -266,8 +266,7 @@ template <typename T, typename U> bool table_remove (Table<T, U>* table, T key,
return false;
}
// #TODO: we need a for expansion iterator?
// table_find_multiple (put results in Temp-backed Array<>, and return it as an ArrayView<T>) {
// #TODO: We should allow setting an allocator instead of defaulting to temp()?
template <typename T, typename U> ArrayView<U> table_find_multiple (Table<T, U>* table, T key, U* value) {
Array<U> results;
results.allocator = temp();
@ -307,11 +306,7 @@ template <typename T, typename U> ArrayView<U> table_find_multiple (Table<T, U>*
return to_view(results);
}
// #TODO:
// find_or_add is kind of like table_set, but used when you
// just want a pointer to the value, which you can fill in.
// find_or_add is kind of like table_set, but used when you just want a pointer to the value, which you can fill in.
template <typename T, typename U> U* table_find_or_add (Table<T, U>* table, T key, bool* newly_added) {
U* value = table_find_pointer(table, key);
if (value) {
@ -324,12 +319,3 @@ template <typename T, typename U> U* table_find_or_add (Table<T, U>* table, T ke
(*newly_added) = true;
return value;
}
// find_or_add :: (table: *Table, key: table.Key_Type) -> (entry: *table.Value_Type, newly_added: bool) {
// value := table_find_pointer(table, key);
// if value return value, false;
// new_value: table.Value_Type;
// value = table_add(table, key, new_value);
// return value, true;
// }

View File

@ -1,11 +1,10 @@
#pragma once
// #TODO: #strings:
// [ ] Always null-terminate strings!
// [x] Always null-terminate strings!
// [ ] How do I accept variadic arguments of any type to my print function?
// [ ] Need to sort out how formatted strings and string builders are allocated
// [ ] Separate functions for temp alloc (tprint??)
// [ ] API needs to be completely overhauled
// [ ] I should also put path manipulation here or in a separate file.
// [ ] I should also put path manipulation here or in a separate file?
struct string {
s64 count;

View File

@ -69,6 +69,19 @@ string format_cycles (u64 ticks) {
return format_string("%1.2f %s", count_f64, units[unit_index].data);
}
// Formats a byte count with a binary-scaled unit ("B" through "PB"), dividing
// by 1024 until the value is below 1024 or the largest unit is reached.
string format_bytes (s64 bytes) {
    string units[6] = { "B", "KB", "MB", "GB", "TB", "PB" };

    f64 scaled = (f64)bytes;
    s32 unit   = 0;
    for (; scaled >= 1024 && unit < (5); unit += 1) {
        scaled /= 1024.0;
    }

    return format_string("%1.3f %s", scaled, units[unit].data);
}
struct timed_block_print {
string block_name;
u64 start_tick;

View File

@ -978,13 +978,20 @@ bool Win32_Discover_Drives () {
return true;
}
bool Win32_Drive_Exists (string drive_letter) {
// Drive label includes `:\`
bool Win32_Drive_Exists (string drive_label) {
push_allocator(temp());
LPCWSTR drive_letter_wide = (LPCWSTR)utf8_to_wide(drive_letter).data;
UINT type = GetDriveTypeW(drive_letter_wide);
LPCWSTR drive_label_wide = (LPCWSTR)utf8_to_wide(drive_label).data;
UINT type = GetDriveTypeW(drive_label_wide);
return (type != DRIVE_UNKNOWN && type != DRIVE_NO_ROOT_DIR);
// Alternative method:
// return (bool)GetVolumeInformationW(drive_letter_wide, nullptr, 0, nullptr, nullptr, nullptr, nullptr, 0);
// return (bool)GetVolumeInformationW(drive_label_wide, nullptr, 0, nullptr, nullptr, nullptr, nullptr, 0);
}
// Returns a copy of the first character of any_path as its own string.
string Win32_drive_letter (string any_path) {
    // #TODO: remove leading `\\.\` if present, assert if drive letter is invalid.
    // A one-character view over the start of the path...
    string first_character = {1, any_path.data};
    // ...copied so it is null-terminated, and can be used as %s in format_string.
    return copy_string(first_character);
}
// #TODO: #window_creation

314
lib/OS/OS_Win32_NTFS.cpp Normal file
View File

@ -0,0 +1,314 @@
// Reference: https://handmade.network/forums/articles/t/7002-tutorial_parsing_the_mft
#pragma pack(push,1)
// On-disk layout of the boot sector; NTFS_MFT_read_raw reads 512 bytes from
// offset 0 of the volume into this struct. Must stay inside the surrounding
// #pragma pack(push,1) so the fields add up to exactly 512 bytes.
struct NTFS_BootSector {
u8 jump[3];
u8 name[8];
u16 bytesPerSector; // multiplied by sectorsPerCluster to get bytes per cluster
u8 sectorsPerCluster;
u16 reservedSectors;
u8 unused0[3];
u16 unused1;
u8 media;
u16 unused2;
u16 sectorsPerTrack;
u16 headsPerCylinder;
u32 hiddenSectors;
u32 unused3;
u32 unused4;
u64 totalSectors;
u64 mftStart; // in clusters: multiplied by bytes-per-cluster to locate the first MFT record
u64 mftMirrorStart;
u32 clustersPerFileRecord;
u32 clustersPerIndexBlock;
u64 serialNumber;
u32 checksum;
u8 bootloader[426];
u16 bootSignature;
};
// Header found at the start of every MFT file record (records are
// NTFS_MFT_File_Record_Size bytes each in this code).
struct NTFS_FileRecordHeader {
u32 magic; // compared against 0x454C4946 ("FILE" in little-endian byte order)
u16 updateSequenceOffset;
u16 updateSequenceSize;
u64 logSequence;
u16 sequenceNumber;
u16 hardLinkCount;
u16 firstAttributeOffset; // byte offset from the start of the record to its first attribute
u16 inUse : 1; // records with this bit clear are skipped by the reader
u16 isDirectory : 1;
u32 usedSize;
u32 allocatedSize;
u64 fileReference;
u16 nextAttributeID;
u16 unused;
u32 recordNumber; // copied into NTFS_File::record_id
};
// Common header at the start of every attribute inside a file record.
struct NTFS_AttributeHeader {
u32 attributeType; // values used in this file: 0x30 ($FILE_NAME), 0x80 ($DATA), 0xB0, 0xFFFFFFFF (end of attribute list)
u32 length; // byte distance from this attribute to the next one
u8 nonResident; // non-zero: the attribute uses the NTFS_NonResidentAttributeHeader layout
u8 nameLength;
u16 nameOffset;
u16 flags;
u16 attributeID;
};
// Common attribute header followed by the extra fields of a resident attribute.
// NOTE(review): presumably attributeLength/attributeOffset give the size and
// position of the value stored inside the record — confirm against an NTFS reference.
struct NTFS_ResidentAttributeHeader : NTFS_AttributeHeader {
u32 attributeLength;
u16 attributeOffset;
u8 indexed;
u8 unused;
};
// Layout the reader applies to attributes of type 0x30 ($FILE_NAME). The
// attribute pointer is cast directly to this struct, i.e. the name fields are
// assumed to start immediately after the resident header (attributeOffset is
// not consulted).
struct NTFS_FileNameAttributeHeader : NTFS_ResidentAttributeHeader {
u64 parentRecordNumber : 48; // truncated to u32 when stored in NTFS_File::parent_id
u64 sequenceNumber : 16;
u64 creationTime;
u64 modificationTime; // copied into NTFS_File::file_modtime
u64 metadataModificationTime;
u64 readTime;
u64 allocatedSize;
u64 realSize;
u32 flags;
u32 repase; // NOTE(review): looks like a typo of "reparse"; unused in this file
u8 fileNameLength; // number of u16 units in fileName
u8 namespaceType; // names with value 2 are skipped by the reader (presumably DOS 8.3 names — confirm)
u16 fileName[1]; // first element of the variable-length name that follows
};
// Common attribute header followed by the extra fields of a non-resident
// attribute (one whose contents are described by data runs).
struct NTFS_NonResidentAttributeHeader : NTFS_AttributeHeader {
u64 firstCluster;
u64 lastCluster;
u16 dataRunsOffset; // byte offset from the start of this attribute to its first NTFS_RunHeader
u16 compressionUnit;
u32 unused;
u64 attributeAllocated; // allocatedSize
u64 attributeSize; // dataSize; used as the file size for $DATA attributes
u64 streamDataSize; // initializedSize
// u64 compressedSize;
};
// One-byte header of a data run. It is followed by lengthFieldBytes bytes
// holding the run length and then offsetFieldBytes bytes holding the cluster
// offset, both assembled least-significant byte first by the reader.
struct NTFS_RunHeader {
u8 lengthFieldBytes : 4; // 0 terminates the run list
u8 offsetFieldBytes : 4;
};
#pragma pack(pop)
// In-memory summary of one MFT file record, filled in while scanning
// $FILE_NAME and $DATA attributes in NTFS_MFT_read_raw.
struct NTFS_File {
u32 parent_id; // parent record number, truncated from 48 bits
u32 record_id; // NTFS_FileRecordHeader::recordNumber
bool is_directory;
u8 name_count; // number of u16 units in name_data
u16* name_data; // points into the MFT read buffer, so only valid until that buffer is overwritten
u64 file_modtime; // FILETIME?
u64 file_size; // taken from the record's $DATA attribute
string name_utf8; // name_data converted with wide_to_utf8
};
constexpr s64 NTFS_MFT_File_Record_Size = 1024; // File Entry Block
constexpr s64 NTFS_MFT_Files_Per_Buffer = 65536;
// #rename: should be NTFS_MFT_Internal
struct NTFS_MFT_Internal {
ArrayView<u8> mft_file;
ArrayView<u8> mft_buffer;
HANDLE handle;
#if BUILD_DEBUG
s64 bytes_accessed;
s64 file_count;
#endif
};
// Allocates an NTFS_MFT_Internal together with its two byte buffers: one file
// record for the $MFT record itself and one chunk buffer of
// NTFS_MFT_Files_Per_Buffer records.
NTFS_MFT_Internal* new_ntfs_drive () { // call with temp
    const s64 record_bytes = NTFS_MFT_File_Record_Size;
    const s64 chunk_bytes  = NTFS_MFT_File_Record_Size * NTFS_MFT_Files_Per_Buffer; // 64 MB

    NTFS_MFT_Internal* drive = New<NTFS_MFT_Internal>(true);
    drive->mft_file   = ArrayView<u8>(record_bytes);
    drive->mft_buffer = ArrayView<u8>(chunk_bytes);
    return drive;
}
// Reads exactly `count` bytes starting at absolute byte offset `from` of
// mft->handle into `buffer`, and adds the bytes actually read to
// mft->bytes_accessed.
// Returns true only when the seek and the read both succeed and the full
// byte count was transferred.
// I need a better name for this!
bool NTFS_read_internal (NTFS_MFT_Internal* mft, void* buffer, u64 from, u64 count) {
    // ReadFile takes a 32-bit byte count; refuse requests the cast below
    // would silently truncate.
    if (count > 0xFFFFFFFFull) {
        Assert(false);
        return false;
    }

    LARGE_INTEGER target;
    target.QuadPart = (LONGLONG)from;

    DWORD bytes_read = 0; // stays 0 if the seek or the read fails
    bool ok = SetFilePointerEx(mft->handle, target, nullptr, FILE_BEGIN) != 0;
    if (ok) {
        ok = ReadFile(mft->handle, buffer, (DWORD)count, &bytes_read, nullptr) != 0;
    }
    mft->bytes_accessed += bytes_read;

    bool complete = ok && (bytes_read == count);
    Assert(complete);
    return complete;
}
// Placeholder flag values; the only reference in this file is the
// commented-out NTFS_MFT_Copy::mft_flags field.
enum class NTFS_Block_Flag : s32 {
idk = 0,
idk2 = 1,
};
// Result object returned by NTFS_MFT_read_raw.
struct NTFS_MFT_Copy {
u32 error_code; // set from GetLastError() when opening the volume fails; callers treat non-zero as failure
// Array<ArrayView<u8>> mft_blocks; // idk if this is correct!
// Array<NTFS_Block_Flag> mft_flags;
};
// #TODO: make a version where we just reconstruct the MFT as a deque in memory.
NTFS_MFT_Copy* NTFS_MFT_read_raw (string drive_path) {
Table<string, Win32_Drive>* drive_table = &global_win32_state.system_info.drives;
// We should be able to fetch from the table using table_find_pointer
Assert(context_allocator() != temp()); // pointless as we're releasing temp end-of-scope
Allocator primary_allocator = context_allocator();
auto_release_temp();
push_allocator(temp());
NTFS_MFT_Copy* mft_copy = New<NTFS_MFT_Copy>(primary_allocator); //use context allocator.
string drive_letter = Win32_drive_letter(drive_path);
string create_file_target = format_string("\\\\.\\%s:", drive_letter.data);
HANDLE file_handle = CreateFileA((LPCSTR)create_file_target.data, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, nullptr, OPEN_EXISTING, 0, nullptr);
if (file_handle == INVALID_HANDLE_VALUE) {
log_error("CreateFileA failed on target %s", create_file_target.data);
log_error_code_and_string();
mft_copy->error_code = GetLastError();
return mft_copy;
}
push_allocator(primary_allocator);
NTFS_MFT_Internal* mft = new_ntfs_drive();
mft->handle = file_handle;
bool success;
NTFS_BootSector boot_sector;
success = NTFS_read_internal(mft, &boot_sector, 0, 512);
Assert(success);
u64 bytes_per_cluster = (boot_sector.bytesPerSector * boot_sector.sectorsPerCluster);
success = NTFS_read_internal(mft, mft->mft_file.data, boot_sector.mftStart * bytes_per_cluster, NTFS_MFT_File_Record_Size);
Assert(success);
NTFS_FileRecordHeader* file_record_start = (NTFS_FileRecordHeader*)mft->mft_file.data;
if (file_record_start->magic != 0x454C4946) {
log_error("[NTFS_read_drive_raw] Magic number check failed! This drive is not NTFS or is corrupted!");
return false;
}
NTFS_AttributeHeader* attribute = (NTFS_AttributeHeader*)(mft->mft_file.data + file_record_start->firstAttributeOffset);
NTFS_NonResidentAttributeHeader* data_attribute = nullptr;
u64 approximate_record_count = 0;
while (true) {
if (attribute->attributeType == 0x80) {
data_attribute = (NTFS_NonResidentAttributeHeader*)attribute;
} else if (attribute->attributeType == 0xB0) {
approximate_record_count = ((NTFS_NonResidentAttributeHeader*)attribute)->attributeSize * 8;
} else if (attribute->attributeType == 0xFFFFFFFF) {
break;
}
attribute = (NTFS_AttributeHeader*) ((u8*) attribute + attribute->length);
} // while (true)
Assert(data_attribute != nullptr);
NTFS_RunHeader* dataRun = (NTFS_RunHeader*)((u8*)data_attribute + data_attribute->dataRunsOffset);
u64 cluster_number = 0, records_processed = 0;
// outer loop
while (((u8*)dataRun - (u8*)data_attribute) < data_attribute->length && dataRun->lengthFieldBytes) {
u64 length = 0, offset = 0;
for (s64 i = 0; i < dataRun->lengthFieldBytes; i += 1) {
length |= (u64)(((u8*)dataRun)[1 + i]) << (i * 8);
}
for (s64 i = 0; i < dataRun->offsetFieldBytes; i += 1) {
offset |= (u64)(((u8*)dataRun)[1 + dataRun->lengthFieldBytes + i]) << (i * 8);
}
if (offset & ((u64) 1 << (dataRun->offsetFieldBytes * 8 - 1))) {
for (s64 i = dataRun->offsetFieldBytes; i < 8; i += 1) {
offset |= (u64)(0xFF << (i * 8));
}
}
cluster_number += offset;
dataRun = (NTFS_RunHeader*)((u8*)dataRun + 1 + dataRun->lengthFieldBytes + dataRun->offsetFieldBytes);
u64 files_remaining = length * bytes_per_cluster / NTFS_MFT_File_Record_Size;
u64 position_in_block = 0;
while (files_remaining) { // enumerate files in chunks of 65536
u64 files_to_load = NTFS_MFT_Files_Per_Buffer;
if (files_remaining < NTFS_MFT_Files_Per_Buffer) {
files_to_load = files_remaining;
}
NTFS_read_internal(mft, mft->mft_buffer.data, cluster_number * bytes_per_cluster + position_in_block, files_to_load * NTFS_MFT_File_Record_Size);
position_in_block += files_to_load * NTFS_MFT_File_Record_Size;
files_remaining -= files_to_load;
for (s64 i = 0; i < (s64)files_to_load; i += 1) { // load
// Even on an SSD, processing the file records takes only a fraction of the time to read the data, so there's not much point in multithreading this:
NTFS_FileRecordHeader* fileRecord = (NTFS_FileRecordHeader*)(mft->mft_buffer.data + NTFS_MFT_File_Record_Size * i);
records_processed += 1;
// A file record may be blank or unused; just skip it.
if (!fileRecord->inUse) continue;
NTFS_AttributeHeader* attribute = (NTFS_AttributeHeader*) ((u8*)fileRecord + fileRecord->firstAttributeOffset);
Assert(fileRecord->magic == 0x454C4946);
if (file_record_start->magic != 0x454C4946) {
log_error("[NTFS_read_drive_raw] Magic number check failed! This drive is likely corrupted!");
return false;
}
// inner loop
NTFS_File file = {};
while ((u8*)attribute - (u8*)fileRecord < NTFS_MFT_File_Record_Size) {
if (attribute->attributeType == 0x30) { // $FILE_NAME
NTFS_FileNameAttributeHeader* fileNameAttribute = (NTFS_FileNameAttributeHeader*)attribute;
if (fileNameAttribute->namespaceType != 2 && !fileNameAttribute->nonResident) {
file.parent_id = (u32)fileNameAttribute->parentRecordNumber; // truncate
file.record_id = fileRecord->recordNumber;
file.name_count = fileNameAttribute->fileNameLength;
file.name_data = (u16*)fileNameAttribute->fileName;
file.name_utf8 = wide_to_utf8(file.name_data, file.name_count);
file.file_modtime = (u64)fileNameAttribute->modificationTime;
file.is_directory = fileRecord->isDirectory;
// We need to get size from the data attribute
// #TODO: continue from here!
mft->file_count += 1;
}
}
if (attribute->attributeType == 0x80) { // $DATA
NTFS_NonResidentAttributeHeader* nonresident_attribute = (NTFS_NonResidentAttributeHeader*)attribute;
file.file_size = nonresident_attribute->attributeSize;
}
if (attribute->attributeType == 0xFFFFFFFF) {
break;
}
attribute = (NTFS_AttributeHeader*)((u8*)attribute + attribute->length);
} // while: inner loop
} // for i: 0..files_to_load-1
} // while: files_remaining
} // while: outer loop
log("Found %lld files (bytes_accessed: %s)", mft->file_count, format_bytes(mft->bytes_accessed).data);
CloseHandle(file_handle);
return nullptr;
}
// Version where we transform the format so it's optimal for sorting and searching:
// bool NTFS_MFT_read_and_convert () { // pass the structure we want to read into!
// auto_release_temp();
// push_allocator(temp());
// NTFS_MFT_Internal* mft = new_ntfs_drive();
// }

View File

@ -50,6 +50,7 @@
#if OS_WINDOWS
# include "lib/OS/OS_Win32.cpp"
# include "lib/OS/OS_Win32_NTFS.cpp"
#endif
#include "lib/Base/Thread_Group.cpp"

View File

@ -341,25 +341,41 @@ struct ImGui_Font_Size {
void ImGui_Debug_Panel () {
ImGui::Begin("Debug Panel");
Table<string, Win32_Drive>* drive_table = &global_win32_state.system_info.drives;
if (ImGui::Button("Debug_Break()")) {
debug_break();
}
if (ImGui::Button("Discover drives (intentional failure)")) {
if (ImGui::Button("Discover drives") || !table_is_valid(drive_table)) {
Win32_Discover_Drives();
}
Table<string, Win32_Drive>* drive_table = &global_win32_state.system_info.drives;
ImGui::Text("drive_table is valid: %d", table_is_valid(drive_table));
// Most basic Table iterator
s32 current_index = 0;
for (s64 i = 0; i < drive_table->allocated; i += 1) {
Table_Entry<string, Win32_Drive>* entry = &drive_table->entries[i]; // we should take ptr here if we want to modify?
if (entry->hash > HASH_TABLE_FIRST_VALID_HASH) {
// #TODO: #MOVE THIS + maybe don't check this every frame!
entry->value.is_present = Win32_Drive_Exists(entry->value.label);
// #TODO #MOVE THIS + maybe don't check this every frame!
// entry->value.is_present = Win32_Drive_Exists(entry->value.label);
if (entry->value.label.data == nullptr) continue;
ImGui::Text(" > [%d] drive letter: %s (is_present: %d)", current_index, entry->value.label.data, entry->value.is_present);
current_index += 1;
ImGui::SameLine();
push_allocator(temp());
char* button_label = (char*)format_string("Read NTFS MFT Raw##%s", entry->value.label.data).data;
if (ImGui::Button(button_label)) {
push_arena(thread_context()->arena);
// auto_release(thread_context()->arena);
auto result = NTFS_MFT_read_raw(entry->value.label);
if (result->error_code != 0) {
log("[NTFS_MFT_read_raw] operation failed");
}
}
}
}