// Reference: https://handmade.network/forums/articles/t/7002-tutorial_parsing_the_mft #pragma pack(push,1) struct NTFS_BootSector { u8 jump[3]; u8 name[8]; u16 bytesPerSector; u8 sectorsPerCluster; u16 reservedSectors; u8 unused0[3]; u16 unused1; u8 media; u16 unused2; u16 sectorsPerTrack; u16 headsPerCylinder; u32 hiddenSectors; u32 unused3; u32 unused4; u64 totalSectors; u64 mftStart; u64 mftMirrorStart; u32 clustersPerFileRecord; u32 clustersPerIndexBlock; u64 serialNumber; u32 checksum; u8 bootloader[426]; u16 bootSignature; }; struct NTFS_FileRecordHeader { u32 magic; u16 updateSequenceOffset; u16 updateSequenceSize; u64 logSequence; u16 sequenceNumber; u16 hardLinkCount; u16 firstAttributeOffset; u16 inUse : 1; u16 isDirectory : 1; u32 usedSize; u32 allocatedSize; u64 fileReference; u16 nextAttributeID; u16 unused; u32 recordNumber; }; struct NTFS_AttributeHeader { u32 attributeType; u32 length; u8 nonResident; u8 nameLength; u16 nameOffset; u16 flags; u16 attributeID; }; struct NTFS_ResidentAttributeHeader : NTFS_AttributeHeader { u32 attributeLength; u16 attributeOffset; u8 indexed; u8 unused; }; struct NTFS_FileNameAttributeHeader : NTFS_ResidentAttributeHeader { u64 parentRecordNumber : 48; u64 sequenceNumber : 16; u64 creationTime; u64 modificationTime; u64 metadataModificationTime; u64 readTime; u64 allocatedSize; u64 realSize; u32 flags; u32 repase; u8 fileNameLength; u8 namespaceType; u16 fileName[1]; }; struct NTFS_NonResidentAttributeHeader : NTFS_AttributeHeader { u64 firstCluster; u64 lastCluster; u16 dataRunsOffset; u16 compressionUnit; u32 unused; u64 attributeAllocated; // allocatedSize u64 attributeSize; // dataSize u64 streamDataSize; // initializedSize // u64 compressedSize; }; struct NTFS_RunHeader { u8 lengthFieldBytes : 4; u8 offsetFieldBytes : 4; }; #pragma pack(pop) struct NTFS_File { u32 parent_id; u32 record_id; bool is_directory; u8 name_count; u16* name_data; u64 file_modtime; // FILETIME? u64 file_size; string name_utf8; }; constexpr s64 NTFS_MFT_File_Record_Size = 1024; // File Entry Block constexpr s64 NTFS_MFT_Files_Per_Buffer = 65536; // #rename: should be NTFS_MFT_Internal struct NTFS_MFT_Internal { ArrayView mft_file; ArrayView mft_buffer; HANDLE handle; #if BUILD_DEBUG s64 bytes_accessed; s64 file_count; #endif }; NTFS_MFT_Internal* new_ntfs_mft_internal () { // call with temp NTFS_MFT_Internal* mft = New(true); mft->mft_file = ArrayView(NTFS_MFT_File_Record_Size); mft->mft_buffer = ArrayView(NTFS_MFT_File_Record_Size * NTFS_MFT_Files_Per_Buffer); // 64 MB return mft; } // I need a better name for this! bool NTFS_read_internal (NTFS_MFT_Internal* mft, void* buffer, u64 from, u64 count) { s32 high = (s32)(from >> 32); SetFilePointer(mft->handle, (s32)(from & 0xFFFFFFFF), (PLONG)&high, FILE_BEGIN); u32 bytes_accessed_internal; ReadFile(mft->handle, buffer, (DWORD)count, (LPDWORD)&bytes_accessed_internal, nullptr); mft->bytes_accessed += bytes_accessed_internal; Assert(bytes_accessed_internal == count); return bytes_accessed_internal == count; } // #TODO: Release resources if we face an early return! // #TODO: Maybe this doesn't need to return a value? Return an Error* instead. Dense_FS* NTFS_MFT_read_raw (OS_Drive* drive) { auto start_time = GetUnixTimestamp(); Assert(drive != nullptr); if (drive == nullptr) { return nullptr; } string drive_path = drive->label; Assert(context_allocator() != temp()); // pointless as we're releasing temp end-of-scope Allocator primary_allocator = context_allocator(); auto_release_temp(); push_allocator(temp()); string drive_letter = Win32_drive_letter(drive_path); string create_file_target = format_string("\\\\.\\%s:", drive_letter.data); HANDLE file_handle = CreateFileA((LPCSTR)create_file_target.data, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, nullptr, OPEN_EXISTING, 0, nullptr); if (file_handle == INVALID_HANDLE_VALUE) { log_error("CreateFileA failed on target %s", create_file_target.data); log_error_code_and_string(); return nullptr; } NTFS_MFT_Internal* mft = new_ntfs_mft_internal(); mft->handle = file_handle; push_allocator(primary_allocator); bool success; NTFS_BootSector boot_sector; success = NTFS_read_internal(mft, &boot_sector, 0, 512); Assert(success); u64 bytes_per_cluster = (boot_sector.bytesPerSector * boot_sector.sectorsPerCluster); success = NTFS_read_internal(mft, mft->mft_file.data, boot_sector.mftStart * bytes_per_cluster, NTFS_MFT_File_Record_Size); Assert(success); NTFS_FileRecordHeader* file_record_start = (NTFS_FileRecordHeader*)mft->mft_file.data; if (file_record_start->magic != 0x454C4946) { log_error("[NTFS_read_drive_raw] Magic number check failed! This drive is not NTFS or is corrupted!"); return nullptr; } NTFS_AttributeHeader* attribute = (NTFS_AttributeHeader*)(mft->mft_file.data + file_record_start->firstAttributeOffset); NTFS_NonResidentAttributeHeader* data_attribute = nullptr; u64 approximate_record_count = 0; while (true) { if (attribute->attributeType == 0x80) { data_attribute = (NTFS_NonResidentAttributeHeader*)attribute; } else if (attribute->attributeType == 0xB0) { approximate_record_count = ((NTFS_NonResidentAttributeHeader*)attribute)->attributeSize * 8; } else if (attribute->attributeType == 0xFFFFFFFF) { break; } attribute = (NTFS_AttributeHeader*) ((u8*) attribute + attribute->length); } // while (true) Assert(data_attribute != nullptr); NTFS_RunHeader* dataRun = (NTFS_RunHeader*)((u8*)data_attribute + data_attribute->dataRunsOffset); u64 cluster_number = 0, records_processed = 0; // outer loop while (((u8*)dataRun - (u8*)data_attribute) < data_attribute->length && dataRun->lengthFieldBytes) { u64 length = 0, offset = 0; for (u8 i = 0; i < dataRun->lengthFieldBytes; i += 1) { length |= (u64)(((u8*)dataRun)[1 + i]) << (i * 8); } for (u8 i = 0; i < dataRun->offsetFieldBytes; i += 1) { offset |= (u64)(((u8*)dataRun)[1 + dataRun->lengthFieldBytes + i]) << (i * 8); } if (offset & ((u64) 1 << (dataRun->offsetFieldBytes * 8 - 1))) { for (s64 i = dataRun->offsetFieldBytes; i < 8; i += 1) { offset |= ((u64)0xFF << (u64)(i * 8)); } } cluster_number += offset; dataRun = (NTFS_RunHeader*)((u8*)dataRun + 1 + dataRun->lengthFieldBytes + dataRun->offsetFieldBytes); u64 files_remaining = length * bytes_per_cluster / NTFS_MFT_File_Record_Size; u64 position_in_block = 0; while (files_remaining) { // enumerate files in chunks of 65536 u64 files_to_load = NTFS_MFT_Files_Per_Buffer; if (files_remaining < NTFS_MFT_Files_Per_Buffer) { files_to_load = files_remaining; } NTFS_read_internal(mft, mft->mft_buffer.data, cluster_number * bytes_per_cluster + position_in_block, files_to_load * NTFS_MFT_File_Record_Size); position_in_block += files_to_load * NTFS_MFT_File_Record_Size; files_remaining -= files_to_load; for (s64 i = 0; i < (s64)files_to_load; i += 1) { // load // Even on an SSD, processing the file records takes only a fraction of the time to read the data, so there's not much point in multithreading this: NTFS_FileRecordHeader* fileRecord = (NTFS_FileRecordHeader*)(mft->mft_buffer.data + NTFS_MFT_File_Record_Size * i); records_processed += 1; // A file record may be blank or unused; just skip it. if (!fileRecord->inUse) continue; NTFS_AttributeHeader* attribute = (NTFS_AttributeHeader*)((u8*)fileRecord + fileRecord->firstAttributeOffset); Assert(fileRecord->magic == 0x454C4946); if (fileRecord->magic != 0x454C4946) { log_error("[NTFS_read_drive_raw] Magic number check failed! This drive is likely corrupted!"); return nullptr; } // inner loop NTFS_File file = {}; while ((u8*)attribute - (u8*)fileRecord < NTFS_MFT_File_Record_Size) { if (attribute->attributeType == 0x30) { // $FILE_NAME NTFS_FileNameAttributeHeader* fileNameAttribute = (NTFS_FileNameAttributeHeader*)attribute; if (fileNameAttribute->namespaceType != 2 && !fileNameAttribute->nonResident) { file.parent_id = (u32)fileNameAttribute->parentRecordNumber; // truncate file.record_id = fileRecord->recordNumber; file.name_count = fileNameAttribute->fileNameLength; file.name_data = (u16*)fileNameAttribute->fileName; // file.name_utf8 = wide_to_utf8(file.name_data, file.name_count); // @Allocates file.file_modtime = (u64)fileNameAttribute->modificationTime; file.is_directory = fileRecord->isDirectory; // We need to get size from the data attribute // #TODO: continue from here! mft->file_count += 1; } } if (attribute->attributeType == 0x80) { // $DATA NTFS_NonResidentAttributeHeader* nonresident_attribute = (NTFS_NonResidentAttributeHeader*)attribute; file.file_size = nonresident_attribute->attributeSize; } if (attribute->attributeType == 0xFFFFFFFF) { break; } attribute = (NTFS_AttributeHeader*)((u8*)attribute + attribute->length); } // while: inner loop } // for i: 0..files_to_load-1 } // while: files_remaining } // while: outer loop CloseHandle(file_handle); log("Found %lld files (bytes_accessed: %s)", mft->file_count, format_bytes(mft->bytes_accessed).data); drive->file_count = mft->file_count; drive->bytes_accessed = mft->bytes_accessed; drive->time_to_enumerate = (f32)(GetUnixTimestamp() - start_time); return drive->data; } struct NTFS_Enumeration_Task { Arena* pool; // small arena just for results ArrayView drives; // Should be part of OS_Drive! }; s64 ntfs_enumeration_thread_proc (Thread* thread) { auto task = thread_task(NTFS_Enumeration_Task); log("[ntfs_enumeration_thread_proc] (Thread index: %lld) Task pointer: %p", thread->index, task); for_each(d, task->drives) { auto result = NTFS_MFT_read_raw(task->drives[d]); if (result == nullptr) return 1; } return 0; }