Musa-Cpp-Lib-V2/lib/OS/OS_Win32_NTFS.cpp

317 lines
11 KiB
C++

// Reference: https://handmade.network/forums/articles/t/7002-tutorial_parsing_the_mft
#pragma pack(push,1)
// On-disk layout of the first sector of an NTFS volume. Declared inside #pragma pack(push,1),
// so the fields are laid out back-to-back and sum to 512 bytes - the amount NTFS_MFT_read_raw
// reads into this struct. NTFS_MFT_read_raw only consumes bytesPerSector, sectorsPerCluster
// and mftStart; the other fields exist to keep the offsets right.
struct NTFS_BootSector {
u8 jump[3];
u8 name[8];
u16 bytesPerSector; // multiplied by sectorsPerCluster to obtain bytes per cluster
u8 sectorsPerCluster;
u16 reservedSectors;
u8 unused0[3];
u16 unused1;
u8 media;
u16 unused2;
u16 sectorsPerTrack;
u16 headsPerCylinder;
u32 hiddenSectors;
u32 unused3;
u32 unused4;
u64 totalSectors;
u64 mftStart; // cluster index of the first MFT record; the reader multiplies it by bytes-per-cluster to get a byte offset
u64 mftMirrorStart;
u32 clustersPerFileRecord; // not read by NTFS_MFT_read_raw, which assumes NTFS_MFT_File_Record_Size instead
u32 clustersPerIndexBlock;
u64 serialNumber;
u32 checksum;
u8 bootloader[426];
u16 bootSignature;
};
// Header found at the start of every MFT file record (packed, 48 bytes).
struct NTFS_FileRecordHeader {
u32 magic; // the reader expects 0x454C4946, i.e. the bytes 'F','I','L','E' read as a little-endian u32
u16 updateSequenceOffset; // NOTE(review): update-sequence fixups are not applied by the visible reader - confirm whether that is intended
u16 updateSequenceSize;
u64 logSequence;
u16 sequenceNumber;
u16 hardLinkCount;
u16 firstAttributeOffset; // byte offset from the start of the record to its first attribute header
u16 inUse : 1; // records with this bit clear are skipped by the reader
u16 isDirectory : 1; // copied into NTFS_File::is_directory
u32 usedSize;
u32 allocatedSize;
u64 fileReference;
u16 nextAttributeID;
u16 unused;
u32 recordNumber; // copied into NTFS_File::record_id
};
// Common prefix of every attribute inside a file record (packed, 16 bytes).
struct NTFS_AttributeHeader {
u32 attributeType; // values the reader looks for: 0x30 ($FILE_NAME), 0x80 ($DATA), 0xB0, and 0xFFFFFFFF as the end-of-attributes marker
u32 length; // byte distance from this header to the next attribute header
u8 nonResident; // the reader only accepts $FILE_NAME attributes where this is zero
u8 nameLength;
u16 nameOffset;
u16 flags;
u16 attributeID;
};
// Attribute header layout used when NTFS_AttributeHeader::nonResident is zero (packed, 24 bytes
// including the base header). None of the fields below are read by NTFS_MFT_read_raw.
struct NTFS_ResidentAttributeHeader : NTFS_AttributeHeader {
u32 attributeLength; // presumably the byte size of the resident value - confirm against the NTFS reference
u16 attributeOffset; // presumably the byte offset of the value from the attribute start - confirm
u8 indexed;
u8 unused;
};
// Resident $FILE_NAME attribute (type 0x30): the resident header immediately followed by the
// file-name value.
// NOTE(review): this layout assumes the value starts right after the 24-byte resident header
// (attributeOffset is not consulted) - confirm that always holds.
struct NTFS_FileNameAttributeHeader : NTFS_ResidentAttributeHeader {
u64 parentRecordNumber : 48; // truncated to 32 bits when stored in NTFS_File::parent_id
u64 sequenceNumber : 16;
u64 creationTime;
u64 modificationTime; // copied into NTFS_File::file_modtime
u64 metadataModificationTime;
u64 readTime;
u64 allocatedSize;
u64 realSize;
u32 flags;
u32 repase; // NOTE(review): probably a typo for "reparse"; not renamed here because other code may reference it
u8 fileNameLength; // number of u16 units in fileName; passed as the count to wide_to_utf8
u8 namespaceType; // names with value 2 are ignored by the reader (presumably DOS 8.3 short names - confirm)
u16 fileName[1]; // first unit of the variable-length name; fileNameLength units follow in the record
};
// Attribute header layout used when the attribute's value lives outside the file record and is
// located through a list of data runs (packed, 64 bytes including the base header).
struct NTFS_NonResidentAttributeHeader : NTFS_AttributeHeader {
u64 firstCluster;
u64 lastCluster;
u16 dataRunsOffset; // byte offset from the start of this attribute to its first NTFS_RunHeader
u16 compressionUnit;
u32 unused;
u64 attributeAllocated; // allocatedSize
u64 attributeSize; // dataSize; the reader uses it as the file size, and (times 8) as an approximate record count for type 0xB0
u64 streamDataSize; // initializedSize
// u64 compressedSize;
};
// First byte of a data-run entry. The entry continues with lengthFieldBytes bytes holding the
// run length in clusters (little-endian), then offsetFieldBytes bytes holding a signed cluster
// offset relative to the previous run. The reader stops at an entry whose lengthFieldBytes is 0.
// NOTE(review): which nibble each bit-field lands in depends on the compiler's bit-field
// allocation order - confirm for every toolchain this is built with.
struct NTFS_RunHeader {
u8 lengthFieldBytes : 4;
u8 offsetFieldBytes : 4;
};
#pragma pack(pop)
// In-memory summary of one MFT entry, filled in by NTFS_MFT_read_raw while it walks the
// attributes of a file record.
struct NTFS_File {
u32 parent_id; // low 32 bits of the parent's record number
u32 record_id; // NTFS_FileRecordHeader::recordNumber
bool is_directory;
u8 name_count; // number of u16 units in name_data
u16* name_data; // points into the MFT read buffer, so it is only valid until that buffer is overwritten
u64 file_modtime; // FILETIME?
u64 file_size; // taken from the $DATA attribute
string name_utf8; // name_data converted with wide_to_utf8
};
// Size in bytes of one MFT file record as assumed by the reader (the boot sector's
// clustersPerFileRecord field is not consulted).
constexpr s64 NTFS_MFT_File_Record_Size = 1024; // File Entry Block
// Number of file records fetched per read; together with the record size this makes the
// bulk buffer 1024 * 65536 bytes = 64 MiB.
constexpr s64 NTFS_MFT_Files_Per_Buffer = 65536;
// Working state for reading the MFT of one volume: the open volume handle plus two scratch
// buffers sized by new_ntfs_drive.
// (The earlier "#rename" note asked for the name NTFS_MFT_Internal, which this struct already has.)
struct NTFS_MFT_Internal {
ArrayView<u8> mft_file; // holds a single file record (NTFS_MFT_File_Record_Size bytes)
ArrayView<u8> mft_buffer; // holds NTFS_MFT_Files_Per_Buffer records per bulk read
HANDLE handle; // volume handle used by NTFS_read_internal
// Statistics that only exist in debug builds; code touching them must be guarded the same way.
#if BUILD_DEBUG
s64 bytes_accessed;
s64 file_count;
#endif
};
// Creates the working state for one volume: a single-record scratch buffer and the bulk
// read buffer. The handle is left for the caller to fill in.
// NOTE(review): the original remark says "call with temp", yet NTFS_MFT_read_raw calls this
// with its primary allocator pushed - confirm which allocator is intended.
NTFS_MFT_Internal* new_ntfs_drive () { // call with temp
    constexpr s64 single_record_bytes = NTFS_MFT_File_Record_Size;
    constexpr s64 bulk_buffer_bytes = NTFS_MFT_File_Record_Size * NTFS_MFT_Files_Per_Buffer; // 64 MB

    NTFS_MFT_Internal* result = New<NTFS_MFT_Internal>(true);
    result->mft_file = ArrayView<u8>(single_record_bytes);
    result->mft_buffer = ArrayView<u8>(bulk_buffer_bytes);
    return result;
}
// I need a better name for this!
// Reads exactly `count` bytes from the volume behind mft->handle, starting at absolute byte
// offset `from`, into `buffer`.
//   mft    - supplies the open handle; in debug builds its bytes_accessed counter is advanced.
//   buffer - destination, must have room for `count` bytes.
//   from   - absolute byte offset from the start of the volume.
//   count  - number of bytes to read; must fit in 32 bits because ReadFile takes a DWORD.
// Returns true only if the seek succeeded and ReadFile delivered all `count` bytes.
// Asserts on failure, as before, so debug builds still trap at the failing read.
bool NTFS_read_internal (NTFS_MFT_Internal* mft, void* buffer, u64 from, u64 count) {
    // Refuse oversized requests rather than silently truncating them in the DWORD cast below.
    bool ok = (count <= 0xFFFFFFFFull);

    if (ok) {
        // SetFilePointerEx takes the whole 64-bit offset and reports failure unambiguously.
        LARGE_INTEGER target;
        target.QuadPart = (LONGLONG)from;
        ok = SetFilePointerEx(mft->handle, target, nullptr, FILE_BEGIN) != 0;
    }

    DWORD bytes_read = 0;
    if (ok) {
        ok = ReadFile(mft->handle, buffer, (DWORD)count, &bytes_read, nullptr) != 0;
        ok = ok && (bytes_read == count);
    }

#if BUILD_DEBUG
    // bytes_accessed is only declared in debug builds.
    mft->bytes_accessed += bytes_read;
#endif

    Assert(ok);
    return ok;
}
// #TODO: Release resources if we face an early return!
Dense_FS* NTFS_MFT_read_raw (string drive_path) {
Table<string, Win32_Drive>* drive_table = &global_win32_state.system_info.drives;
bool just_added = false;
Win32_Drive* drive = table_find_or_add(drive_table, drive_path, &just_added);
Assert(just_added == false && drive != nullptr);
if (drive == nullptr) {
return nullptr;
}
Assert(context_allocator() != temp()); // pointless as we're releasing temp end-of-scope
Allocator primary_allocator = context_allocator();
auto_release_temp();
push_allocator(temp());
string drive_letter = Win32_drive_letter(drive_path);
string create_file_target = format_string("\\\\.\\%s:", drive_letter.data);
HANDLE file_handle = CreateFileA((LPCSTR)create_file_target.data, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, nullptr, OPEN_EXISTING, 0, nullptr);
if (file_handle == INVALID_HANDLE_VALUE) {
log_error("CreateFileA failed on target %s", create_file_target.data);
log_error_code_and_string();
return nullptr;
}
push_allocator(primary_allocator);
NTFS_MFT_Internal* mft = new_ntfs_drive();
mft->handle = file_handle;
bool success;
NTFS_BootSector boot_sector;
success = NTFS_read_internal(mft, &boot_sector, 0, 512);
Assert(success);
u64 bytes_per_cluster = (boot_sector.bytesPerSector * boot_sector.sectorsPerCluster);
success = NTFS_read_internal(mft, mft->mft_file.data, boot_sector.mftStart * bytes_per_cluster, NTFS_MFT_File_Record_Size);
Assert(success);
NTFS_FileRecordHeader* file_record_start = (NTFS_FileRecordHeader*)mft->mft_file.data;
if (file_record_start->magic != 0x454C4946) {
log_error("[NTFS_read_drive_raw] Magic number check failed! This drive is not NTFS or is corrupted!");
return false;
}
NTFS_AttributeHeader* attribute = (NTFS_AttributeHeader*)(mft->mft_file.data + file_record_start->firstAttributeOffset);
NTFS_NonResidentAttributeHeader* data_attribute = nullptr;
u64 approximate_record_count = 0;
while (true) {
if (attribute->attributeType == 0x80) {
data_attribute = (NTFS_NonResidentAttributeHeader*)attribute;
} else if (attribute->attributeType == 0xB0) {
approximate_record_count = ((NTFS_NonResidentAttributeHeader*)attribute)->attributeSize * 8;
} else if (attribute->attributeType == 0xFFFFFFFF) {
break;
}
attribute = (NTFS_AttributeHeader*) ((u8*) attribute + attribute->length);
} // while (true)
Assert(data_attribute != nullptr);
NTFS_RunHeader* dataRun = (NTFS_RunHeader*)((u8*)data_attribute + data_attribute->dataRunsOffset);
u64 cluster_number = 0, records_processed = 0;
// outer loop
while (((u8*)dataRun - (u8*)data_attribute) < data_attribute->length && dataRun->lengthFieldBytes) {
u64 length = 0, offset = 0;
for (s64 i = 0; i < dataRun->lengthFieldBytes; i += 1) {
length |= (u64)(((u8*)dataRun)[1 + i]) << (i * 8);
}
for (s64 i = 0; i < dataRun->offsetFieldBytes; i += 1) {
offset |= (u64)(((u8*)dataRun)[1 + dataRun->lengthFieldBytes + i]) << (i * 8);
}
if (offset & ((u64) 1 << (dataRun->offsetFieldBytes * 8 - 1))) {
for (s64 i = dataRun->offsetFieldBytes; i < 8; i += 1) {
offset |= (u64)(0xFF << (i * 8));
}
}
cluster_number += offset;
dataRun = (NTFS_RunHeader*)((u8*)dataRun + 1 + dataRun->lengthFieldBytes + dataRun->offsetFieldBytes);
u64 files_remaining = length * bytes_per_cluster / NTFS_MFT_File_Record_Size;
u64 position_in_block = 0;
while (files_remaining) { // enumerate files in chunks of 65536
u64 files_to_load = NTFS_MFT_Files_Per_Buffer;
if (files_remaining < NTFS_MFT_Files_Per_Buffer) {
files_to_load = files_remaining;
}
NTFS_read_internal(mft, mft->mft_buffer.data, cluster_number * bytes_per_cluster + position_in_block, files_to_load * NTFS_MFT_File_Record_Size);
position_in_block += files_to_load * NTFS_MFT_File_Record_Size;
files_remaining -= files_to_load;
for (s64 i = 0; i < (s64)files_to_load; i += 1) { // load
// Even on an SSD, processing the file records takes only a fraction of the time to read the data, so there's not much point in multithreading this:
NTFS_FileRecordHeader* fileRecord = (NTFS_FileRecordHeader*)(mft->mft_buffer.data + NTFS_MFT_File_Record_Size * i);
records_processed += 1;
// A file record may be blank or unused; just skip it.
if (!fileRecord->inUse) continue;
NTFS_AttributeHeader* attribute = (NTFS_AttributeHeader*) ((u8*)fileRecord + fileRecord->firstAttributeOffset);
Assert(fileRecord->magic == 0x454C4946);
if (file_record_start->magic != 0x454C4946) {
log_error("[NTFS_read_drive_raw] Magic number check failed! This drive is likely corrupted!");
return false;
}
// inner loop
NTFS_File file = {};
while ((u8*)attribute - (u8*)fileRecord < NTFS_MFT_File_Record_Size) {
if (attribute->attributeType == 0x30) { // $FILE_NAME
NTFS_FileNameAttributeHeader* fileNameAttribute = (NTFS_FileNameAttributeHeader*)attribute;
if (fileNameAttribute->namespaceType != 2 && !fileNameAttribute->nonResident) {
file.parent_id = (u32)fileNameAttribute->parentRecordNumber; // truncate
file.record_id = fileRecord->recordNumber;
file.name_count = fileNameAttribute->fileNameLength;
file.name_data = (u16*)fileNameAttribute->fileName;
file.name_utf8 = wide_to_utf8(file.name_data, file.name_count);
file.file_modtime = (u64)fileNameAttribute->modificationTime;
file.is_directory = fileRecord->isDirectory;
// We need to get size from the data attribute
// #TODO: continue from here!
mft->file_count += 1;
}
}
if (attribute->attributeType == 0x80) { // $DATA
NTFS_NonResidentAttributeHeader* nonresident_attribute = (NTFS_NonResidentAttributeHeader*)attribute;
file.file_size = nonresident_attribute->attributeSize;
}
if (attribute->attributeType == 0xFFFFFFFF) {
break;
}
attribute = (NTFS_AttributeHeader*)((u8*)attribute + attribute->length);
} // while: inner loop
} // for i: 0..files_to_load-1
} // while: files_remaining
} // while: outer loop
log("Found %lld files (bytes_accessed: %s)", mft->file_count, format_bytes(mft->bytes_accessed).data);
CloseHandle(file_handle);
return nullptr;
}
// Per-thread work item for ntfs_enumeration_thread_proc, which fetches it through thread_task.
struct NTFS_Enumeration_Task {
Arena* pool; // small arena just for results
OS_Drive* drive;
string drive_path; // The drive path we want to enumerate
};
// Thread entry point for NTFS enumeration. Still a stub: it fetches its task, logs the thread
// index and task pointer, waits briefly, and returns 0 without enumerating anything.
s64 ntfs_enumeration_thread_proc (Thread* thread) {
    const int placeholder_delay_ms = 100; // #temp

    auto enumeration_task = thread_task(NTFS_Enumeration_Task);
    log("(Thread index: %lld) Task pointer: %p", thread->index, enumeration_task);

    // NTFS_MFT_read_raw|#TODO:
    Sleep(placeholder_delay_ms);
    return 0;
}