Musa-Cpp-Lib-V2/lib/OS/OS_Win32_NTFS.cpp

588 lines
19 KiB
C++

// Reference: https://handmade.network/forums/articles/t/7002-tutorial_parsing_the_mft
#pragma pack(push,1)
// Layout of the first sector of a volume as read by NTFS_MFT_read_raw (which reads
// exactly 512 bytes into this struct). Declared inside #pragma pack(push,1), so the
// fields are laid out back-to-back; their sizes add up to exactly 512 bytes.
// Only bytesPerSector, sectorsPerCluster and mftStart are consumed by the code in this file.
struct NTFS_BootSector {
u8 jump[3];
u8 name[8];
u16 bytesPerSector;
u8 sectorsPerCluster;
u16 reservedSectors;
u8 unused0[3];
u16 unused1;
u8 media;
u16 unused2;
u16 sectorsPerTrack;
u16 headsPerCylinder;
u32 hiddenSectors;
u32 unused3;
u32 unused4;
u64 totalSectors;
// Multiplied by (bytesPerSector * sectorsPerCluster) to get the byte offset of the
// first MFT record, i.e. it is used as a cluster number.
u64 mftStart;
u64 mftMirrorStart;
// NOTE(review): not consulted by the code in this file, which uses the fixed
// NTFS_MFT_File_Record_Size instead — confirm this is acceptable for all volumes.
u32 clustersPerFileRecord;
u32 clustersPerIndexBlock;
u64 serialNumber;
u32 checksum;
u8 bootloader[426];
u16 bootSignature;
};
// Header found at the start of every MFT file record (packed layout).
// The record is NTFS_MFT_File_Record_Size bytes long; attributes follow this header.
struct NTFS_FileRecordHeader {
// Compared against 0x454C4946 by the reader; stored little-endian those bytes spell "FILE".
u32 magic;
// NOTE(review): the update-sequence fields are never applied by the reader in this
// file — presumably sector fix-ups are skipped on purpose; confirm.
u16 updateSequenceOffset;
u16 updateSequenceSize;
u64 logSequence;
u16 sequenceNumber;
u16 hardLinkCount;
// Byte offset from the start of the record to its first NTFS_AttributeHeader.
u16 firstAttributeOffset;
// Two 1-bit flags; records with inUse == 0 are skipped by the reader.
// NOTE(review): assumes both bit-fields share a single u16 storage unit (compiler-dependent).
u16 inUse : 1;
u16 isDirectory : 1;
u32 usedSize;
u32 allocatedSize;
u64 fileReference;
u16 nextAttributeID;
u16 unused;
// Stored as NTFS_File::record_id by the reader.
u32 recordNumber;
};
// Common prefix of every attribute inside a file record (packed, 16 bytes).
struct NTFS_AttributeHeader {
// Values tested by the reader in this file: 0x30 ($FILE_NAME), 0x80 ($DATA),
// 0xB0, and 0xFFFFFFFF which terminates the attribute list.
u32 attributeType;
// Added to the attribute's address to reach the next attribute, i.e. the total
// size in bytes of this attribute including its header.
u32 length;
// Non-zero when the attribute uses the NTFS_NonResidentAttributeHeader layout.
u8 nonResident;
u8 nameLength;
u16 nameOffset;
u16 flags;
u16 attributeID;
};
// Header of an attribute whose value is stored inside the file record itself.
// Extends the common NTFS_AttributeHeader with the location/size of that value.
struct NTFS_ResidentAttributeHeader : NTFS_AttributeHeader {
u32 attributeLength;
u16 attributeOffset;
u8 indexed;
u8 unused;
};
// Resident header followed by the body of a $FILE_NAME (0x30) attribute.
struct NTFS_FileNameAttributeHeader : NTFS_ResidentAttributeHeader {
// 64-bit parent reference split into a 48-bit record number and a 16-bit sequence number.
// The reader truncates parentRecordNumber to u32 when filling NTFS_File::parent_id.
u64 parentRecordNumber : 48;
u64 sequenceNumber : 16;
u64 creationTime;
// Copied into NTFS_File::file_modtime by the reader.
u64 modificationTime;
u64 metadataModificationTime;
u64 readTime;
u64 allocatedSize;
u64 realSize;
u32 flags;
// NOTE(review): name looks like a typo for "reparse"; left unchanged because the
// field may be referenced outside this file.
u32 repase;
// Number of u16 units in fileName (passed to wide_to_utf8 as the name count).
u8 fileNameLength;
// The reader ignores names whose namespaceType == 2.
// NOTE(review): presumably the DOS 8.3 short-name namespace — confirm.
u8 namespaceType;
// First element of the variable-length name; the remaining units follow in the record.
u16 fileName[1];
};
// Header of an attribute whose value lives outside the file record and is described
// by a list of data runs. Extends the common NTFS_AttributeHeader.
struct NTFS_NonResidentAttributeHeader : NTFS_AttributeHeader {
u64 firstCluster;
u64 lastCluster;
// Byte offset from the start of this attribute to its first NTFS_RunHeader.
u16 dataRunsOffset;
u16 compressionUnit;
u32 unused;
u64 attributeAllocated; // allocatedSize
u64 attributeSize; // dataSize
u64 streamDataSize; // initializedSize
u64 compressedSize;
};
// One-byte header in front of each data run. The reader treats the bytes that follow
// as a little-endian run length (lengthFieldBytes bytes) and then a little-endian,
// sign-extended cluster offset relative to the previous run (offsetFieldBytes bytes).
// A header whose lengthFieldBytes is 0 ends the run list.
// NOTE(review): relies on the compiler placing the first-declared bit-field in the low nibble.
struct NTFS_RunHeader {
u8 lengthFieldBytes : 4;
u8 offsetFieldBytes : 4;
};
#pragma pack(pop)
// Summary of one parsed MFT file record, filled in by NTFS_MFT_read_raw and handed
// to add_record. Not an on-disk structure (declared after #pragma pack(pop)).
struct NTFS_File {
u32 parent_id; // record number of the parent directory (truncated to 32 bits)
u32 record_id; // NTFS_FileRecordHeader::recordNumber of this record
u16* name_data; // points at the UTF-16 name inside the read buffer; not owned
u64 file_modtime; // FILETIME
u64 file_size; // currently never filled in by the reader (the size code is commented out)
u8 name_count; // number of u16 units in name_data
bool is_directory;
};
// Size in bytes of one MFT file record as assumed by every reader in this file.
constexpr s64 NTFS_MFT_File_Record_Size = 1024; // File Entry Block
// Number of file records fetched per read; 1024 * 65536 bytes = 64 MiB of buffer.
constexpr s64 NTFS_MFT_Files_Per_Buffer = 65536;
// Working state for one raw MFT enumeration: the open volume handle, the two read
// buffers, and running totals. Created by new_ntfs_mft_internal.
struct NTFS_MFT_Internal {
ArrayView<u8> mft_file; // holds a single file record (NTFS_MFT_File_Record_Size bytes)
ArrayView<u8> mft_buffer; // holds one chunk of NTFS_MFT_Files_Per_Buffer file records
HANDLE handle; // volume handle the reads are issued against
s64 bytes_accessed; // total bytes read so far (accumulated by NTFS_read_internal)
s64 file_count; // number of records handed to add_record
};
// Appends one parsed MFT record to the Dense_FS: directories go into dfs->paths,
// everything else into dfs->files. The name is converted from UTF-16 to UTF-8 and
// stored in the array's string pool; the parallel arrays (parent_ids, record_ids,
// lengths, offsets, modtimes, sizes) each receive one entry.
//
// The value pushed to `lengths` is the number of UTF-8 bytes actually stored, so it
// always agrees with what was written to the string pool (the UTF-16 unit count in
// file->name_count differs from the byte count for any non-ASCII name).
void add_record (Dense_FS* dfs, NTFS_File* file) {
    DFS_Array* array = file->is_directory ? &dfs->paths : &dfs->files;

    // UTF-8 (string) version
    string s = wide_to_utf8(file->name_data, file->name_count);

    // The stored length is a u8. A 255-unit UTF-16 name can expand past 255 UTF-8
    // bytes, so clamp instead of letting the cast wrap modulo 256, and back up to a
    // code-point boundary so we never keep half of a multi-byte sequence.
    s64 utf8_count = (s64)s.count;
    if (utf8_count > 255) {
        utf8_count = 255;
        while (utf8_count > 0 && (((u8)s.data[utf8_count]) & 0xC0) == 0x80) {
            utf8_count -= 1;
        }
    }
    u8 stored_length = (u8)utf8_count;

    u32 offset = AddString_NoCount(array->strings, s.data, stored_length);
    // I need the full path for this lol.
    // file_length(s, (s64*)&file->file_size);
    // UTF-16LE (wstring) version
    // u32 offset = AddString_NoCount(array->wstrings, (u8*)file->name_data, file->name_count * sizeof(u16));

    array_add(*array->parent_ids, file->parent_id);
    array_add(*array->record_ids, file->record_id);
    array_add(*array->lengths, stored_length);
    array_add(*array->offsets, (u32)offset);
    array_add(*array->modtimes, file->file_modtime);
    array_add(*array->sizes, file->file_size);
}
// Allocates the working state for one MFT enumeration together with its two read
// buffers: one sized for a single file record and one sized for a full chunk of
// NTFS_MFT_Files_Per_Buffer records (64 MB).
// Call with temp: the allocations come from whatever allocator is current.
// NOTE(review): New<...>(true) presumably zero-initialises the struct; the counters
// are later accumulated with += and rely on starting at zero — confirm.
NTFS_MFT_Internal* new_ntfs_mft_internal () {
    constexpr s64 single_record_bytes = NTFS_MFT_File_Record_Size;
    constexpr s64 chunk_bytes = NTFS_MFT_File_Record_Size * NTFS_MFT_Files_Per_Buffer;

    NTFS_MFT_Internal* result = New<NTFS_MFT_Internal>(true);
    result->mft_file = ArrayView<u8>(single_record_bytes);
    result->mft_buffer = ArrayView<u8>(chunk_bytes);
    return result;
}
// I need a better name for this!
// Reads `count` bytes starting at absolute byte offset `from` of mft->handle into
// `buffer`, and adds the number of bytes actually read to mft->bytes_accessed.
// Returns true only when the seek succeeded, ReadFile succeeded, and exactly `count`
// bytes were read. Any failure also trips an Assert so debug builds stop at the
// first bad read.
bool NTFS_read_internal (NTFS_MFT_Internal* mft, void* buffer, u64 from, u64 count) {
    // ReadFile takes a 32-bit byte count; refuse rather than silently truncate.
    if (count > 0xFFFFFFFFull) {
        Assert(false);
        return false;
    }

    // SetFilePointerEx reports failure through its return value, so a bad seek can
    // no longer be followed by a "successful" read from the wrong position.
    LARGE_INTEGER target;
    target.QuadPart = (LONGLONG)from;
    if (!SetFilePointerEx(mft->handle, target, nullptr, FILE_BEGIN)) {
        Assert(false);
        return false;
    }

    DWORD bytes_read = 0;
    bool read_ok = ReadFile(mft->handle, buffer, (DWORD)count, &bytes_read, nullptr) != 0;
    mft->bytes_accessed += bytes_read;

    bool complete = read_ok && (bytes_read == count);
    Assert(complete);
    return complete;
}
// Enumerates the Master File Table of the NTFS volume behind `drive` by reading the
// raw volume (\\.\X:), and fills drive->data (a freshly allocated Dense_FS) with one
// entry per in-use file record. Afterwards the parent index of every directory and
// file is resolved, and drive->file_count / bytes_accessed / time_to_enumerate are set.
//
// Steps:
//   1. Read the boot sector to find the cluster size and the location of the MFT.
//   2. Read MFT record 0 and locate its non-resident $DATA (0x80) attribute, whose
//      data runs describe where the MFT itself lives on disk.
//   3. Walk the data runs, reading the records in chunks of NTFS_MFT_Files_Per_Buffer,
//      and hand every in-use record to add_record.
//   4. Build the record_id -> index table and link children to parents.
//
// Returns NO_ERROR on success. Every failure path logs and returns nullptr; the
// volume handle is closed on all paths once it has been opened.
// NOTE(review): if NO_ERROR is itself a null Error*, callers cannot tell failure from
// success. #TODO: return a real Error* from the failure paths.
// NOTE(review): a failure after drive->data has been allocated leaves it partially
// filled (as before); callers presumably discard it — confirm.
// NOTE(review): the per-sector update-sequence fix-ups are not applied to the records.
Error* NTFS_MFT_read_raw (OS_Drive* drive) {
    auto start_time = GetUnixTimestamp();
    Assert(drive != nullptr);
    if (drive == nullptr) { return nullptr; }
    string drive_path = drive->label;
    Assert(context_allocator() != temp()); // pointless as we're releasing temp end-of-scope
    // Allocator primary_allocator = context_allocator();
    auto_release_temp();
    push_allocator(temp());

    string drive_letter = Win32_drive_letter(drive_path);
    string create_file_target = format_string("\\\\.\\%s:", drive_letter.data);
    HANDLE file_handle = CreateFileA((LPCSTR)create_file_target.data, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, nullptr, OPEN_EXISTING, 0, nullptr);
    if (file_handle == INVALID_HANDLE_VALUE) {
        log_error("CreateFileA failed on target %s", create_file_target.data);
        os_log_error();
        return nullptr;
    }

    // From here on the handle is open: every early exit goes through fail() so the
    // handle is never leaked.
    auto fail = [&file_handle] () -> Error* {
        CloseHandle(file_handle);
        return nullptr;
    };

    NTFS_MFT_Internal* mft = new_ntfs_mft_internal();
    mft->handle = file_handle;
    // push_allocator(primary_allocator);

    const u64 record_size = (u64)NTFS_MFT_File_Record_Size;

    // 1. Boot sector.
    NTFS_BootSector boot_sector;
    bool success = NTFS_read_internal(mft, &boot_sector, 0, 512);
    Assert(success);
    if (!success) {
        log_error("[NTFS_MFT_read_raw] Failed to read the boot sector from %s", create_file_target.data);
        return fail();
    }
    u64 bytes_per_cluster = (boot_sector.bytesPerSector * boot_sector.sectorsPerCluster);

    // 2. MFT record 0 (describes the MFT itself).
    success = NTFS_read_internal(mft, mft->mft_file.data, boot_sector.mftStart * bytes_per_cluster, NTFS_MFT_File_Record_Size);
    Assert(success);
    if (!success) {
        log_error("[NTFS_MFT_read_raw] Failed to read the first MFT record from %s", create_file_target.data);
        return fail();
    }

    u8* record_begin = mft->mft_file.data;
    NTFS_FileRecordHeader* file_record_start = (NTFS_FileRecordHeader*)record_begin;
    if (file_record_start->magic != 0x454C4946) { // "FILE" when read little-endian
        log_error("[NTFS_MFT_read_raw] Magic number check failed! This drive is not NTFS or is corrupted!");
        return fail();
    }

    // Walk the attribute list by offset so a corrupt `length` can neither run past
    // the record nor (when zero) spin forever.
    NTFS_NonResidentAttributeHeader* data_attribute = nullptr;
    u64 first_attribute_offset = file_record_start->firstAttributeOffset;
    while (first_attribute_offset + sizeof(NTFS_AttributeHeader) <= record_size) {
        NTFS_AttributeHeader* attribute = (NTFS_AttributeHeader*)(record_begin + first_attribute_offset);
        if (attribute->attributeType == 0xFFFFFFFF) {
            break;
        }
        if (attribute->attributeType == 0x80) { // $DATA
            data_attribute = (NTFS_NonResidentAttributeHeader*)attribute;
        }
        if (attribute->length == 0) {
            break; // malformed: would never advance
        }
        first_attribute_offset += attribute->length;
    }
    Assert(data_attribute != nullptr);
    if (data_attribute == nullptr || !data_attribute->nonResident) {
        log_error("[NTFS_MFT_read_raw] No non-resident $DATA attribute found in the first MFT record!");
        return fail();
    }

    // #dense_fs_alloc
    drive->data = New<Dense_FS>(GPAllocator());
    initialize(drive->data, drive);

    // 3. Walk the data runs of the MFT's $DATA attribute. Offsets are relative to the
    // start of record 0 and clamped to the record so a bad length cannot overrun it.
    u64 data_attribute_offset = (u64)((u8*)data_attribute - record_begin);
    u64 runs_end_offset = data_attribute_offset + data_attribute->length;
    if (runs_end_offset > record_size) {
        runs_end_offset = record_size;
    }
    u64 run_offset = data_attribute_offset + data_attribute->dataRunsOffset;
    u64 cluster_number = 0;

    // outer loop
    while (run_offset < runs_end_offset) {
        u8* run_bytes = record_begin + run_offset;
        NTFS_RunHeader* run_header = (NTFS_RunHeader*)run_bytes;
        u8 length_bytes = run_header->lengthFieldBytes;
        u8 offset_bytes = run_header->offsetFieldBytes;
        if (length_bytes == 0) {
            break; // end of the run list
        }
        // Both fields are decoded into a u64, so anything wider than 8 bytes (or a
        // run that extends past the attribute) can only be corruption.
        if (length_bytes > 8 || offset_bytes > 8 || run_offset + 1 + length_bytes + offset_bytes > runs_end_offset) {
            log_error("[NTFS_MFT_read_raw] Malformed data run in the $MFT $DATA attribute!");
            return fail();
        }

        u64 length = 0, offset = 0;
        for (u8 i = 0; i < length_bytes; i += 1) {
            length |= (u64)(run_bytes[1 + i]) << (i * 8);
        }
        for (u8 i = 0; i < offset_bytes; i += 1) {
            offset |= (u64)(run_bytes[1 + length_bytes + i]) << (i * 8);
        }
        // The offset is a signed delta from the previous run: sign-extend it to 64
        // bits. Guarded so that neither shift amount can be negative or reach 64.
        if (offset_bytes > 0 && offset_bytes < 8 && (offset & ((u64)1 << (offset_bytes * 8 - 1)))) {
            offset |= ~(u64)0 << (offset_bytes * 8);
        }
        cluster_number += offset;
        run_offset += 1 + length_bytes + offset_bytes;

        if (offset_bytes == 0) {
            // A run without an offset field has no clusters on disk (sparse), so
            // there is nothing to read for it.
            continue;
        }

        u64 files_remaining = length * bytes_per_cluster / NTFS_MFT_File_Record_Size;
        u64 position_in_block = 0;
        while (files_remaining) { // enumerate files in chunks of 65536
            u64 files_to_load = NTFS_MFT_Files_Per_Buffer;
            if (files_remaining < (u64)NTFS_MFT_Files_Per_Buffer) {
                files_to_load = files_remaining;
            }
            success = NTFS_read_internal(mft, mft->mft_buffer.data, cluster_number * bytes_per_cluster + position_in_block, files_to_load * NTFS_MFT_File_Record_Size);
            if (!success) {
                // Parsing a buffer that was not (fully) refreshed would re-add stale records.
                log_error("[NTFS_MFT_read_raw] Failed to read MFT records from %s", create_file_target.data);
                return fail();
            }
            position_in_block += files_to_load * NTFS_MFT_File_Record_Size;
            files_remaining -= files_to_load;

            for (s64 i = 0; i < (s64)files_to_load; i += 1) { // load
                // Even on an SSD, processing the file records takes only a fraction of the time to read the data, so there's not much point in multithreading this:
                NTFS_FileRecordHeader* fileRecord = (NTFS_FileRecordHeader*)(mft->mft_buffer.data + NTFS_MFT_File_Record_Size * i);
                // A file record may be blank or unused; just skip it.
                if (!fileRecord->inUse) continue;
                Assert(fileRecord->magic == 0x454C4946);
                if (fileRecord->magic != 0x454C4946) {
                    log_error("[NTFS_MFT_read_raw] Magic number check failed! This drive is likely corrupted!");
                    return fail();
                }

                // inner loop: walk this record's attributes until the end marker.
                NTFS_File file = {};
                u64 attribute_offset = fileRecord->firstAttributeOffset;
                while (attribute_offset + sizeof(u32) <= record_size) {
                    NTFS_AttributeHeader* record_attribute = (NTFS_AttributeHeader*)((u8*)fileRecord + attribute_offset);
                    if (record_attribute->attributeType == 0xFFFFFFFF) {
                        add_record(drive->data, &file);
                        // See Dense_FS drive->data
                        mft->file_count += 1;
                        break;
                    }
                    // Anything other than the 4-byte end marker needs a full header.
                    if (attribute_offset + sizeof(NTFS_AttributeHeader) > record_size) {
                        break;
                    }
                    if (record_attribute->attributeType == 0x30) { // $FILE_NAME
                        NTFS_FileNameAttributeHeader* fileNameAttribute = (NTFS_FileNameAttributeHeader*)record_attribute;
                        if (fileNameAttribute->namespaceType != 2 && !fileNameAttribute->nonResident) {
                            file.parent_id = (u32)fileNameAttribute->parentRecordNumber; // truncate
                            file.record_id = fileRecord->recordNumber;
                            file.is_directory = fileRecord->isDirectory;
                            file.name_count = fileNameAttribute->fileNameLength;
                            file.name_data = (u16*)fileNameAttribute->fileName;
                            file.file_modtime = (u64)fileNameAttribute->modificationTime;
                        }
                    }
                    /* #NOTE: File size doesn't work at all, so just use slower WinAPI for now :(
                    if (attribute->attributeType == 0x80 && attribute->nameLength == 0) { // $DATA
                    // #TODO: Check if file is compressed then access compressedSize
                    bool is_compressed = (attribute->flags & 0x0800) == 0x0800;
                    if ((bool)attribute->nonResident) {
                    NTFS_NonResidentAttributeHeader* nonresident_attribute = (NTFS_NonResidentAttributeHeader*)attribute;
                    if (is_compressed) {
                    file.file_size = nonresident_attribute->compressedSize;
                    } else {
                    file.file_size = nonresident_attribute->attributeSize; // keep bottom 48-bits
                    }
                    } else {
                    NTFS_ResidentAttributeHeader* res = (NTFS_ResidentAttributeHeader*)attribute;
                    file.file_size = res->attributeLength;
                    }
                    Assert(file.file_size < GB(64));
                    if (file.is_directory) { file.file_size = 0; }
                    }*/
                    if (record_attribute->length == 0) {
                        break; // malformed: would never advance
                    }
                    attribute_offset += record_attribute->length;
                } // while: inner loop
            } // for i: 0..files_to_load-1
        } // while: files_remaining
    } // while: outer loop

    CloseHandle(file_handle);
    log_none("Found %lld files on drive %s (bytes_accessed: %s)", mft->file_count, drive_path.data, format_bytes(mft->bytes_accessed).data);

    // 4. Generate parent_indices from record_id and parent_id.
    Timed_Block_Print("NTFS_MFT_read_raw: generate parent_indices");
    // Setup hash table (directory record_id -> index into paths):
    s64 path_count = item_count(&drive->data->paths);
    // #TODO: Before we start inserting stuff into the table we should ensure we have enough space
    // for everything. See table_ensure_space ::
    for (s64 i = 0; i < path_count; i += 1) {
        table_set(&drive->data->path_table, (*drive->data->paths.record_ids)[i], (s32)i);
    }

    // Link directories:
    array_resize(*drive->data->paths.parent_indices, path_count, /*init*/false);
    s64 fail_count = 0;
    for (s64 i = 0; i < path_count; i += 1) {
        u32 parent_id = (*drive->data->paths.parent_ids)[i];
        bool parent_exists = 0;
        s32 previous_index = find_previous_index(drive->data, parent_id, &parent_exists);
        fail_count += (s64)(!parent_exists);
        (*drive->data->paths.parent_indices)[i] = parent_exists ? previous_index : -1; // -1 if failed
    }

    // Link files:
    s64 file_count = item_count(&drive->data->files);
    array_resize(*drive->data->files.parent_indices, file_count, false);
    for (s64 i = 0; i < file_count; i += 1) {
        u32 parent_id = (*drive->data->files.parent_ids)[i];
        bool parent_exists = 0;
        s32 previous_index = find_previous_index(drive->data, parent_id, &parent_exists);
        fail_count += (s64)(!parent_exists);
        (*drive->data->files.parent_indices)[i] = parent_exists ? previous_index : -1;
    }

    // For all files and directories with a parent, find the parent(s) and get the file size!.
    if (fail_count) {
        log_warning("[%s] Failed to find parent for %lld items", drive_path.data, fail_count);
    }
    cleanup_after_enumeration(drive->data);
    drive->file_count = mft->file_count;
    drive->bytes_accessed = mft->bytes_accessed;
    drive->time_to_enumerate = (f32)(GetUnixTimestamp() - start_time);
    log_none("[%s] SUCCESS: total time to enumerate %.3f seconds", drive_path.data, drive->time_to_enumerate);
    return NO_ERROR;
}
// Work item consumed by ntfs_enumeration_thread_proc: the drives one thread should
// enumerate, plus the result of the most recent NTFS_MFT_read_raw call.
struct NTFS_Enumeration_Task {
Arena* pool; // small arena just for results
ArrayView<OS_Drive*> drives; // drives to enumerate, processed in order
// Should be part of OS_Drive!
Error* error; // overwritten after every drive; the thread stops at the first non-null value
};
// Thread entry point: enumerates every drive listed in this thread's
// NTFS_Enumeration_Task, in order. The result of each NTFS_MFT_read_raw call is
// stored in task->error; the first non-null result stops the thread with exit code 1.
// Returns 0 when every drive was processed without an error being reported.
s64 ntfs_enumeration_thread_proc (Thread* thread) {
    auto task = thread_task(NTFS_Enumeration_Task);
    log("[ntfs_enumeration_thread_proc] (Thread index: %lld) Task pointer: %p", thread->index, task);

    for_each(d, task->drives) {
        OS_Drive* current_drive = task->drives[d];
        Error* result = NTFS_MFT_read_raw(current_drive);
        task->error = result;
        // What we actually want to do here is push all our errors to return to the main thread.
        if (result != nullptr) {
            return 1;
        }
    }

    return 0;
}
void os_clear_drive_data () {
ArrayView<OS_Drive*> drives = os_get_available_drives();
for_each(d, drives) {
OS_Drive* drive = drives[d];
release(drive->data);
drive->data = nullptr;
}
}
// First value written by Serialize_Win32_Drives and checked by Deserialize_Win32_Drives
// to recognise a serialized drive file.
constexpr u32 Win32_Drive_Magic_Number = 0x41b5c7a9;
// Per-drive supplementary data kept in NTFS_Workspace::supplementary (one entry per
// deserialized drive).
// NOTE(review): neither field is read or written by the code in this file; they
// presumably hold UI selection state — confirm against the users of NTFS_Workspace.
struct NTFS_Drive {
s32 radio_button;
s32 index;
};
// #TEMPORARY STRUCTURE FOR EXPERIMENTATION.
// Holds the drives loaded by Deserialize_Win32_Drives together with the arena that
// function reserves for them.
struct NTFS_Workspace {
Array<OS_Drive*> drives; // one entry appended per deserialized drive
Array<NTFS_Drive> supplementary; // resized to the deserialized drive count
Arena* arena; // arena reserved and pushed by Deserialize_Win32_Drives
// s32 results_to_show;
};
// The single workspace instance; filled by Deserialize_Win32_Drives and queried by
// ntfs_workspace_files_loaded.
global NTFS_Workspace ntfs_workspace;
// Reports whether the global workspace currently holds at least one drive.
bool ntfs_workspace_files_loaded () {
    return ntfs_workspace.drives.count != 0;
}
// Loads drives previously written by Serialize_Win32_Drives from `file_path` into
// the global ntfs_workspace.
//
// File layout, as read here: u32 magic, s32 drive count, then per drive: s32 index,
// label, volume name, type, file system, sizes, serial number, max component length,
// file-system flags, followed by the Dense_FS arrays for paths and then for files
// (strings, offsets, lengths, modtimes, sizes, parent_indices).
//
// Returns false when the file is empty/unreadable, when the magic number does not
// match, or when the stored drive count is negative; in those cases the workspace is
// left untouched. Returns true after all drives have been read.
bool Deserialize_Win32_Drives (string file_path) {
    Timed_Block_Print("Deserialize_Win32_Drives");
    push_allocator(temp());
    auto_release_temp();

    Deserializer deserializer = read_entire_file(file_path, true);
    if (deserializer.count == 0) return false;
    auto d = &deserializer;
    // NOTE(review): the result is unused here; the call is kept in case
    // get_drive_table() has initialisation side effects — confirm and remove if not.
    auto drive_table = get_drive_table();

    u32 magic_number = 0; s32 drive_count = 0;
    Read(d, &magic_number);
    // Validate at runtime (not only via Assert) so release builds never try to parse
    // an unrelated or corrupted file as drive data.
    if (magic_number != Win32_Drive_Magic_Number) {
        log_error("[Deserialize_Win32_Drives] Magic number mismatch: got 0x%08x, expected 0x%08x", magic_number, Win32_Drive_Magic_Number);
        return false;
    }
    Read(d, &drive_count);
    if (drive_count < 0) {
        log_error("[Deserialize_Win32_Drives] Invalid drive_count: %d", drive_count);
        return false;
    }

    ntfs_workspace.arena = next_arena(Arena_Reserve::Size_64G);
    push_arena(ntfs_workspace.arena);
    Assert(ntfs_workspace.drives.count == 0);
    array_resize(ntfs_workspace.supplementary, drive_count);
    // ntfs_workspace.drives.allocator = GPAllocator();
    log("[Deserialize_Win32_Drives] drive_count: %d", drive_count);

    for (s32 i = 0; i < drive_count; i += 1) {
        // look up disk based on drive_label
        s32 index = 0;
        Read(d, &index); Assert(i == index);
        string drive_label = {};
        ReadString16(d, drive_label);

        OS_Drive** drive_ptr = array_add(ntfs_workspace.drives);
        (*drive_ptr) = New<OS_Drive>();
        OS_Drive* drive = *drive_ptr;
        drive->label = copy_string(drive_label);
        ReadString16(d, drive->volume_name);
        Read(d, &drive->type);
        Read(d, &drive->file_system);
        Read(d, &drive->full_size);
        Read(d, &drive->free_space);
        Read(d, &drive->serial_number);
        Read(d, &drive->max_component_length);
        Read(d, &drive->file_system_flags);

        drive->data = New<Dense_FS>();
        Dense_FS_initialize(drive->data);
        { // (Dense_FS):paths
            DFS_Array paths = drive->data->paths;
            ReadToArenaArray(d, paths.strings);
            // ReadToArenaArray(d, paths.wstrings);
            ReadToArenaArray(d, paths.offsets);
            ReadToArenaArray(d, paths.lengths);
            ReadToArenaArray(d, paths.modtimes);
            ReadToArenaArray(d, paths.sizes);
            ReadToArenaArray(d, paths.parent_indices);
        }
        { // (Dense_FS):files
            DFS_Array files = drive->data->files;
            ReadToArenaArray(d, files.strings);
            // ReadToArenaArray(d, files.wstrings);
            ReadToArenaArray(d, files.offsets);
            ReadToArenaArray(d, files.lengths);
            ReadToArenaArray(d, files.modtimes);
            ReadToArenaArray(d, files.sizes);
            ReadToArenaArray(d, files.parent_indices);
        }
    }
    return true;
}
// Writes `drives` to `file_path` in the format Deserialize_Win32_Drives reads back:
// the magic number and drive count first, then for each drive its header fields
// followed by the Dense_FS arrays for paths and then for files. The serializer is
// flushed to the file and reset after every drive.
// Returns false if the file could not be opened, true otherwise.
// Every drive must already have been enumerated (drive->data non-null).
bool Serialize_Win32_Drives (ArrayView<Win32_Drive*> drives, string file_path) {
    Timed_Block_Print("Serialize_Win32_Drives");

    File f = file_open(file_path, true, false, true);
    if (!file_is_valid(f)) {
        return false;
    }

    Serializer* s = new_serializer(Arena_Reserve::Size_64G);
    // #TODO #Serialization Unfortunately, there's a lot of needless copying here
    // it would be a lot nicer if we could just write-file in place. idk how to do that though ;_;

    // Paths and files share the same array set, written in the same order.
    // Note these are all prefixed with their respective lengths.
    auto add_dfs_array = [s] (DFS_Array entries) {
        AddArray(s, to_view(*entries.strings));
        // AddArray(s, to_view(*entries.wstrings));
        AddArray(s, to_view(*entries.offsets));
        AddArray(s, to_view(*entries.lengths));
        AddArray(s, to_view(*entries.modtimes));
        AddArray(s, to_view(*entries.sizes));
        AddArray(s, to_view(*entries.parent_indices));
    };

    // File header: magic number, then drive count.
    Add(s, (u32)Win32_Drive_Magic_Number);
    Add(s, (s32)drives.count);

    for_each(d, drives) {
        Win32_Drive* current = drives[d];

        // Drive header:
        Add(s, (s32)d);
        AddString16(s, current->label);
        AddString16(s, current->volume_name);
        Add(s, current->type);
        Add(s, current->file_system);
        Add(s, current->full_size);
        Add(s, current->free_space);
        Add(s, current->serial_number);
        Add(s, current->max_component_length);
        Add(s, current->file_system_flags);

        // (Dense_FS)
        Assert(current->data);
        add_dfs_array(current->data->paths); // (Dense_FS):paths
        add_dfs_array(current->data->files); // (Dense_FS):files

        // Write to file and reset
        file_write(&f, to_view(*s));
        reset_serializer(s);
    }

    file_close(&f);
    free_serializer(s);
    return true;
}