diff --git a/build.jai b/build.jai index 9dd8a16..0c0b9ad 100644 --- a/build.jai +++ b/build.jai @@ -2,7 +2,7 @@ // To generate the intermediate to see how many lines are being compiled, in x64 Native Tools Command Prompt for VS2022 or whatever // cl /P /EP exe_main.cpp // tokei exe_main.i -VERSION :: "0.1a"; +VERSION :: "0.2"; #run,stallable build_cpp_project(); @@ -101,7 +101,7 @@ os_target: Operating_System_Tag = .WINDOWS; generate_meta_file :: (debug: bool) { sb: String_Builder; append(*sb, "#pragma once\n\n"); - print_to_builder(*sb, "constexpr const char* MUSA_LIB_VERSION = \"%\";\n", VERSION); + print_to_builder(*sb, "const char* MUSA_LIB_VERSION = \"%\";\n", VERSION); print_to_builder(*sb, "#define BUILD_DEBUG %\n", cast(s32)debug); print_to_builder(*sb, "#define OS_WINDOWS %\n", ifx os_target == .WINDOWS then 1 else 0); diff --git a/exe_main.cpp b/exe_main.cpp index 46540d1..909adff 100644 --- a/exe_main.cpp +++ b/exe_main.cpp @@ -23,6 +23,7 @@ #include "lib/third_party/dear-imgui/imgui_impl_dx11.h" #include "src/ImGui_Supplementary.cpp" + #include "src/String_Analysis.cpp" #include "src/explorer_main.cpp" #endif diff --git a/lib/Base/Arena.cpp b/lib/Base/Arena.cpp index 95f66ac..fa57883 100644 --- a/lib/Base/Arena.cpp +++ b/lib/Base/Arena.cpp @@ -254,3 +254,59 @@ struct Auto_Release { } } }; + +// #FixedArena procedures: +FixedArena* bootstrap_fixed_arena (s64 size, Allocator backing_allocator) { + push_allocator(backing_allocator); + Assert(size >= sizeof(FixedArena)); + ArrayView memory = ArrayView(size); + FixedArena* result = (FixedArena*)memory.data; + result->memory = memory; + result->cursor = sizeof(FixedArena); + result->allocator = backing_allocator; + return result; +} + +force_inline void destroy_arena (FixedArena* arena) { + Delete(arena->allocator, arena); +} + +Allocator allocator (FixedArena* arena) { + return { fixed_arena_allocator_proc, arena }; +} + +void* fixed_arena_allocator_proc (Allocator_Mode mode, s64 requested_size, s64 
old_size, void* old_memory, void* allocator_data) { + constexpr s64 DEFAULT_ALIGNMENT = 16; // maybe make this modifiable as part of FixedArena struct? + FixedArena* arena = (FixedArena*)allocator_data; + Assert(arena != nullptr); + + switch (mode) { + case Allocator_Mode::ALLOCATE: { + arena->cursor = Align(arena->cursor, DEFAULT_ALIGNMENT); + void* result = &arena->memory[arena->cursor]; + arena->cursor += requested_size; + Assert(arena->cursor <= arena->memory.count); + return result; + } break; + case Allocator_Mode::RESIZE: { + arena->cursor = Align(arena->cursor, DEFAULT_ALIGNMENT); + void* result = &arena->memory[arena->cursor]; + arena->cursor += requested_size; + Assert(arena->cursor <= arena->memory.count); + s64 size_to_copy = old_size < requested_size ? old_size : requested_size; + if (result && size_to_copy) { memcpy(result, old_memory, size_to_copy); } + return result; + } break; + case Allocator_Mode::DEALLOCATE: { + return nullptr; // unused + } break; + case Allocator_Mode::DETAILS: { + if (allocator_data == nullptr) { + return "fixed_arena_allocator_proc: data pointer is null!"; + } + return "fixed_arena_allocator_proc: with valid data"; + } break; + } + + return nullptr; +} \ No newline at end of file diff --git a/lib/Base/Arena.h b/lib/Base/Arena.h index 7eca7e0..dcbd3d2 100644 --- a/lib/Base/Arena.h +++ b/lib/Base/Arena.h @@ -138,4 +138,20 @@ struct Push_Alignment { // #rename to Arena_Push_Alignment? // Do this later: // arena_lock_pages :: (using arena: *Arena, start_address: *u8, byte_count: s64) -// arena_unlock_pages :: (using arena: *Arena, start_address: *u8, byte_count: s64) \ No newline at end of file +// arena_unlock_pages :: (using arena: *Arena, start_address: *u8, byte_count: s64) + +// #FixedArena is a super simple arena where you allocate a fixed block up front (fully committed), +// and use it as-is. +// #NOTE: we can save space by always backing with a known allocator (e.g. GPAllocator()). 
+struct FixedArena { + ArrayView memory; + s64 cursor; + Allocator allocator; +}; + +void* fixed_arena_allocator_proc (Allocator_Mode mode, s64 requested_size, s64 old_size, void* old_memory, void* allocator_data); + +// #FixedArena API +FixedArena* bootstrap_fixed_arena (s64 size, Allocator backing_allocator); +force_inline void destroy_arena (FixedArena* arena); +Allocator allocator (FixedArena* arena); diff --git a/lib/Base/Arena_Free_List.cpp b/lib/Base/Arena_Free_List.cpp index e3b15b0..0cf87d1 100644 --- a/lib/Base/Arena_Free_List.cpp +++ b/lib/Base/Arena_Free_List.cpp @@ -86,6 +86,17 @@ void release_arena (Arena* arena, bool delete_extra_pages) { // } } +s64 bytes_in_use (ArrayView arenas) { + // does not include overhead from committed pages! + s64 sum = 0; + + for (s64 i = 0; i < arenas.count; i += 1) { + sum += arena_usage_bytes(arenas[i]); + } + + return sum; +} + s64 committed_bytes (ArrayView arenas) { s64 sum = 0; diff --git a/lib/Base/Array.h b/lib/Base/Array.h index 8b89b89..4d4b34c 100644 --- a/lib/Base/Array.h +++ b/lib/Base/Array.h @@ -182,8 +182,6 @@ void array_add (Array& src, T new_item) { src.data[src.count] = new_item; src.count += 1; - // auto dst_ptr = &src.data[src.count-1]; - // memcpy(dst_ptr, &new_item, sizeof(T)); } template @@ -194,6 +192,16 @@ s64 array_find (Array& src, T item) { return -1; } +template +bool array_add_if_unique (Array& src, T new_item) { + if (array_find(src, new_item) == -1) { + array_add(src, new_item); + return true; + } + + return false; +} + template void array_ordered_remove_by_index (Array& src, s64 index) { Assert(index >= 0); Assert(index < src.count); diff --git a/lib/Base/Base.h b/lib/Base/Base.h index 9489411..ec4a377 100644 --- a/lib/Base/Base.h +++ b/lib/Base/Base.h @@ -3,6 +3,9 @@ #define LANG_CPP 1 #define BUILD_CONSOLE_INTERFACE BUILD_DEBUG +#include // vsnprintf +#include // va_list, ... 
+ #if ARCH_CPU_X64 #include "CPU_X64.cpp" #define PLATFORM_MEMORY_PAGE_SIZE 4096 @@ -13,15 +16,12 @@ #error "CPU not supported (yet)!" #endif -#include // vsnprintf -#include // va_list, ... - - #if OS_WINDOWS #define WIN32_LEAN_AND_MEAN #include #undef ERROR // why... #undef NO_ERROR // ugh... + #include #else #error "This configuration is NOT supported. Only Windows with MSVC is currently supported." #endif @@ -185,7 +185,7 @@ force_inline s64 Next_Power_Of_Two(s64 v) { #define auto_release(x) \ Auto_Release Concat(_auto_release_guard_, __LINE__)(x) #define auto_release_temp() \ - auto_release(thread_context()->temp); + auto_release(thread_context()->temp) #define thread_context() thread_local_context #define temp() allocator(thread_context()->temp) diff --git a/lib/Base/Base_Thread_Context.cpp b/lib/Base/Base_Thread_Context.cpp index baccae4..5fda429 100644 --- a/lib/Base/Base_Thread_Context.cpp +++ b/lib/Base/Base_Thread_Context.cpp @@ -63,4 +63,4 @@ void temp_reset_keeping_memory() { void temp_reset() { // alias: reset_temporary_storage. Thread_Context* context = thread_context(); arena_reset(context->temp, true); -} \ No newline at end of file +} diff --git a/lib/Base/CPU_X64.cpp b/lib/Base/CPU_X64.cpp index 6a09f2c..63d8c2b 100644 --- a/lib/Base/CPU_X64.cpp +++ b/lib/Base/CPU_X64.cpp @@ -1,11 +1,21 @@ #if OS_WINDOWS #include +int cpu_max_feature_leaf () { + int cpuInfo[4]; + + __cpuid(cpuInfo, 0); + int maxLeaf = cpuInfo[0]; + + return maxLeaf; +} + int CPU_Base_Frequency() { int cpuInfo[4] = {0}; // Call CPUID with EAX = 0x16 (Base CPU Frequency) - __cpuid(cpuInfo, 0x16); + if (cpu_max_feature_leaf() >= 0x16) + __cpuid(cpuInfo, 0x16); return cpuInfo[0]; } diff --git a/lib/Base/ErrorType.cpp b/lib/Base/ErrorType.cpp index a94fe18..47c3d68 100644 --- a/lib/Base/ErrorType.cpp +++ b/lib/Base/ErrorType.cpp @@ -4,7 +4,8 @@ enum class ErrorClass: s32 { NONE = 0, // should not be used, just to avoid a default value being assigned. 
WARNING = 1, ERROR = 2, - FATAL = 3 + FATAL = 3, + TODO = 4, }; // #downcasts to string @@ -38,6 +39,10 @@ char* error_severity (ErrorClass severity) { case ErrorClass::FATAL: { return "[FATAL ERROR]"; } break; + case ErrorClass::TODO: { + return "[TODO]"; + } break; + } return ""; } @@ -48,6 +53,9 @@ string to_string (Error* error) { return { error->count, error->data }; } +#define log_todo(fmt, ...) \ + Log_Error_2(__FILE__, __FUNCTION__, __LINE__, ErrorClass::TODO, fmt, ##__VA_ARGS__) + #define log_fatal_error(fmt, ...) \ Log_Error_2(__FILE__, __FUNCTION__, __LINE__, ErrorClass::FATAL, fmt, ##__VA_ARGS__) @@ -136,6 +144,7 @@ void push_error (Thread_Context* tctx, Error* new_error) { tctx->current_error = new_error; switch (new_error->severity) { + case ErrorClass::TODO: case ErrorClass::NONE: case ErrorClass::WARNING: { print(to_string(new_error)); diff --git a/lib/Base/Expandable_Arena.cpp b/lib/Base/Expandable_Arena.cpp index dbff1ab..5f5702f 100644 --- a/lib/Base/Expandable_Arena.cpp +++ b/lib/Base/Expandable_Arena.cpp @@ -107,8 +107,15 @@ Allocator allocator (ExpandableArena* arena_ex) { } // #TODO: currently this keeps the final arena's memory. Fix this! +// This is not implemented correctly! 
void arena_reset_to (ExpandableArena* arena_ex, Arena* last_arena, u8* starting_point) { // going backwards from end of arena list + + if (!arena_ex->next_arenas.count) { + arena_ex->current_point = starting_point; + return; + } + // for (s64 i = arena_ex->next_arenas.count-1; i >= 0; i -= 1) { for_each_reverse(i, arena_ex->next_arenas) { Arena* arena = arena_ex->next_arenas[i]; diff --git a/lib/Base/RadixSort.cpp b/lib/Base/RadixSort.cpp new file mode 100644 index 0000000..1322246 --- /dev/null +++ b/lib/Base/RadixSort.cpp @@ -0,0 +1,128 @@ +struct RadixSort { + ArrayView ranks; + ArrayView ranks2; + Allocator allocator; + bool valid_ranks; +}; + +void radix_sort_init (RadixSort* r, u32 items_to_allocate) { + if (r->allocator.proc == nullptr) { + r->allocator = context_allocator(); + } + push_allocator(r->allocator); + r->ranks = ArrayView(items_to_allocate); + r->ranks2 = ArrayView(items_to_allocate); + r->valid_ranks = false; +} + +void radix_sort_free (RadixSort* r) { + Assert(r->allocator.proc != nullptr); + push_allocator(r->allocator); + array_free(r->ranks); + array_free(r->ranks2); +} + +// RadixSort provides an array of indices in sorted order. +u32 rank (RadixSort* r, s64 i) { + Assert(r != nullptr); +#if ARRAY_ENABLE_BOUNDS_CHECKING + if (i < 0 || i >= r->ranks.count) { debug_break(); /*INDEX OOB*/ } +#endif + return r->ranks[i]; +} + +template void create_histograms (RadixSort* r, T* buffer, u32 count, u32* histogram) { + constexpr u32 bucket_count = sizeof(T); + // Init bucket pointers: + u32* h[bucket_count] = {}; + for (u32 i = 0; i < bucket_count; i += 1) { + h[i] = histogram + (256 * i); + } + + // Build histogram: + u8* p = (u8*)buffer; + u8* pe = (p + count * sizeof(T)); + + while (p != pe) { + h[0][*p] += 1; p += 1; + if (bucket_count > 1) { // how to make compile time if? 
+ h[1][*p] += 1; p += 1; + + if (bucket_count > 2) { + h[2][*p] += 1; p += 1; + h[3][*p] += 1; p += 1; + + if (bucket_count == 8) { + h[4][*p] += 1; p += 1; + h[5][*p] += 1; p += 1; + h[6][*p] += 1; p += 1; + h[7][*p] += 1; p += 1; + } + } + } + } +} + +template void radix_sort (RadixSort* r, T* input, u32 count) { + constexpr u32 T_SIZE = sizeof(T); + // Allocate histograms & offsets on the stack: + u32 histogram [256 * T_SIZE] = {}; + u32* link [256]; + + create_histograms(r, input, count, histogram); + + // Radix sort, j is the pass number, (0 = LSB, P = MSB) + for (u32 j = 0; j < T_SIZE; j += 1) { + u32* h = &histogram[j * 256]; + + u8* input_bytes = (u8*)input; + input_bytes += j; // Assumes little endian! + + if (h[input_bytes[0]] == count) { + continue; + } + + // Create offsets + link[0] = r->ranks2.data; + for (u32 i = 1; i < 256; i += 1) { // 1..255 + link[i] = link[i-1] + h[i-1]; + } + + // Perform Radix Sort + if (!r->valid_ranks) { + for (u32 i = 0; i < count; i += 1) { + *link[input_bytes[i*T_SIZE]] = i; + link[input_bytes[i*T_SIZE]] += 1; + } + r->valid_ranks = true; + } else { + for (u32 i = 0; i < count; i += 1) { + u32 idx = r->ranks[i]; + *link[input_bytes[idx*T_SIZE]] = idx; + link[input_bytes[idx*T_SIZE]] += 1; + } + } + + // Swap pointers for next pass. Valid indices - the most recent ones - are in ranks after the swap. 
+ ArrayView ranks2_temp = r->ranks2; + r->ranks2 = r->ranks; + r->ranks = ranks2_temp; + } + + // All values were equal; generate linear ranks + if (!r->valid_ranks) { + for (u32 i = 0; i < count; i += 1) { + r->ranks[i] = i; + r->valid_ranks = true; + } + } +} + +// NOTE: For a small number of elements it's more efficient to use insertion sort +void radix_sort_u64 (RadixSort* r, u64* input, u32 count) { + if (input == nullptr || count == 0) return; + if (r->ranks.count == 0) { + radix_sort_init(r, count); + } + radix_sort(r, input, count); +} \ No newline at end of file diff --git a/lib/Base/Serializer.h b/lib/Base/Serializer.h index 3ef80f5..9cfd8ed 100644 --- a/lib/Base/Serializer.h +++ b/lib/Base/Serializer.h @@ -146,4 +146,24 @@ force_inline void ReadString16 (Deserializer* ds, string& s) { // #no_alloc ReadStringView(ds, s, (s64)str_len); } -// ReadString_MakeCopy... +// This is specialized for filesystem storage of strings. +// Appends `count` raw bytes (no length prefix, no terminator) and returns the byte offset they were written at. +// A non-positive count appends nothing and returns the current offset. +force_inline u32 AddString_NoCount (Serializer* serializer, u8* data, s16 count) { + u32 original_count = (u32)serializer->count; + if (count <= 0) return original_count; // a negative s16 would otherwise convert to a huge memcpy size + + // Capacity needed is measured from the bytes in use, not from the current capacity. + s64 final_count = (s64)serializer->count + count; + + if (serializer->allocated < final_count) { + array_reserve(*serializer, final_count); + } + + // Take the write pointer after reserving: array_reserve is not visible here and may relocate the storage. + u8* current_point = &serializer->data[original_count]; + memcpy(current_point, data, count * sizeof(u8)); + serializer->count += count * sizeof(u8); + + return original_count; +} \ No newline at end of file diff --git a/lib/Base/Threads.cpp b/lib/Base/Threads.cpp index 253d9c7..df83fe5 100644 --- a/lib/Base/Threads.cpp +++ b/lib/Base/Threads.cpp @@ -70,3 +70,14 @@ struct Thread_Group { bool started = false; bool should_exit = false; }; + +// This might be too slow. 
+s32 get_thread_index (Thread_Group* group, s32 thread_index) { + for_each(w, group->worker_info) { + if (group->worker_info[w].thread.index == thread_index) { + return (s32)w; // zero-indexed to thread group + } + } + + return -1; +} \ No newline at end of file diff --git a/lib/Base/Unicode.cpp b/lib/Base/Unicode.cpp new file mode 100644 index 0000000..c9b5ffe --- /dev/null +++ b/lib/Base/Unicode.cpp @@ -0,0 +1,61 @@ +constexpr u8 trailing_bytes_for_utf8[] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5}; +constexpr u32 UNI_REPLACEMENT_CHAR = 0x0000FFFD; +constexpr u8 utf8_inital_byte_mask[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 }; +constexpr u32 UNI_MAX_UTF32 = 0x7FFFFFFF; + +// Decodes one UTF-8 sequence starting at data[0] into *utf32 and writes the number of bytes consumed to *source_length_out. +// Returns false with UNI_REPLACEMENT_CHAR when the input is empty or the sequence is cut short by source_length. +// Continuation bytes are masked with 0x3F but not validated (see the "strict" notes). +bool character_utf8_to_utf32 (u8* data, s64 source_length, u32* utf32, s64* source_length_out) { + if (source_length <= 0) { // nothing to decode; data[0] must not be read + (*utf32) = UNI_REPLACEMENT_CHAR; + (*source_length_out) = 0; + return false; + } + + u8 first_character = data[0]; + s32 continuation_bytes = trailing_bytes_for_utf8[first_character]; + + if ((continuation_bytes + 1) > source_length) { + (*utf32) = UNI_REPLACEMENT_CHAR; + (*source_length_out) = source_length; + return false; + } + + u32 ch = data[0] & utf8_inital_byte_mask[continuation_bytes]; + + for (s64 i = 1; i < continuation_bytes + 1; i += 1) { + ch = ch << 6; + //if strict ... + ch |= data[i] & 0x3F; + } + + // #if strict... 
{} + (*utf32) = ch; + (*source_length_out) = continuation_bytes + 1; + + if (ch > UNI_MAX_UTF32) { + (*utf32) = UNI_REPLACEMENT_CHAR; + } + + return true; +} + +bool next_utf8_to_utf32 (string& s, u32* utf32_char_out) { + s64 codepoint_source_length; + bool success = character_utf8_to_utf32(s.data, s.count, utf32_char_out, &codepoint_source_length); + + s.data += codepoint_source_length; + s.count -= codepoint_source_length; + + Assert(s.count >= 0); + return success; +} diff --git a/lib/Base/run_tests.cpp b/lib/Base/run_tests.cpp index cb0124c..95301c3 100644 --- a/lib/Base/run_tests.cpp +++ b/lib/Base/run_tests.cpp @@ -1,5 +1,5 @@ void run_pre_setup_tests() { - // #no_context: context will not be initialized at this point. + // #no_context: context will not be initialized at this point, so log() doesn't work printf("Running pre-setup tests...\n"); printf("\nFinished running pre-setup tests...\n"); } diff --git a/lib/OS/OS_Filesystem.cpp b/lib/OS/OS_Filesystem.cpp index 940d91a..a33f838 100644 --- a/lib/OS/OS_Filesystem.cpp +++ b/lib/OS/OS_Filesystem.cpp @@ -21,23 +21,6 @@ // if we need ordered insertions and deletes. 
// -// Returns offset -force_inline u32 AddString_NoCount (Serializer* serializer, u8* data, u8 count) { // #TODO: , bool null_terminate=false - u32 original_count = (u32)serializer->count; - u8* current_point = &serializer->data[original_count]; - - s64 final_count = serializer->allocated + (count * sizeof(u8)); - - if (serializer->allocated < final_count) { - array_reserve(*serializer, final_count); - } - - memcpy(current_point, data, count * sizeof(u8)); - serializer->count += count * sizeof(u8); - - return original_count; -} - constexpr s64 DFS_Preallocation_Count = 4194304; // 2^22 // template diff --git a/lib/OS/OS_Win32.cpp b/lib/OS/OS_Win32.cpp index df937e9..52e1726 100644 --- a/lib/OS/OS_Win32.cpp +++ b/lib/OS/OS_Win32.cpp @@ -1,10 +1,10 @@ // #TODO: #OS_Win32 // [ ] #Exception handling code in `Win32_Exception_Filter` // [~] #Thread cleanup: in `thread_deinit` is there any requirement to cleanup child threads? - // - I think no? Threads should handle their own lifetimes, and the parent threads should ensure child threads are complete + // - I think: no? Threads should handle their own lifetimes, and the parent threads should ensure child threads are complete // before terminating. + // Or we can move child threads up to the parent? 
-#if OS_WINDOWS constexpr s64 FILETIME_TO_UNIX = 116444736000000000i64; f64 GetUnixTimestamp () { FILETIME fileTime; @@ -31,7 +31,20 @@ u64 FILETIME_to_ticks (FILETIME fileTime) { return ticks; } -#endif +string format_time_datetime (FILETIME ft) { + SYSTEMTIME stUTC, st; + FileTimeToSystemTime(&ft, &stUTC); + SystemTimeToTzSpecificLocalTime(nullptr, &stUTC, &st); + + return format_string("%04u-%02u-%02u %02u:%02u:%02u.%03u", + st.wYear, + st.wMonth, + st.wDay, + st.wHour, + st.wMinute, + st.wSecond, + st.wMilliseconds); +} struct OS_System_Info { // #cpuid @@ -240,7 +253,6 @@ internal void Win32_Entry_Point (int argc, WCHAR **argv) { // [ ] Get Working directory (info->working_path) // [ ] GetEnvironmentStringsW temp_reset(); - printf("Hello there!\n\n"); } C_LINKAGE DWORD OS_Windows_Thread_Entry_Point (void* parameter) { @@ -1117,323 +1129,361 @@ string os_get_machine_name () { // [ ] get_mouse_pointer_position // [ ] ... What APIs do I need for Keyboard -struct Enumeration_Work { - string first_directory; - s32 parent_index; - - Arena* thread_arena; // pointer to relevant tctx->arena - // Directories - ArenaArray* d_offsets; - ArenaArray* d_lengths; - ArenaArray* d_parent_indices; - ArenaArray* d_sizes; - ArenaArray* d_modtime; - // Files - ArenaArray* offsets; - ArenaArray* lengths; - ArenaArray* parent_indices; - ArenaArray* sizes; - ArenaArray* modtime; -}; +// #FileEnumerationST -struct Files_Combined_Results { - // ArenaArray full_path; - ArenaArray* name; - ArenaArray* parent_indices; +struct STFE_Results { + Serializer* strings; // Serializer? 
+ ArenaArray* offsets; + ArenaArray* lengths; ArenaArray* sizes; - ArenaArray* modtime; + ArenaArray* modtimes; }; -struct Drive_Enumeration { // master thread struct - Arena* arena; +void init (STFE_Results* results) { + results->strings = (Serializer*)arena_array_new (1024*1024*4*16, Arena_Reserve::Size_2G); + results->offsets = arena_array_new(1024*1024*4, Arena_Reserve::Size_2G); + results->lengths = arena_array_new(1024*1024*4, Arena_Reserve::Size_2G); + results->sizes = arena_array_new(1024*1024*4, Arena_Reserve::Size_2G); + results->modtimes = arena_array_new(1024*1024*4, Arena_Reserve::Size_2G); +} + +void STFE_Results_Free (STFE_Results* results) { + arena_array_free(*results->strings); + arena_array_free(*results->offsets); + arena_array_free(*results->lengths); + arena_array_free(*results->sizes); + arena_array_free(*results->modtimes); +} + +struct ST_File_Enumeration { // global state ArrayView drives; Thread* master_thread; - s32 thread_count; + STFE_Results dirs; + STFE_Results files; + s32 directories_enumerated; // going sequentially bool thread_started; bool thread_completed; - Files_Combined_Results paths; - Files_Combined_Results files; - - s32 work_added = 0; - s32 work_completed = 0; + f64 start_time; + f64 end_time; }; -void push_root (Drive_Enumeration* de, string label, s32 index) { - array_add(*de->paths.name, label); - array_add(*de->paths.parent_indices, index); - array_add(*de->paths.sizes, (u64)0); - array_add(*de->paths.modtime, (u64)0); -} +global ST_File_Enumeration* stfe; -global Drive_Enumeration* drive_enumeration; - -string directory_get_full_path (Drive_Enumeration* de, s64 index) { - push_allocator(GPAllocator()); // to copy from String_Builder - Files_Combined_Results* f = &de->paths; - string dir_name = (*f->name)[index]; - s32 parent_index = (*f->parent_indices)[index]; - s32 next_parent = (*f->parent_indices)[parent_index]; - - Array paths; - paths.allocator = temp(); - - array_add(paths, (*f->name)[parent_index]); - - 
while (parent_index != next_parent) { - parent_index = next_parent; - next_parent = (*f->parent_indices)[parent_index]; - array_add(paths, (*f->name)[parent_index]); - } - - // while (parent_index > -1) { // should be while(true) - // - // s32 next_parent = (*f->parent_indices)[parent_index]; - // if (parent_index == next_parent) break; - // s32 parent_index = next_parent; - // } - - // go in reverse order and add together string - String_Builder* sb = new_string_builder(Arena_Reserve::Size_64K); - for (s64 i = paths.count-1; i >= 0; i -= 1) { - append(sb, paths[i]); - append(sb, "\\"); - } - append(sb, dir_name); - - return builder_to_string(sb); -} - -void update_results (Drive_Enumeration* de, Enumeration_Work* ew) { - // merge results and release resources! - // unfortunately this is a LOT of copying! - for_each(i, (*ew->d_offsets)) { - u8* string_ptr = (ew->thread_arena->memory_base + (*ew->d_offsets)[i]); - string name = {(*ew->d_lengths)[i], string_ptr}; - array_add(*de->paths.name, name); - array_add(*de->paths.parent_indices, (*ew->d_parent_indices)[i]); - array_add(*de->paths.sizes, (*ew->d_sizes)[i]); - array_add(*de->paths.modtime, (*ew->d_modtime)[i]); - } - for_each(i, (*ew->offsets)) { - u8* string_ptr = (ew->thread_arena->memory_base + (*ew->offsets)[i]); - string name = {(*ew->lengths)[i], string_ptr}; - array_add(*de->files.name, name); - array_add(*de->files.parent_indices, (*ew->parent_indices)[i]); - array_add(*de->files.sizes, (*ew->sizes)[i]); - array_add(*de->files.modtime, (*ew->modtime)[i]); - } -} - -void add_record (Enumeration_Work* ew, WIN32_FIND_DATAW* find_data, string name, s32 parent_index=-1) { - u32 offset = (u32)(name.data - ew->thread_arena->memory_base); - bool is_directory = (find_data->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0; - u64 size = ((u64)find_data->nFileSizeHigh << 32) | ((u64)find_data->nFileSizeLow & 0xFFFFFFFF); - - if (is_directory) { - array_add((*ew->d_offsets), offset); - array_add((*ew->d_lengths), 
(s16)name.count); - array_add((*ew->d_parent_indices), parent_index); // #TODO #parent_index - array_add((*ew->d_sizes), size); - array_add((*ew->d_modtime), FILETIME_to_ticks(find_data->ftLastWriteTime)); - } else { - array_add((*ew->offsets), offset); - array_add((*ew->lengths), (s16)name.count); - array_add((*ew->parent_indices), parent_index); // #TODO #parent_index - array_add((*ew->sizes), size); - array_add((*ew->modtime), FILETIME_to_ticks(find_data->ftLastWriteTime)); - } -} - -Thread_Continue_Status file_enumeration_thread_group_proc (Thread_Group* group, Thread* thread, void* work) { - // 1. setup userdata as an Arena*: - Arena* result_arena; - if (!thread->context->userdata) { - result_arena = next_arena(Arena_Reserve::Size_64G); - thread->context->userdata = result_arena; - } else { - result_arena = (Arena*)thread->context->userdata; - } - - Enumeration_Work* enum_work = (Enumeration_Work*)work; - enum_work->thread_arena = (Arena*)thread->context->userdata; - - enum_work->d_offsets = arena_array_new(4096, Arena_Reserve::Size_2M); - enum_work->d_lengths = arena_array_new(4096, Arena_Reserve::Size_2M); - enum_work->d_parent_indices = arena_array_new(4096, Arena_Reserve::Size_2M); - enum_work->d_sizes = arena_array_new(4096, Arena_Reserve::Size_2M); - enum_work->d_modtime = arena_array_new(4096, Arena_Reserve::Size_2M); - - enum_work->offsets = arena_array_new(4096, Arena_Reserve::Size_2M); - enum_work->lengths = arena_array_new(4096, Arena_Reserve::Size_2M); - enum_work->parent_indices = arena_array_new(4096, Arena_Reserve::Size_2M); - enum_work->sizes = arena_array_new(4096, Arena_Reserve::Size_2M); - enum_work->modtime = arena_array_new(4096, Arena_Reserve::Size_2M); - - // Validate thread context? - push_allocator(temp()); - auto_release_temp(); - - // log("file_enumeration_thread_group_proc, thread index: %d", thread->index); - - // MAKE SURE PATH IS NULL TERMINATED! 
- wstring wildcard_name = utf8_to_wide(format_string("%s\\*", enum_work->first_directory.data)); // #temp - WIN32_FIND_DATAW find_data; - HANDLE h = FindFirstFileExW((LPCWSTR)wildcard_name.data, FindExInfoBasic, &find_data, - FindExSearchNameMatch, nullptr, FIND_FIRST_EX_LARGE_FETCH); - if (h == INVALID_HANDLE_VALUE) { - return Thread_Continue_Status::CONTINUE; - } - - while (true) { - push_arena(result_arena); - string name = wide_to_utf8((u16*)find_data.cFileName); // #NOT_TEMP - bool should_continue = (name.count == 0 || name == "." || name == ".."); - if (should_continue) { - bool success = FindNextFileW(h, &find_data); - if (!success) - break; - continue; - } - - add_record(enum_work, &find_data, name, enum_work->parent_index); - - bool success = FindNextFileW(h, &find_data); - if (!success) break; - } - - FindClose(h); - return Thread_Continue_Status::CONTINUE; -} - -s64 multithreaded_file_enumeration_master_proc (Thread* thread) { - auto task = thread_task(Drive_Enumeration); - - push_arena(task->arena); - - Thread_Group* file_enum_thread_group = New(); - - s32 thread_count = os_cpu_physical_core_count(); - +void free_stfe_and_reset () { push_allocator(GPAllocator()); - thread_group_init(file_enum_thread_group, thread_count, file_enumeration_thread_group_proc, true); + array_free(stfe->drives); + + internal_free(stfe->master_thread); + + STFE_Results_Free(&stfe->dirs); + STFE_Results_Free(&stfe->files); + + internal_free(stfe); + stfe = nullptr; // final step +} + +string add_record (ST_File_Enumeration* stfe, string full_path, bool is_directory, WIN32_FIND_DATAW* find_data) { + // return the string copy! + if (is_directory) { + STFE_Results* r = &stfe->dirs; + u32 offset = AddString_NoCount(r->strings, full_path.data, (s16)full_path.count); + array_add((*r->offsets), offset); + array_add((*r->lengths), (s16)full_path.count); + // No size for directories. 
+ u64 modtime = FILETIME_to_ticks(find_data->ftLastWriteTime); + array_add((*r->modtimes), modtime); + + string path_copy = {full_path.count, &r->strings->data[offset]}; + return path_copy; + } else { + STFE_Results* r = &stfe->files; + u32 offset = AddString_NoCount(r->strings, full_path.data, (s16)full_path.count); + array_add((*r->offsets), offset); + array_add((*r->lengths), (s16)full_path.count); + + u64 size = ((u64)find_data->nFileSizeHigh << 32) | ((u64)find_data->nFileSizeLow & 0xFFFFFFFF); + u64 modtime = FILETIME_to_ticks(find_data->ftLastWriteTime); + array_add((*r->sizes), size); + array_add((*r->modtimes), modtime); + + string path_copy = {full_path.count, &r->strings->data[offset]}; + return path_copy; + } + + Assert(false); + return {}; +} + +s32 count_paths (ST_File_Enumeration* stfe) { + STFE_Results* r = &stfe->dirs; + return (s32)r->offsets->count; +} +s32 count_files (ST_File_Enumeration* stfe) { + STFE_Results* r = &stfe->files; + return (s32)r->offsets->count; +} + +// #UI #TEMP - just for visualization! 
+string get_file_copy (ST_File_Enumeration* stfe, s64 index) { + STFE_Results* r = &stfe->files; + Assert(index >= 0 && index < count_files(stfe)); + s64 strlength = (*r->lengths)[index]; + u32 offset = (*r->offsets)[index]; + u8* string_ptr = &r->strings->data[offset]; + string file = {strlength, string_ptr}; + return copy_string(file); +} + +string get_file_string_view (ST_File_Enumeration* stfe, s64 index) { + STFE_Results* r = &stfe->files; + s64 strlength = (*r->lengths)[index]; + u32 offset = (*r->offsets)[index]; + u8* string_ptr = &r->strings->data[offset]; + string file = {strlength, string_ptr}; + return file; +} + +string get_path_copy (ST_File_Enumeration* stfe, s64 index) { + STFE_Results* r = &stfe->dirs; + Assert(index >= 0 && index < count_paths(stfe)); + s64 strlength = (*r->lengths)[index]; + u32 offset = (*r->offsets)[index]; + u8* string_ptr = &r->strings->data[offset]; + string path = {strlength, string_ptr}; + return copy_string(path); +} +s64 get_file_size_bytes (ST_File_Enumeration* stfe, s64 index) { + STFE_Results* r = &stfe->files; + return (s64)(*r->sizes)[index]; +} + +FILETIME get_file_modtime (ST_File_Enumeration* stfe, s64 index) { + STFE_Results* r = &stfe->files; + FILETIME ft; + memcpy(&ft, &(*r->modtimes)[index], sizeof(u64)); + return ft; +} + +FILETIME get_path_modtime (ST_File_Enumeration* stfe, s64 index) { + STFE_Results* r = &stfe->dirs; + FILETIME ft; + memcpy(&ft, &(*r->modtimes)[index], sizeof(u64)); + return ft; +} + +s64 win32_file_enum_thread_proc (Thread* thread) { + auto task = thread_task(ST_File_Enumeration); + init(&task->dirs); + init(&task->files); + + // Allocates to thread_context->arena, which is cleaned up + // when the thread completes. see: thread_deinit + Array paths_to_enumerate; for_each(d, task->drives) { - auto work = New(GPAllocator()); //replace with arena bootstrap? - work->first_directory = task->drives[d]->label; // this includes the colon-slash, (e.g. `C:\`). 
- work->parent_index = (s32)d; // #HACK? - // add label root to combined results, so we can look it up later! - push_root(task, work->first_directory, work->parent_index); - - add_work(file_enum_thread_group, work); - task->work_added += 1; - } - - start(file_enum_thread_group); - // set task completed. - - s64 path_index = task->drives.count; - - // #TODO: Get completed work! - while (task->work_completed < task->work_added) { - auto_release_temp(); - ArrayView cw = get_completed_work(file_enum_thread_group); - for_each(i, cw) { - auto ew = (Enumeration_Work*)cw[i]; - update_results(task, ew); - - arena_array_free(*ew->d_offsets, false); - arena_array_free(*ew->d_lengths, false); - arena_array_free(*ew->d_parent_indices, false); - arena_array_free(*ew->d_sizes, false); - arena_array_free(*ew->d_modtime, false); - arena_array_free(*ew->offsets, false); - arena_array_free(*ew->lengths, false); - arena_array_free(*ew->parent_indices, false); - arena_array_free(*ew->sizes, false); - arena_array_free(*ew->modtime, false); - - string_free(ew->first_directory); - internal_free(ew); + string parent_directory = task->drives[d]->label; // includes a trailing slash + if (parent_directory.data[2] == (u8)'\\') { + parent_directory.count -= 1; //#hack to quickly remove trailing slash. } - task->work_completed += (s32)cw.count; + array_add(paths_to_enumerate, parent_directory); - // For each new directory: - // s64 dirs_to_enumerate = task->paths.name->count - path_index; - for (s64 i = path_index; i < task->paths.name->count; i += 1) { - auto work = New(GPAllocator()); - work->first_directory = directory_get_full_path(task, i);// need full name here! - work->parent_index = (s32)i; + while (paths_to_enumerate.count > 0) { + push_allocator(temp()); + auto_release_temp(); + // This needs to be null-terminated: + // #TODO: Replace this #LIFO array with an arena-backed FIFO stack (singly linked-list). + string next_directory = copy_string(pop(paths_to_enumerate)); // LIFO. 
maybe not the best way? + wstring wildcard_name = utf8_to_wide(format_string("%s\\*", next_directory.data)); - add_work(file_enum_thread_group, work); + WIN32_FIND_DATAW find_data; + HANDLE h = FindFirstFileExW((LPCWSTR)wildcard_name.data, FindExInfoBasic, &find_data, + FindExSearchNameMatch, nullptr, FIND_FIRST_EX_LARGE_FETCH); + if (h == INVALID_HANDLE_VALUE) { + log_error("FindFirstFileExW failed for %s", wide_to_utf8(wildcard_name.data, (s32)wildcard_name.count).data); + os_log_error(); + continue; + } - task->work_added += 1; - } - path_index = task->paths.name->count; - - Sleep(1); - log("work completed: %d/%d",task->work_completed, task->work_added); - } + while (true) { auto_release_temp(); + string name = wide_to_utf8((u16*)find_data.cFileName); + bool should_continue = (name.count == 0 || name == "." || name == ".."); + if (should_continue) { + bool success = FindNextFileW(h, &find_data); + if (!success) { break; } + continue; + } + + bool is_directory = (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0; + string full_path = format_string("%s\\%s", next_directory.data, name.data); + string full_path_copy = add_record(task, full_path, is_directory, &find_data); + if (is_directory) { + array_add(paths_to_enumerate, full_path_copy); + } + + bool success = FindNextFileW(h, &find_data); + if (!success) break; + } // while (true) -> FindNextFileW + + FindClose(h); + } // while (parent_directory) + } // for_each(d, drives) - shutdown(file_enum_thread_group); + task->end_time = GetUnixTimestamp(); - task->thread_completed = true; return 0; } -void initialize (Files_Combined_Results* fcr) { - fcr->name = arena_array_new(4194304, Arena_Reserve::Size_2G); // 2GB @ 16-byte strings => 134.2M entries. 64 might be better here for really large file collections! 
- fcr->parent_indices = arena_array_new(4194304, Arena_Reserve::Size_2G); - fcr->sizes = arena_array_new(4194304, Arena_Reserve::Size_2G); - fcr->modtime = arena_array_new(4194304, Arena_Reserve::Size_2G); -} - -void run_multithreaded_enumeration_thread () { - // Need some struct to track the state of this operation. - Arena* arena = next_arena(Arena_Reserve::Size_64K); - push_arena(arena); +void os_run_file_enumeration_single_threaded () { + push_allocator(GPAllocator()); - drive_enumeration = New(); - (*drive_enumeration) = { - arena, - os_get_available_drives(), + stfe = New(); + (*stfe) = { + os_get_available_drives(), New(), - os_cpu_physical_core_count(), - 0, false, false, {}, {}, - 0, 0 + STFE_Results(), STFE_Results(), + 0, true, false, GetUnixTimestamp(), 0 }; - initialize(&drive_enumeration->paths); - initialize(&drive_enumeration->files); - - // We start 1 thread to run the thread group and track the threads - string thread_name = "Multithreaded Enumeration: Master Thread"; - bool success = thread_init(drive_enumeration->master_thread, - multithreaded_file_enumeration_master_proc, thread_name); + string thread_name = "Single Thread Enumeration - Master Thread"; + bool success = thread_init(stfe->master_thread, win32_file_enum_thread_proc, thread_name); + if (!success) { + log_error("Failed to initialize thread (stft->master_thread)"); + os_log_error(); + } Assert(success); - thread_start(drive_enumeration->master_thread, drive_enumeration); - drive_enumeration->thread_started = true; + thread_start(stfe->master_thread, stfe); } -bool file_enum_multithreading_started () { - if (drive_enumeration == nullptr) return false; - return drive_enumeration->thread_started; +constexpr u32 STFE_Magic_Number = 0x19075fee; + +bool Serialize_ST_File_Enumeration (string file_path) { + Timed_Block_Print("Serialize_ST_File_Enumeration"); + File f = file_open(file_path, true, false, true); + if (!file_is_valid(f)) return false; + bool success = true; + // #TODO 
#Serialization Unfortunately, there's a lot of needless copying here + // it would be a lot nicer if we could just write-file in place. idk how to do that though ;_; + Serializer* s = new_serializer(Arena_Reserve::Size_64G); + Add(s, (u32)STFE_Magic_Number); + Add(s, (s32)stfe->drives.count); + // Dirs: + STFE_Results* r = &stfe->dirs; + AddArray(s, to_view(*r->strings)); + AddArray(s, to_view(*r->offsets)); + AddArray(s, to_view(*r->lengths)); + AddArray(s, to_view(*r->modtimes)); + + // Files: + r = &stfe->files; + AddArray(s, to_view(*r->strings)); + AddArray(s, to_view(*r->offsets)); + AddArray(s, to_view(*r->lengths)); + AddArray(s, to_view(*r->sizes)); + AddArray(s, to_view(*r->modtimes)); + + success = file_write(&f, to_view(*s)); + reset_serializer(s); + + file_close(&f); + free_serializer(s); + + return success; } -bool file_enum_multithreading_active () { - if (drive_enumeration == nullptr) return false; - if (drive_enumeration->thread_completed) { - return false; +bool Deserialize_ST_File_Enumeration (string file_path) { + Timed_Block_Print("Deserialize_ST_File_Enumeration"); + push_allocator(GPAllocator()); + if (!stfe) stfe = New(); + (*stfe) = { + {}, + {}, + STFE_Results(), STFE_Results(), + 0, false, false, GetUnixTimestamp(), 0 + }; + + push_allocator(temp()); + auto_release_temp(); + + Deserializer deserializer = read_entire_file(file_path, true); + if (deserializer.count == 0) return false; + auto d = &deserializer; + + u32 magic_number; s32 drive_count; + Read(d, &magic_number); + Assert(magic_number == STFE_Magic_Number); + Read(d, &drive_count); + + init(&stfe->dirs); + init(&stfe->files); + + STFE_Results* r = &stfe->dirs; + ReadToArenaArray(d, r->strings); + ReadToArenaArray(d, r->offsets); + ReadToArenaArray(d, r->lengths); + ReadToArenaArray(d, r->modtimes); + r = &stfe->files; + ReadToArenaArray(d, r->strings); + ReadToArenaArray(d, r->offsets); + ReadToArenaArray(d, r->lengths); + ReadToArenaArray(d, r->sizes); + ReadToArenaArray(d, 
r->modtimes); + + stfe->thread_started = true; + stfe->thread_completed = true; + stfe->end_time = GetUnixTimestamp(); + + return true; +} + +// #USNJrnl stuff: + +// This should work even if our other indices are not ready yet! +bool USN_Journal_Monitoring_Ready(OS_Drive* drive) { + return (drive->jrnl.hVol != nullptr && drive->jrnl.hVol != INVALID_HANDLE_VALUE); +} + +void Win32_Enable_USN_Journal_Monitoring (ArrayView drives) { + push_allocator(temp()); + // #TODO: Put any relevant data into Win32_Drive. + for_each(d, drives) { + OS_Drive* drive = drives[d]; + if (drive->jrnl.no_permission) continue; + if (USN_Journal_Monitoring_Ready(drive)) continue; + string drive_letter = Win32_drive_letter(drive->label); + string create_file_target = format_string("\\\\.\\%s:", drive_letter.data); + drive->jrnl.hVol = CreateFileA((LPCSTR)create_file_target.data, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, nullptr, + OPEN_EXISTING, 0, nullptr); + if (drive->jrnl.hVol == INVALID_HANDLE_VALUE) { + log_error("CreateFileA failed on target %s", create_file_target.data); + os_log_error(); + drive->jrnl.no_permission = true; + } } - if (drive_enumeration->thread_started) { - return true; - } - return false; } -// if (drive_enumeration != nullptr) { -// // Check if task is completed, clean up thread. -// discard arena and zero drive_enumeration. 
-// } \ No newline at end of file +void Query_USN_Journal (ArrayView drives) { + Win32_Enable_USN_Journal_Monitoring(drives); + for_each(d, drives) { + OS_Drive* drive = drives[d]; + if (!USN_Journal_Monitoring_Ready(drive)) continue; + USN_JOURNAL_DATA_V0 usn_jd; + DWORD bytes_returned; + BOOL ok = DeviceIoControl(drive->jrnl.hVol, FSCTL_QUERY_USN_JOURNAL, + nullptr, 0, + &usn_jd, sizeof(usn_jd), + &bytes_returned, + nullptr); + if (!ok) { + log_error("DeviceIoControl failed on target %s", drive->label.data); + os_log_error(); + return; + } + log("[DeviceIoControl] target %s", drive->label.data); + log(" > Journal ID: %llu", usn_jd.UsnJournalID); + log(" > First USN: %llu", usn_jd.FirstUsn); + debug_break(); // #TODO #continue + } +} diff --git a/lib/OS/OS_Win32.h b/lib/OS/OS_Win32.h index 9d34778..7bc37fb 100644 --- a/lib/OS/OS_Win32.h +++ b/lib/OS/OS_Win32.h @@ -176,6 +176,13 @@ File_System Win32_filesystem_from_string (string s) { return File_System::Unknown; } struct Dense_FS; // #hack forward declare! + +struct NTFS_USN_Journal { + bool no_permission; + HANDLE hVol; + // ArrayView changes; +}; + struct Win32_Drive { string label; string volume_name; @@ -192,6 +199,8 @@ struct Win32_Drive { s64 file_count; f32 time_to_enumerate; Dense_FS* data; + + NTFS_USN_Journal jrnl; }; typedef Win32_Drive OS_Drive; diff --git a/lib/OS/OS_Win32_File_Enumeration.cpp b/lib/OS/OS_Win32_File_Enumeration.cpp new file mode 100644 index 0000000..1897674 --- /dev/null +++ b/lib/OS/OS_Win32_File_Enumeration.cpp @@ -0,0 +1,334 @@ +struct Parent_Index { + s32 thread_index; // group->worker_info[thread_index].thread + s32 parent_index; // index into d_*offsets/lengths/etc. 
+}; + +struct File_Enumeration_Thread_Results { // #userdata + Arena* arena; // for strings + // Directories + ArenaArray* d_offsets; + ArenaArray* d_lengths; + ArenaArray* d_parent_indices; + ArenaArray* d_modtime; + // s64 dirs_enumerated = 0; + // Files + ArenaArray* f_offsets; + ArenaArray* f_lengths; + ArenaArray* f_parent_indices; + ArenaArray* f_sizes; + ArenaArray* f_modtime; +}; + +void initialize (File_Enumeration_Thread_Results* fcr) { // Preallocate for 2^22 files: + fcr->arena = next_arena(Arena_Reserve::Size_2G); + + fcr->d_offsets = arena_array_new(4194304, Arena_Reserve::Size_2G); + fcr->d_lengths = arena_array_new(4194304, Arena_Reserve::Size_2G); + fcr->d_parent_indices = arena_array_new(4194304, Arena_Reserve::Size_2G); + fcr->d_modtime = arena_array_new(4194304, Arena_Reserve::Size_2G); + + fcr->f_offsets = arena_array_new(4194304, Arena_Reserve::Size_2G); + fcr->f_lengths = arena_array_new(4194304, Arena_Reserve::Size_2G); + fcr->f_parent_indices = arena_array_new(4194304, Arena_Reserve::Size_2G); + fcr->f_sizes = arena_array_new(4194304, Arena_Reserve::Size_2G); + fcr->f_modtime = arena_array_new(4194304, Arena_Reserve::Size_2G); +} + +struct Enumeration_Work { + string first_directory; + Parent_Index parent; + bool is_root = false; + Array next; +}; + +struct Drive_Enumeration { // master thread struct + ArrayView drives; + Thread* master_thread; + + s32 thread_count; + bool thread_started; + bool thread_completed; + + // Files_Combined_Results paths; + // Files_Combined_Results files; + + s32 work_added = 0; + s32 work_completed = 0; +}; + +// void push_root (Drive_Enumeration* de, string label, s32 index) { +// array_add(*de->paths.name, label); +// array_add(*de->paths.parent_indices, index); +// array_add(*de->paths.sizes, (u64)0); +// array_add(*de->paths.modtime, (u64)0); +// } + +global Drive_Enumeration* drive_enumeration; + +// File_Enumeration_Thread_Results* results_from_thread_index (Thread_Group* group, s32 thread_index) { +// 
return ; +// } + +string path_from_parent_index (Thread_Group* group, Parent_Index pid, Parent_Index* next_pid) { + if (pid.parent_index == -1) return ""; + + auto results = (File_Enumeration_Thread_Results*)group->worker_info[pid.thread_index].thread.context->userdata; + u8* offset = (results->arena->memory_base + (*results->d_offsets)[pid.parent_index]); + u32 length = (*results->d_lengths)[pid.parent_index]; + + (*next_pid) = (*results->d_parent_indices)[pid.parent_index]; + + return {(s64)length, offset}; +} + +// This is much stupider and more complicated than I would like, unfortunately. +string directory_get_full_path (Thread_Group* group, Parent_Index pid, string dir_name) { + push_allocator(GPAllocator()); // to copy from String_Builder + + Array paths; + paths.allocator = temp(); + + Parent_Index this_pid = pid; + Parent_Index next_pid = {}; + string parent_dir = path_from_parent_index(group, this_pid, &next_pid); + array_add(paths, parent_dir); + + this_pid = next_pid; + next_pid = {}; + + while (this_pid.parent_index != -1) { + parent_dir = path_from_parent_index(group, this_pid, &next_pid); + array_add(paths, parent_dir); + + this_pid = next_pid; + next_pid = {}; + } + + // go in reverse order and add together string + String_Builder* sb = new_string_builder(Arena_Reserve::Size_64K); + for (s64 i = paths.count-1; i >= 0; i -= 1) { + append(sb, paths[i]); + append(sb, "\\"); + } + append(sb, dir_name); + + return builder_to_string(sb); +} + +/*void update_results (Drive_Enumeration* de, Enumeration_Work* ew) { + // merge results and release resources! + // unfortunately this is a LOT of copying! 
+ for_each(i, (*ew->d_offsets)) { + u8* string_ptr = (ew->thread_arena->memory_base + (*ew->d_offsets)[i]); + string name = {(*ew->d_lengths)[i], string_ptr}; + array_add(*de->paths.name, name); + array_add(*de->paths.parent_indices, (*ew->d_parent_indices)[i]); + array_add(*de->paths.sizes, (*ew->d_sizes)[i]); + array_add(*de->paths.modtime, (*ew->d_modtime)[i]); + } + for_each(i, (*ew->offsets)) { + u8* string_ptr = (ew->thread_arena->memory_base + (*ew->offsets)[i]); + string name = {(*ew->lengths)[i], string_ptr}; + array_add(*de->files.name, name); + array_add(*de->files.parent_indices, (*ew->parent_indices)[i]); + array_add(*de->files.sizes, (*ew->sizes)[i]); + array_add(*de->files.modtime, (*ew->modtime)[i]); + } +}*/ + +void add_record (File_Enumeration_Thread_Results* results, + WIN32_FIND_DATAW* find_data, + string name, + Parent_Index parent_index, + bool is_directory) { + u32 offset = (u32)(name.data - results->arena->memory_base); + u64 size = ((u64)find_data->nFileSizeHigh << 32) | ((u64)find_data->nFileSizeLow & 0xFFFFFFFF); + u64 modtime = FILETIME_to_ticks(find_data->ftLastWriteTime); + + if (is_directory) { + array_add((*results->d_offsets), offset); + array_add((*results->d_lengths), (s16)name.count); + array_add((*results->d_parent_indices), parent_index); // #parent_index + array_add((*results->d_modtime), modtime); + } else { + array_add((*results->f_offsets), offset); + array_add((*results->f_lengths), (s16)name.count); + array_add((*results->f_parent_indices), parent_index); // #parent_index + array_add((*results->f_sizes), size); + array_add((*results->f_modtime), modtime); + } +} + +Thread_Continue_Status file_enumeration_thread_group_proc (Thread_Group* group, Thread* thread, void* work) { + // 1. setup userdata as an Arena*: + // #TODO: replace userdata with a struct that manages the thread-local data for this + // particular problem. 
This data can be rescued before we + File_Enumeration_Thread_Results* results; + if (!thread->context->userdata) { + thread->context->userdata = New(GPAllocator()); + initialize((File_Enumeration_Thread_Results*)thread->context->userdata); + } + results = (File_Enumeration_Thread_Results*)thread->context->userdata; + + Enumeration_Work* enum_work = (Enumeration_Work*)work; + + // Validate thread context? + push_allocator(temp()); + auto_release_temp(); + + // log("file_enumeration_thread_group_proc, thread index: %d", thread->index); + + // MAKE SURE PATH IS NULL TERMINATED! + wstring wildcard_name = utf8_to_wide(format_string("%s\\*", enum_work->first_directory.data)); // #temp + WIN32_FIND_DATAW find_data; + HANDLE h = FindFirstFileExW((LPCWSTR)wildcard_name.data, FindExInfoBasic, &find_data, + FindExSearchNameMatch, nullptr, FIND_FIRST_EX_LARGE_FETCH); + if (h == INVALID_HANDLE_VALUE) { + return Thread_Continue_Status::CONTINUE; + } + + s32 thread_index = get_thread_index(group, (s32)thread->index); // zero-indexed to thread group + + Parent_Index pi = enum_work->parent; + + push_arena(results->arena); + if (enum_work->is_root) { // see add_record + string name = copy_string(enum_work->first_directory); + u32 offset = (u32)(name.data - results->arena->memory_base); + s32 current_index = (s32)(*results->d_offsets).count; + pi = {thread_index, current_index}; + array_add((*results->d_offsets), offset); + array_add((*results->d_lengths), (s16)name.count); + Parent_Index root_pi = {thread_index, -1}; + array_add((*results->d_parent_indices), root_pi); // #parent_index + array_add((*results->d_modtime), (u64)0); + // results->dirs_enumerated += 1; + } + + while (true) { + string name = wide_to_utf8((u16*)find_data.cFileName); + bool should_continue = (name.count == 0 || name == "." 
|| name == ".."); + if (should_continue) { + bool success = FindNextFileW(h, &find_data); + if (!success) + break; + continue; + } + + bool is_directory = (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0; + Parent_Index next_index = {thread_index, enum_work->parent.parent_index}; + add_record(results, &find_data, name, next_index, is_directory); + + if (is_directory) { + push_allocator(GPAllocator()); + auto new_work = New(false); + new_work->first_directory = directory_get_full_path(group, pi, name); + new_work->parent = next_index; + new_work->is_root = false; + new_work->next = {}; + + array_add(enum_work->next, new_work); + } + + bool success = FindNextFileW(h, &find_data); + if (!success) break; + } + + FindClose(h); + return Thread_Continue_Status::CONTINUE; +} + +s64 multithreaded_file_enumeration_master_proc (Thread* thread) { + auto task = thread_task(Drive_Enumeration); + + Thread_Group* file_enum_thread_group = New(); + + s32 thread_count = os_cpu_physical_core_count(); + + push_allocator(GPAllocator()); + thread_group_init(file_enum_thread_group, thread_count, file_enumeration_thread_group_proc, true); + + + for_each(d, task->drives) { + auto work = New(GPAllocator(), false); //replace with arena bootstrap? + work->first_directory = task->drives[d]->label; // this includes the colon-slash, (e.g. `C:\`). + work->parent = {-1, -1}; // #HACK: (s32)d + work->is_root = true; + work->next = {}; + + add_work(file_enum_thread_group, work); + task->work_added += 1; + } + + start(file_enum_thread_group); + // set task completed. 
+ + s64 path_index = task->drives.count; + + while (true) { + auto_release_temp(); + ArrayView cw = get_completed_work(file_enum_thread_group); + task->work_completed += (s32)cw.count; + for_each(i, cw) { + auto ew = (Enumeration_Work*)cw[i]; + + for_each(w, ew->next) { + auto new_work = ew->next[w]; + + add_work(file_enum_thread_group, new_work); + } + task->work_added += (s32)ew->next.count; + + string_free(ew->first_directory); + array_free(ew->next); + internal_free(ew); + } + + log("work completed: %d/%d",task->work_completed, task->work_added); + // if (task->work_completed >= task->work_added) break; + Sleep(1); + } + + shutdown(file_enum_thread_group); + + task->thread_completed = true; + return 0; +} + +void os_run_file_enumeration_multithreaded () { + // Need some struct to track the state of this operation. + drive_enumeration = New(); + (*drive_enumeration) = { + os_get_available_drives(), + New(), + os_cpu_physical_core_count(), + 0, 0, 0, 0 + }; + + // initialize(&drive_enumeration->paths); + // initialize(&drive_enumeration->files); + + // We start 1 thread to run the thread group and track the threads + string thread_name = "Multithreaded Enumeration: Master Thread"; + bool success = thread_init(drive_enumeration->master_thread, + multithreaded_file_enumeration_master_proc, thread_name); + Assert(success); + thread_start(drive_enumeration->master_thread, drive_enumeration); + drive_enumeration->thread_started = true; +} + +bool file_enum_multithreading_started () { + if (drive_enumeration == nullptr) return false; + return drive_enumeration->thread_started; +} + +bool file_enum_multithreading_active () { + if (drive_enumeration == nullptr) return false; + if (drive_enumeration->thread_completed) { + return false; + } + if (drive_enumeration->thread_started) { + return true; + } + return false; +} diff --git a/lib/OS/OS_Win32_NTFS.cpp b/lib/OS/OS_Win32_NTFS.cpp index e7567f2..7442f2d 100644 --- a/lib/OS/OS_Win32_NTFS.cpp +++ 
b/lib/OS/OS_Win32_NTFS.cpp @@ -63,7 +63,7 @@ struct NTFS_ResidentAttributeHeader : NTFS_AttributeHeader { }; struct NTFS_FileNameAttributeHeader : NTFS_ResidentAttributeHeader { - u64 parentRecordNumber : 48; + u64 parentRecordNumber : 48; // low 48 bits u64 sequenceNumber : 16; u64 creationTime; u64 modificationTime; @@ -97,7 +97,7 @@ struct NTFS_RunHeader { #pragma pack(pop) struct NTFS_File { - u32 parent_id; + u32 parent_id; // #TODO: FRNs should be 64-bit! u32 record_id; u16* name_data; u64 file_modtime; // FILETIME @@ -585,3 +585,183 @@ bool Serialize_Win32_Drives (ArrayView drives, string file_path) { return true; } +void ntfs_create_enumeration_threads (s32 thread_count) { + if (!ex1_ntfs.initialized) { Timed_Block_Print("Thread initialization (ntfs)"); + ex1_ntfs.initialized = true; + ex1_ntfs.threads = ArrayView(thread_count); + ex1_ntfs.threads_in_flight.allocator = GPAllocator(); + for_each(t, ex1_ntfs.threads) { + string thread_name = format_string("ntfs_enumeration_thread#%d", t); + bool success = thread_init(&ex1_ntfs.threads[t], ntfs_enumeration_thread_proc, thread_name); + Assert(success); + } + } +} + + +/* +void Ex1_show_ntfs_workspace () { using namespace ImGui; + + SliderInt("Select path index", &ex1w.path_select, 0, count_paths(stfe)-1); + Text("%s", get_path_copy(stfe, ex1w.path_select).data); + Text("time modified: %s", format_time_datetime(get_path_modtime(stfe, ex1w.path_select)).data); + // #TODO: modtime (to indextime) + SliderInt("Select file index", &ex1w.file_select, 0, count_files(stfe)-1); + Text("%s", get_file_copy(stfe, ex1w.file_select).data); + Text("size: %s", format_bytes(get_file_size_bytes(stfe, ex1w.file_select)).data); + Text("time modified: %s", format_time_datetime(get_file_modtime(stfe, ex1w.file_select)).data); + + push_allocator(temp()); + for_each(d, ntfs_workspace.drives) { + OS_Drive* drive = ntfs_workspace.drives[d]; + Text("%d. 
%s paths: %lld, files: %lld", + d, drive->label.data, + drive->data->paths.offsets->count, + drive->data->files.offsets->count); + } + // SliderInt("Results to Show", &ntfs_workspace.results_to_show, 0, 50); + for_each(d, ntfs_workspace.drives) { + OS_Drive* drive = ntfs_workspace.drives[d]; + // #TODO: Radio button for choosing between paths, files + char* rb1 = format_cstring("paths##%s", drive->label.data); + RadioButton(rb1, &ntfs_workspace.supplementary[d].radio_button, 1); + SameLine(); + char* rb2 = format_cstring("files##%s", drive->label.data); + RadioButton(rb2, &ntfs_workspace.supplementary[d].radio_button, 0); + SameLine(); + s32 max_count = (s32)drive->data->paths.offsets->count; + if (ntfs_workspace.supplementary[d].radio_button == 0) { + max_count = (s32)drive->data->files.offsets->count; + } + char* slider_label = format_cstring("%s index", drive->label.data); + if (SliderInt(slider_label, &ntfs_workspace.supplementary[d].index, 0, max_count)) { } + } + for_each(d, ntfs_workspace.drives) { + if (ntfs_workspace.supplementary[d].radio_button == 0) { // files + OS_Drive* drive = ntfs_workspace.drives[d]; + Dense_FS* dfs = drive->data; + DFS_Array* dfsa = &drive->data->files; + s64 file_index = ntfs_workspace.supplementary[d].index; + DFS_Value v = get_value(dfs, dfsa, file_index); + // #TODO NOTE: v.full_path is NOT the full path #rename + Text("Filename: %s, parent_id: %d", copy_string(v.full_path).data, v.parent_index); + string full_path = get_full_path_from_index(drive, dfsa, file_index); + Text("Full path: %s", full_path.data); + bool success = file_length(full_path, &v.size); // temp, obviously we don't wanna call this every frame lol + Text(" > size: %lld B", v.size); + Text(" > size: %s", format_bytes(v.size).data); + // Text(" > modtime: %s", + } else { + // DFS_Array* dfsa = &ntfs_workspace.drives[d]->data->paths; + + } + } +} +*/ + /*SeparatorText("ex1_ntfs"); + Text("Threads in flight count: %d", ex1_ntfs.threads_in_flight.count); + 
for_each(i, ex1_ntfs.threads) { + Text(" [%d] initialized: %d, has_context: %d, has_data: %d", + i, ex1_ntfs.threads[i].proc != nullptr, ex1_ntfs.threads[i].context != nullptr, ex1_ntfs.threads[i].data != nullptr); + }*/ + /*// #NTFS_MFT_RAW + push_allocator(GPAllocator()); + Array> drive_split; + drive_split.allocator = temp(); // this is only needed for this frame + + if (drives.count > os_cpu_physical_core_count()) { + s32 thread_count = os_cpu_physical_core_count(); + array_resize(drive_split, thread_count); + ntfs_create_enumeration_threads(thread_count); + + s32 threads_to_create = thread_count; + s64 drives_per_thread = (drives.count / thread_count); + s64 remainder = drives.count % thread_count; + s64 current_drive = 0; + + for_each(d, drive_split) { + if (d == drive_split.count) { + drive_split[d] = ArrayView(remainder); + } else { + drive_split[d] = ArrayView(drives_per_thread); + } + + for (s64 i = 0; i < drive_split[d].count; i += 1) { + drive_split[d][i] = drives[current_drive]; + current_drive += 1; + } + } + + debug_break(); // #TODO: Check that the work has been distributed correctly. 
+ } else { // more threads than drives, or same amount + s32 thread_count = (s32)drives.count; + array_resize(drive_split, drives.count); + ntfs_create_enumeration_threads(thread_count); + + for_each(d, drives) { + auto drive = drives[d]; + drive_split[d] = ArrayView(1); // Arrays of size one are sad :pensive: + + drive_split[d][0] = drive; + } + } + + s64 active_thread_count = drive_split.count; + + ex1_ntfs.threads_started = true; + for (s64 t = 0; t < active_thread_count; t += 1) { + Thread* thread = &ex1_ntfs.threads[t]; + Arena* thread_arena = next_arena(Arena_Reserve::Size_64K); + push_arena(thread_arena); + auto thread_data = New(); + thread_data->pool = thread_arena; + thread_data->drives = drive_split[t]; + thread_start(thread, thread_data); + array_add(ex1_ntfs.threads_in_flight, thread); + }*/ + + + /* #NTFS_MFT_RAW + if (ex1_ntfs.threads_in_flight.count) { + Text("Threads in flight: %d", ex1_ntfs.threads_in_flight.count); + + for_each(t, ex1_ntfs.threads_in_flight) { + if (thread_is_done(ex1_ntfs.threads_in_flight[t])) { + push_allocator(GPAllocator()); + + Thread* thread = ex1_ntfs.threads_in_flight[t]; + auto task = thread_task(NTFS_Enumeration_Task); + array_free(task->drives); + + // make sure to retreive any data you need to from here! + release_arena(task->pool); + + thread_deinit(ex1_ntfs.threads_in_flight[t], false); + array_unordered_remove_by_index(ex1_ntfs.threads_in_flight, t); + t -= 1; // check this element index again! + } + } + }*/ + + /* #NTFS_MFT_RAW + if (ex1_ntfs.threads_started && !ex1_ntfs.threads_in_flight.count) { + // All threads are complete, we're free to clean up remaining memory + push_allocator(GPAllocator()); + array_free(ex1_ntfs.threads); + array_free(ex1_ntfs.threads_in_flight); + + // Instead maybe we should just memset this to zero. + reset_struct(&ex1_ntfs); + } + + // How do I tell when all files are enumerated? + // check drives[i]->data.paths.wstrings.count count? 
+ if (all_drives_enumerated && Button("Save drive data")) { + string file_path = format_string_temp("%s_DriveData.bin", os_get_machine_name().data); + bool success = Serialize_Win32_Drives(drives, file_path); + if (!success) { log_error("Failed to save Win32_Drive data"); } + } + + if (all_drives_enumerated && Button("Clear all drive data")) { + os_clear_drive_data(); + }*/ diff --git a/lib/meta_generated.h b/lib/meta_generated.h index d2c1210..700a5eb 100644 --- a/lib/meta_generated.h +++ b/lib/meta_generated.h @@ -1,6 +1,6 @@ #pragma once -constexpr const char* MUSA_LIB_VERSION = "0.1a"; +const char* MUSA_LIB_VERSION = "0.2"; #define BUILD_DEBUG 1 #define OS_WINDOWS 1 #define OS_LINUX 0 diff --git a/lib_main.cpp b/lib_main.cpp index ce07948..0b2d8f6 100644 --- a/lib_main.cpp +++ b/lib_main.cpp @@ -36,6 +36,8 @@ #include "lib/Base/Arena.cpp" #include "lib/Base/String.cpp" +#include "lib/Base/Unicode.cpp" +#include "lib/Base/RadixSort.cpp" #include "lib/Base/Base_Thread_Context.cpp" #include "lib/Base/ErrorType.cpp" @@ -52,7 +54,7 @@ #if OS_WINDOWS # include "lib/OS/OS_Win32.cpp" -# include "lib/OS/OS_Win32_NTFS.cpp" +// # include "lib/OS/OS_Win32_NTFS.cpp" #endif #include "lib/Graphics.cpp" diff --git a/src/Base_Entry_Point.cpp b/src/Base_Entry_Point.cpp index c0d2b8e..3bd25ae 100644 --- a/src/Base_Entry_Point.cpp +++ b/src/Base_Entry_Point.cpp @@ -17,7 +17,9 @@ internal void Main_Entry_Point (int argc, WCHAR **argv); #endif internal void Main_Entry_Point (int argc, WCHAR **argv) { // #entry_point - set_cpu_base_frequency(3200); // REQUIRED FOR TIMING MODULE! will depend on CPU + // #TODO: Check if base frequency is even available. + u32 base_frequency = (u32)CPU_Base_Frequency(); + set_cpu_base_frequency(base_frequency); // REQUIRED FOR TIMING MODULE! will depend on CPU #if BASE_RUN_TESTS run_pre_setup_tests(); // #no_context: context will not be initialized at this point. 
diff --git a/src/Ex1.cpp b/src/Ex1.cpp index ee31965..a2a0c0e 100644 --- a/src/Ex1.cpp +++ b/src/Ex1.cpp @@ -1,6 +1,6 @@ struct ExplorerUI { - u8 search_input[64]; - u8 secondary_input[64]; + // u8 search_input[64]; + // u8 secondary_input[64]; }; struct Explorer { @@ -22,19 +22,6 @@ global ExplorerUI explorer_ui; global Explorer explorer; global Ex1_NTFS_Enumeration ex1_ntfs; -void ntfs_create_enumeration_threads (s32 thread_count) { - if (!ex1_ntfs.initialized) { Timed_Block_Print("Thread initialization (ntfs)"); - ex1_ntfs.initialized = true; - ex1_ntfs.threads = ArrayView(thread_count); - ex1_ntfs.threads_in_flight.allocator = GPAllocator(); - for_each(t, ex1_ntfs.threads) { - string thread_name = format_string("ntfs_enumeration_thread#%d", t); - bool success = thread_init(&ex1_ntfs.threads[t], ntfs_enumeration_thread_proc, thread_name); - Assert(success); - } - } -} - #define HOTKEY_ID_BRING_TO_FOREGROUND 1 #define VK_SPACE_KEY_CODE 0x20 // #define HOTKEY_ID_HIDE_TITLEBAR @@ -76,55 +63,112 @@ bool Ex1_check_key_combinations() { return false; } -void Ex1_show_ntfs_workspace () { using namespace ImGui; - push_allocator(temp()); - for_each(d, ntfs_workspace.drives) { - OS_Drive* drive = ntfs_workspace.drives[d]; - Text("%d. %s paths: %lld, files: %lld", - d, drive->label.data, - drive->data->paths.offsets->count, - drive->data->files.offsets->count); +// #Workspaces are FOR DEVELOPMENT ONLY. 
+struct Ex1_Workspace { + s32 path_select; + s32 file_select; + + RadixSort file_size_radix; + RadixSort file_modtime_radix; + RadixSort dir_modtime_radix; + bool sort_completed; + + // Reordered strings: + ArrayView files_sorted_by_size; + ArrayView files_sorted_by_modtime; +}; + +global Ex1_Workspace ex1w; + +void free_ex1_workspace_and_reset () { + if (ex1w.sort_completed) { + push_allocator(GPAllocator()); + + radix_sort_free(&ex1w.file_size_radix); + radix_sort_free(&ex1w.file_modtime_radix); + radix_sort_free(&ex1w.dir_modtime_radix); + + array_free(ex1w.files_sorted_by_size); + array_free(ex1w.files_sorted_by_modtime); + + zero_struct(&ex1w); } - // SliderInt("Results to Show", &ntfs_workspace.results_to_show, 0, 50); - for_each(d, ntfs_workspace.drives) { - OS_Drive* drive = ntfs_workspace.drives[d]; - // #TODO: Radio button for choosing between paths, files - char* rb1 = format_cstring("paths##%s", drive->label.data); - RadioButton(rb1, &ntfs_workspace.supplementary[d].radio_button, 1); - SameLine(); - char* rb2 = format_cstring("files##%s", drive->label.data); - RadioButton(rb2, &ntfs_workspace.supplementary[d].radio_button, 0); - SameLine(); - s32 max_count = (s32)drive->data->paths.offsets->count; - if (ntfs_workspace.supplementary[d].radio_button == 0) { - max_count = (s32)drive->data->files.offsets->count; - } - char* slider_label = format_cstring("%s index", drive->label.data); - if (SliderInt(slider_label, &ntfs_workspace.supplementary[d].index, 0, max_count)) { } +} + +// #TODO: Move all sort stuff to OS_Win32? +// Make a general version of this that takes two ArrayView and reorders. +// There's no need to do this until we have the filtered results. +void os_win32_reorder_files_by_radix (RadixSort* r, ArrayView* files, bool reverse_order=false) { + Timed_Block_Print("os_win32_reorder_files_by_radix"); + // Where are my source files!? 
+ (*files) = ArrayView(r->ranks.count); + for_each(f, (*files)) { + // (*files)[f] = get_file_copy(stfe, r->ranks[f]); + (*files)[f] = get_file_string_view(stfe, r->ranks[f]); } - for_each(d, ntfs_workspace.drives) { - if (ntfs_workspace.supplementary[d].radio_button == 0) { // files - OS_Drive* drive = ntfs_workspace.drives[d]; - Dense_FS* dfs = drive->data; - DFS_Array* dfsa = &drive->data->files; - s64 file_index = ntfs_workspace.supplementary[d].index; - DFS_Value v = get_value(dfs, dfsa, file_index); - // #TODO NOTE: v.full_path is NOT the full path #rename - Text("Filename: %s, parent_id: %d", copy_string(v.full_path).data, v.parent_index); - string full_path = get_full_path_from_index(drive, dfsa, file_index); - Text("Full path: %s", full_path.data); - bool success = file_length(full_path, &v.size); // temp, obviously we don't wanna call this every frame lol - Text(" > size: %lld B", v.size); - Text(" > size: %s", format_bytes(v.size).data); - // Text(" > modtime: %s", idk how to convert FILETIME to calendar time - } else { - // DFS_Array* dfsa = &ntfs_workspace.drives[d]->data->paths; - - } +} + +void Ex1_show_enumeration_workspace () { using namespace ImGui; + push_imgui_window("Enumerated Data Workspace"); + + if (!ex1w.sort_completed) { + push_allocator(GPAllocator()); + Timed_Block_Print("radix_sort_u64: file sizes, file modtimes, directory modtimes"); + ArrayView sizes = to_view(*stfe->files.sizes); + radix_sort_u64(&ex1w.file_size_radix, sizes.data, (u32)sizes.count); + ArrayView file_modtimes = to_view(*stfe->files.modtimes); + radix_sort_u64(&ex1w.file_modtime_radix, file_modtimes.data, (u32)file_modtimes.count); + ArrayView dirs_modtimes = to_view(*stfe->dirs.modtimes); + radix_sort_u64(&ex1w.dir_modtime_radix, dirs_modtimes.data, (u32)dirs_modtimes.count); + // Create ArrayView, ArrayView sizes, and ArrayView modtimes + os_win32_reorder_files_by_radix(&ex1w.file_size_radix, &ex1w.files_sorted_by_size); + 
os_win32_reorder_files_by_radix(&ex1w.file_modtime_radix, &ex1w.files_sorted_by_modtime); + // reordering by the rank permutations generated by RadixSort. + ex1w.sort_completed = true; + } + + if (!ex1w.sort_completed) { return; } + + SeparatorText("Files ordered by modtime"); + s32 file_count = (s32)ex1w.files_sorted_by_modtime.count; + SliderInt("Select file index", &ex1w.file_select, 0, file_count-1); + string file_name = copy_string(ex1w.files_sorted_by_modtime[ex1w.file_select]); + Text("%s", file_name.data); + u32 radix_index = rank(&ex1w.file_modtime_radix, ex1w.file_select); + Text("date modified: %s", format_time_datetime(get_file_modtime(stfe, radix_index)).data); + + SeparatorText("Files ordered by size"); + file_count = (s32)ex1w.files_sorted_by_size.count; + // SliderInt("Select file index", &ex1w.file_select, 0, file_count-1); + file_name = copy_string(ex1w.files_sorted_by_size[ex1w.file_select]); + Text("%s", file_name.data); + radix_index = rank(&ex1w.file_size_radix, ex1w.file_select); + Text("size: %s", format_bytes(get_file_size_bytes(stfe, radix_index)).data); + // Text("date modified: %s", format_time_datetime(get_file_modtime(stfe, radix_index)).data); + + if (Button("Count unique UTF-8 characters")) { + count_unique_utf8_chars(); + } + + Text("unique_codepoints_utf32.count: %", unique_codepoints_utf32.count); + + for_each(u, unique_codepoints_utf32) { + Text("[%d] Code point as hex: 0x%X", u, unique_codepoints_utf32[u]); + } + + Text("files_sorted_by_size size in bytes: %lld", ex1w.files_sorted_by_size.count * sizeof(string)); + Text("files_sorted_by_modtime size in bytes: %lld", ex1w.files_sorted_by_modtime.count * sizeof(string)); + + for (s64 i = 1; i < 128; i += 1) { + u8 cstring[2] = {}; + cstring[0] = (u8)i; + cstring[1] = 0; + Text("codepoint[0x%X]: %s, count: %lld", i, cstring, count_ascii_codepoints[i]); } } void Ex1_Control_Panel () { using namespace ImGui; + f64 frame_time = GetUnixTimestamp(); Table* drive_table = 
get_drive_table(); push_imgui_window("Control Panel"); @@ -133,28 +177,39 @@ void Ex1_Control_Panel () { using namespace ImGui; if (/*Button("Discover drives") ||*/!table_is_valid(drive_table)) { Win32_Discover_Drives(); } // Text("ntfs_workspace_files_loaded: %s", ntfs_workspace_files_loaded()? "true": "false"); - if (ntfs_workspace_files_loaded()) { - Ex1_show_ntfs_workspace(); - return; - } + // if (ntfs_workspace_files_loaded()) { + // Ex1_show_ntfs_workspace(); + // return; + // } - Text("drive_table is valid: %d", table_is_valid(drive_table)); + bool all_drives_enumerated = stfe && stfe->thread_completed; push_allocator(temp()); ArrayView drives = os_get_available_drives(); // only includes drives that are ready. + - for_each(i, drives) { - OS_Drive* drive = drives[i]; - - Text(" > [%d] drive letter: %s (is_present: %d)", i + 1, drive->label.data, drive->is_present); - if (drive->time_to_enumerate != 0) { - SameLine(); - Text("Enumerated in %.2f seconds", drive->time_to_enumerate); + if (!USN_Journal_Monitoring_Ready(drives[0]) && Button("Enable USN Monitoring for all drives")) { + Win32_Enable_USN_Journal_Monitoring(drives); + } + if (USN_Journal_Monitoring_Ready(drives[0]) && Button("Query USN Journal")) { + Query_USN_Journal(drives); + } + + if (!all_drives_enumerated) { + // Text("drive_table is valid: %d", table_is_valid(drive_table)); + for_each(i, drives) { + OS_Drive* drive = drives[i]; + + Text(" > [%d] drive letter: %s (is_present: %d)", i + 1, drive->label.data, drive->is_present); + if (drive->time_to_enumerate != 0) { + SameLine(); + Text("Enumerated in %.2f seconds", drive->time_to_enumerate); + } + // SameLine(); + // if (Button(format_cstring("Read NTFS MFT Raw##%s", drive->label.data))) { + // push_arena(thread_context()->arena); + // Error* error = NTFS_MFT_read_raw(drive); + // } } - // SameLine(); - // if (Button(format_cstring("Read NTFS MFT Raw##%s", drive->label.data))) { - // push_arena(thread_context()->arena); - // Error* error = 
NTFS_MFT_read_raw(drive); - // } } s32 drives_enumerated = 0; @@ -167,128 +222,79 @@ void Ex1_Control_Panel () { using namespace ImGui; drives_enumerated += 1; } } - bool all_drives_enumerated = !ex1_ntfs.threads_in_flight.count - && (drives_enumerated == drives.count); + // bool all_drives_enumerated = !ex1_ntfs.threads_in_flight.count + // && (drives_enumerated == drives.count); - string file_path = format_string_temp("%s_DriveData.bin", os_get_machine_name().data); + // string file_path = format_string_temp("%s_DriveData.bin", os_get_machine_name().data); + string file_path = "D:/Projects/Cpp/Musa-Cpp-Lib-V2/bin/MUSA-PC3_DriveData.bin";// FIXED path. + Text("fixed file_path: %s", file_path.data); + if (!all_drives_enumerated && file_exists(file_path)) { // #autoload + Deserialize_ST_File_Enumeration(file_path); + } if (drives.count > 0 && !all_drives_enumerated && file_exists(file_path) && Button("Load from file (this machine)")) { - Deserialize_Win32_Drives(file_path); + Deserialize_ST_File_Enumeration(file_path); + // Deserialize_Win32_Drives(file_path); } - if (file_enum_multithreading_started()) { - if (thread_is_done(drive_enumeration->master_thread)) { - push_allocator(GPAllocator()); - // Thread* thread = drive_enumeration->master_thread; - // auto task = thread_task(Drive_Enumeration); - // Nothing to free? - thread_deinit(drive_enumeration->master_thread, true); + // if (file_enum_multithreading_started()) { + // if (thread_is_done(drive_enumeration->master_thread)) { + // push_allocator(GPAllocator()); + // // Thread* thread = drive_enumeration->master_thread; + // // auto task = thread_task(Drive_Enumeration); + // // Nothing to free? 
+ // thread_deinit(drive_enumeration->master_thread, true); + // } + // } + + // #FileEnumerationST + if (stfe && stfe->thread_started) { + // print dirs enumerated, etc + if (!stfe->thread_completed) Text("Enumeration Thread Active (elapsed: %s)", format_time_seconds(frame_time-stfe->start_time).data); + if (stfe->dirs.offsets) { + s64 dirs_enumerated = stfe->dirs.offsets->count; + Text("Dirs enumerated: %lld", dirs_enumerated); + // if (!stfe->thread_completed) Text("Current dir: %s", get_last_path_copy(stfe).data); } + if (stfe->files.offsets) { + s64 files_enumerated = stfe->files.offsets->count; + Text("Files enumerated: %lld", files_enumerated); + // if (!stfe->thread_completed) Text("Current file: %s", get_last_file_copy(stfe).data); + } + + if (stfe->thread_completed) { + Text("String bytes stored: %s", format_bytes(stfe->dirs.strings->count + stfe->files.strings->count).data); + Text("Elapsed time: %s", format_time_seconds(stfe->end_time-stfe->start_time).data); + } + } + if (stfe && stfe->thread_started && !stfe->thread_completed) { + Assert(stfe->master_thread != nullptr); + if (thread_is_done(stfe->master_thread)) { + thread_deinit(stfe->master_thread, true); + stfe->thread_completed = true; + } + } + + if (all_drives_enumerated && Button("Save enumerated data")) { + if (!Serialize_ST_File_Enumeration(file_path)) { + log_error("Failed to write enumerated files"); + os_log_error(); + } + } + + if (all_drives_enumerated && Button("Reset State")) { + free_ex1_workspace_and_reset(); + free_stfe_and_reset(); + return; + } + + if (all_drives_enumerated) { + Ex1_show_enumeration_workspace(); } if (drives.count > 0 && !all_drives_enumerated && Button("Enumerate all NTFS drives")) { // && ex1_ntfs.initialized - // if drive count exceeds the number of threads, we need to group them so each thread - // can enumerate multiple drives. 
- // We need to distribute the drives across our available threads: - { run_multithreaded_enumeration_thread(); - return; - } - push_allocator(GPAllocator()); - Array> drive_split; - drive_split.allocator = temp(); // this is only needed for this frame - - if (drives.count > os_cpu_physical_core_count()) { - s32 thread_count = os_cpu_physical_core_count(); - array_resize(drive_split, thread_count); - ntfs_create_enumeration_threads(thread_count); - - s32 threads_to_create = thread_count; - s64 drives_per_thread = (drives.count / thread_count); - s64 remainder = drives.count % thread_count; - s64 current_drive = 0; - - for_each(d, drive_split) { - if (d == drive_split.count) { - drive_split[d] = ArrayView(remainder); - } else { - drive_split[d] = ArrayView(drives_per_thread); - } - - for (s64 i = 0; i < drive_split[d].count; i += 1) { - drive_split[d][i] = drives[current_drive]; - current_drive += 1; - } - } - - debug_break(); // #TODO: Check that the work has been distributed correctly. 
- } else { // more threads than drives, or same amount - s32 thread_count = (s32)drives.count; - array_resize(drive_split, drives.count); - ntfs_create_enumeration_threads(thread_count); - - for_each(d, drives) { - auto drive = drives[d]; - drive_split[d] = ArrayView(1); // Arrays of size one are sad :pensive: - - drive_split[d][0] = drive; - } - } - - s64 active_thread_count = drive_split.count; - - ex1_ntfs.threads_started = true; - for (s64 t = 0; t < active_thread_count; t += 1) { - Thread* thread = &ex1_ntfs.threads[t]; - Arena* thread_arena = next_arena(Arena_Reserve::Size_64K); - push_arena(thread_arena); - auto thread_data = New(); - thread_data->pool = thread_arena; - thread_data->drives = drive_split[t]; - thread_start(thread, thread_data); - array_add(ex1_ntfs.threads_in_flight, thread); - } - } - - if (ex1_ntfs.threads_in_flight.count) { - Text("Threads in flight: %d", ex1_ntfs.threads_in_flight.count); - - for_each(t, ex1_ntfs.threads_in_flight) { - if (thread_is_done(ex1_ntfs.threads_in_flight[t])) { - push_allocator(GPAllocator()); - - Thread* thread = ex1_ntfs.threads_in_flight[t]; - auto task = thread_task(NTFS_Enumeration_Task); - array_free(task->drives); - - // make sure to retreive any data you need to from here! - release_arena(task->pool); - - thread_deinit(ex1_ntfs.threads_in_flight[t], false); - array_unordered_remove_by_index(ex1_ntfs.threads_in_flight, t); - t -= 1; // check this element index again! - } - } - } - - if (ex1_ntfs.threads_started && !ex1_ntfs.threads_in_flight.count) { - // All threads are complete, we're free to clean up remaining memory - push_allocator(GPAllocator()); - array_free(ex1_ntfs.threads); - array_free(ex1_ntfs.threads_in_flight); - - // Instead maybe we should just memset this to zero. - reset_struct(&ex1_ntfs); - } - - // How do I tell when all files are enumerated? - // check drives[i]->data.paths.wstrings.count count? 
- if (all_drives_enumerated && Button("Save drive data")) { - string file_path = format_string_temp("%s_DriveData.bin", os_get_machine_name().data); - bool success = Serialize_Win32_Drives(drives, file_path); - if (!success) { log_error("Failed to save Win32_Drive data"); } - } - - if (all_drives_enumerated && Button("Clear all drive data")) { - os_clear_drive_data(); + os_run_file_enumeration_single_threaded(); + // os_run_file_enumeration_multithreaded(); // #disabled for now + return; } } @@ -296,14 +302,8 @@ void ImGui_Debug_Panel () { using namespace ImGui; push_allocator(temp()); Begin("Debug Panel"); - SeparatorText("ex1_ntfs"); - Text("Threads in flight count: %d", ex1_ntfs.threads_in_flight.count); - for_each(i, ex1_ntfs.threads) { - Text(" [%d] initialized: %d, has_context: %d, has_data: %d", - i, ex1_ntfs.threads[i].proc != nullptr, ex1_ntfs.threads[i].context != nullptr, ex1_ntfs.threads[i].data != nullptr); - } // #cpuid - // Text("[cpus] physical: %d, logical: %d, primary: %d, secondary: %d", os_cpu_physical_core_count(), os_cpu_logical_core_count(), os_cpu_primary_core_count(), os_cpu_secondary_core_count()); + Text("[cpus] physical: %d, logical: %d, primary: %d, secondary: %d", os_cpu_physical_core_count(), os_cpu_logical_core_count(), os_cpu_primary_core_count(), os_cpu_secondary_core_count()); { SeparatorText("Arena In-Use List"); lock_guard(&arena_free_list->mutex); for (u8 i = 0; i < Arena_Reserve_Count; i += 1) { @@ -338,6 +338,10 @@ void ImGui_Debug_Panel () { using namespace ImGui; SeparatorText("Child Threads"); SeparatorText("Errors"); ArrayView errors = get_all_errors(thread_context()); + if (errors.count && Button("Clear all errors")) { + clear_errors(thread_context()); + errors.count = 0; + } for_each(e, errors) { auto button_label = format_cstring("Clear##%d", e); if (Button(button_label)) { diff --git a/src/String_Analysis.cpp b/src/String_Analysis.cpp new file mode 100644 index 0000000..f61b923 --- /dev/null +++ 
b/src/String_Analysis.cpp @@ -0,0 +1,50 @@ +// 1. Count unique occurrences: + +s64 count_ascii_codepoints[128] = {}; +// s64 extended_codepoints[65536] = {}; + +Array unique_codepoints_utf32; + +force_inline void unique_codepoints_utf32_add_unique (u32 unique) { + array_add_if_unique(unique_codepoints_utf32, unique); +} + +void count_unique_chars_from_string (string s) { + string s_copy = s; + + while (s_copy.count > 0) { + if (s_copy.data[0] == 0x5C) { // `\` character + s_copy.data += 1; + s_copy.count -= 1; + } + u32 utf32_codepoint; s64 codepoint_source_length; + bool success = character_utf8_to_utf32(s_copy.data, s_copy.count, &utf32_codepoint, &codepoint_source_length); + + if (success) { + if (codepoint_source_length == 1) { + Assert(utf32_codepoint <= 127); + // Add to table + count_ascii_codepoints[utf32_codepoint] += 1; + } else { + unique_codepoints_utf32_add_unique(utf32_codepoint); + push_allocator(temp()); + string codepoint = copy_string(string_view(s_copy, 0, codepoint_source_length)); + log_todo("#TODO: keep track of unique codepoints. Codepoint: %s", codepoint); + } + } + + s_copy.data += codepoint_source_length; + s_copy.count -= codepoint_source_length; + } +} + +void count_unique_utf8_chars () { Timed_Block_Print("count_unique_utf8_chars"); + unique_codepoints_utf32.allocator = GPAllocator(); + + Assert(stfe != nullptr); + for (s64 i = 0; i < stfe->dirs.offsets->count; i += 1) { + string sample = get_file_string_view(stfe, i); + + count_unique_chars_from_string(sample); + } +} diff --git a/src/explorer_main.cpp b/src/explorer_main.cpp index a844810..5e0d0fa 100644 --- a/src/explorer_main.cpp +++ b/src/explorer_main.cpp @@ -44,7 +44,7 @@ void Explorer_ImGui_Application_Win32 () { // Setup Dear ImGui context IMGUI_CHECKVERSION(); - printf("ImGui Version %s \n", ImGui::GetVersion()); + log("ImGui Version %s \n", ImGui::GetVersion()); imgui_context = ImGui::CreateContext();