Replace multithreaded enumeration with single-threaded (temporarily) (#2)

Reviewed-on: #2
Co-authored-by: Musa Mahmood <Musasmahmood@gmail.com>
Co-committed-by: Musa Mahmood <Musasmahmood@gmail.com>
This commit is contained in:
Musa Mahmood 2025-12-17 15:18:19 +00:00 committed by Musa Mahmood
parent 5d44917853
commit d1182f3abd
27 changed files with 1458 additions and 520 deletions

View File

@ -2,7 +2,7 @@
// To generate the intermediate to see how many lines are being compiled, in x64 Native Tools Command Prompt for VS2022 or whatever
// cl /P /EP exe_main.cpp
// tokei exe_main.i
VERSION :: "0.1a";
VERSION :: "0.2";
#run,stallable build_cpp_project();
@ -101,7 +101,7 @@ os_target: Operating_System_Tag = .WINDOWS;
generate_meta_file :: (debug: bool) {
sb: String_Builder;
append(*sb, "#pragma once\n\n");
print_to_builder(*sb, "constexpr const char* MUSA_LIB_VERSION = \"%\";\n", VERSION);
print_to_builder(*sb, "const char* MUSA_LIB_VERSION = \"%\";\n", VERSION);
print_to_builder(*sb, "#define BUILD_DEBUG %\n", cast(s32)debug);
print_to_builder(*sb, "#define OS_WINDOWS %\n", ifx os_target == .WINDOWS then 1 else 0);

View File

@ -23,6 +23,7 @@
#include "lib/third_party/dear-imgui/imgui_impl_dx11.h"
#include "src/ImGui_Supplementary.cpp"
#include "src/String_Analysis.cpp"
#include "src/explorer_main.cpp"
#endif

View File

@ -254,3 +254,59 @@ struct Auto_Release {
}
}
};
// #FixedArena procedures:
// Creates a FixedArena whose header is stored at the start of its own backing block.
//   size:              total number of bytes to request, header included.
//   backing_allocator: pushed while the block is created and recorded in the header.
// Returns a pointer to the header, which is also the first byte of the block.
FixedArena* bootstrap_fixed_arena (s64 size, Allocator backing_allocator) {
    push_allocator(backing_allocator);

    // Compare as signed: against the unsigned sizeof, a negative size would
    // convert to a huge value and slip through the check.
    Assert(size >= (s64)sizeof(FixedArena));

    ArrayView<u8> memory = ArrayView<u8>(size);
    Assert(memory.data != nullptr); // the header is written into this block below

    FixedArena* result = (FixedArena*)memory.data;
    result->memory    = memory;
    result->cursor    = sizeof(FixedArena); // first free byte sits just past the header
    result->allocator = backing_allocator;
    return result;
}
// Hands the arena pointer back to the allocator stored in its header.
force_inline void destroy_arena (FixedArena* arena) {
    Allocator backing = arena->allocator;
    Delete(backing, arena);
}
// Wraps a FixedArena as an Allocator: fixed_arena_allocator_proc plus the arena as its data pointer.
Allocator allocator (FixedArena* arena) {
    Allocator result = { fixed_arena_allocator_proc, arena };
    return result;
}
// Allocator procedure for FixedArena (allocator_data must point at the FixedArena).
//   ALLOCATE:   bumps the cursor (offset aligned to DEFAULT_ALIGNMENT) and returns the new region.
//   RESIZE:     always allocates a fresh region and copies min(old_size, requested_size) bytes into it.
//   DEALLOCATE: no-op; individual allocations are never returned to the arena.
//   DETAILS:    returns a static description string; tolerates a null data pointer.
// NOTE(review): alignment is applied to the offset inside the block, so returned addresses are only
// DEFAULT_ALIGNMENT-aligned if the block's base address is — confirm the backing allocator guarantees that.
void* fixed_arena_allocator_proc (Allocator_Mode mode, s64 requested_size, s64 old_size, void* old_memory, void* allocator_data) {
    constexpr s64 DEFAULT_ALIGNMENT = 16; // maybe make this modifiable as part of FixedArena struct?
    FixedArena* arena = (FixedArena*)allocator_data;

    switch (mode) {
        case Allocator_Mode::ALLOCATE:
        case Allocator_Mode::RESIZE: {
            // Only the modes that touch the arena require it; DETAILS must stay callable with null data.
            Assert(arena != nullptr);
            Assert(requested_size >= 0);

            s64 start = Align<s64>(arena->cursor, DEFAULT_ALIGNMENT);
            s64 end   = start + requested_size;
            // Validate before handing out the pointer, not after.
            Assert(end <= arena->memory.count);

            void* result  = arena->memory.data + start;
            arena->cursor = end;

            if (mode == Allocator_Mode::RESIZE) {
                s64 size_to_copy = old_size < requested_size ? old_size : requested_size;
                // A resize with no previous block behaves like a plain allocation.
                if (old_memory != nullptr && size_to_copy > 0) {
                    memcpy(result, old_memory, size_to_copy);
                }
            }
            return result;
        }
        case Allocator_Mode::DEALLOCATE: {
            return nullptr; // unused
        }
        case Allocator_Mode::DETAILS: {
            if (allocator_data == nullptr) {
                return (void*)"fixed_arena_allocator_proc: data pointer is null!";
            }
            return (void*)"fixed_arena_allocator_proc: with valid data";
        }
    }

    return nullptr;
}

View File

@ -138,4 +138,20 @@ struct Push_Alignment { // #rename to Arena_Push_Alignment?
// Do this later:
// arena_lock_pages :: (using arena: *Arena, start_address: *u8, byte_count: s64)
// arena_unlock_pages :: (using arena: *Arena, start_address: *u8, byte_count: s64)
// arena_unlock_pages :: (using arena: *Arena, start_address: *u8, byte_count: s64)
// #FixedArena is a super simple arena where you allocate a fixed block up front (fully committed),
// and use it as-is.
// #NOTE: we can save space by always backing with a known allocator (e.g. GPAllocator()).
// Header of a fixed-size arena. bootstrap_fixed_arena places this struct at the
// very start of the block it describes.
struct FixedArena {
// The whole backing block, including the bytes occupied by this header.
ArrayView<u8> memory;
// Offset into `memory` where the next allocation starts; begins at sizeof(FixedArena).
s64 cursor;
// Allocator the block was obtained from; destroy_arena passes it to Delete.
Allocator allocator;
};
void* fixed_arena_allocator_proc (Allocator_Mode mode, s64 requested_size, s64 old_size, void* old_memory, void* allocator_data);
// #FixedArena API
FixedArena* bootstrap_fixed_arena (s64 size, Allocator backing_allocator);
force_inline void destroy_arena (FixedArena* arena);
Allocator allocator (FixedArena* arena);

View File

@ -86,6 +86,17 @@ void release_arena (Arena* arena, bool delete_extra_pages) {
// }
}
// Adds up arena_usage_bytes() for every arena in the view.
// Overhead from committed-but-unused pages is not part of the total.
s64 bytes_in_use (ArrayView<Arena*> arenas) {
    s64 total = 0;
    s64 index = 0;
    while (index < arenas.count) {
        total += arena_usage_bytes(arenas[index]);
        index += 1;
    }
    return total;
}
s64 committed_bytes (ArrayView<Arena*> arenas) {
s64 sum = 0;

View File

@ -182,8 +182,6 @@ void array_add (Array<T>& src, T new_item) {
src.data[src.count] = new_item;
src.count += 1;
// auto dst_ptr = &src.data[src.count-1];
// memcpy(dst_ptr, &new_item, sizeof(T));
}
template <typename T>
@ -194,6 +192,16 @@ s64 array_find (Array<T>& src, T item) {
return -1;
}
// Appends new_item only when array_find reports it is not already in src.
// Returns true when the item was appended, false when it was already present.
template <typename T>
bool array_add_if_unique (Array<T>& src, T new_item) {
    const bool already_present = (array_find(src, new_item) != -1);
    if (already_present) {
        return false;
    }

    array_add(src, new_item);
    return true;
}
template <typename T>
void array_ordered_remove_by_index (Array<T>& src, s64 index) {
Assert(index >= 0); Assert(index < src.count);

View File

@ -3,6 +3,9 @@
#define LANG_CPP 1
#define BUILD_CONSOLE_INTERFACE BUILD_DEBUG
#include <stdio.h> // vsnprintf
#include <cstdarg> // va_list, ...
#if ARCH_CPU_X64
#include "CPU_X64.cpp"
#define PLATFORM_MEMORY_PAGE_SIZE 4096
@ -13,15 +16,12 @@
#error "CPU not supported (yet)!"
#endif
#include <stdio.h> // vsnprintf
#include <cstdarg> // va_list, ...
#if OS_WINDOWS
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#undef ERROR // why...
#undef NO_ERROR // ugh...
#include <winioctl.h>
#else
#error "This configuration is NOT supported. Only Windows with MSVC is currently supported."
#endif
@ -185,7 +185,7 @@ force_inline s64 Next_Power_Of_Two(s64 v) {
#define auto_release(x) \
Auto_Release Concat(_auto_release_guard_, __LINE__)(x)
#define auto_release_temp() \
auto_release(thread_context()->temp);
auto_release(thread_context()->temp)
#define thread_context() thread_local_context
#define temp() allocator(thread_context()->temp)

View File

@ -63,4 +63,4 @@ void temp_reset_keeping_memory() {
void temp_reset() { // alias: reset_temporary_storage.
Thread_Context* context = thread_context();
arena_reset(context->temp, true);
}
}

View File

@ -1,11 +1,21 @@
#if OS_WINDOWS
#include <intrin.h>
// Queries CPUID leaf 0 and returns the first output register (cpuInfo[0]),
// which callers treat as the highest supported leaf number.
int cpu_max_feature_leaf () {
    int registers[4];
    __cpuid(registers, 0);
    return registers[0];
}
// Returns the first output register of CPUID leaf 0x16 (Base CPU Frequency),
// or 0 when the processor does not report that leaf.
int CPU_Base_Frequency() {
    int cpuInfo[4] = {0};
    // Query leaf 0x16 only when the CPU advertises it. The previous unconditional
    // query ahead of this check made the guard pointless.
    if (cpu_max_feature_leaf() >= 0x16) {
        __cpuid(cpuInfo, 0x16);
    }
    return cpuInfo[0];
}

View File

@ -4,7 +4,8 @@ enum class ErrorClass: s32 {
NONE = 0, // should not be used, just to avoid a default value being assigned.
WARNING = 1,
ERROR = 2,
FATAL = 3
FATAL = 3,
TODO = 4,
};
// #downcasts to string
@ -38,6 +39,10 @@ char* error_severity (ErrorClass severity) {
case ErrorClass::FATAL: {
return "[FATAL ERROR]";
} break;
case ErrorClass::TODO: {
return "[TODO]";
} break;
}
return "";
}
@ -48,6 +53,9 @@ string to_string (Error* error) {
return { error->count, error->data };
}
#define log_todo(fmt, ...) \
Log_Error_2(__FILE__, __FUNCTION__, __LINE__, ErrorClass::TODO, fmt, ##__VA_ARGS__)
#define log_fatal_error(fmt, ...) \
Log_Error_2(__FILE__, __FUNCTION__, __LINE__, ErrorClass::FATAL, fmt, ##__VA_ARGS__)
@ -136,6 +144,7 @@ void push_error (Thread_Context* tctx, Error* new_error) {
tctx->current_error = new_error;
switch (new_error->severity) {
case ErrorClass::TODO:
case ErrorClass::NONE:
case ErrorClass::WARNING: {
print(to_string(new_error));

View File

@ -107,8 +107,15 @@ Allocator allocator (ExpandableArena* arena_ex) {
}
// #TODO: currently this keeps the final arena's memory. Fix this!
// This is not implemented correctly!
void arena_reset_to (ExpandableArena* arena_ex, Arena* last_arena, u8* starting_point) {
// going backwards from end of arena list
if (!arena_ex->next_arenas.count) {
arena_ex->current_point = starting_point;
return;
}
// for (s64 i = arena_ex->next_arenas.count-1; i >= 0; i -= 1) {
for_each_reverse(i, arena_ex->next_arenas) {
Arena* arena = arena_ex->next_arenas[i];

128
lib/Base/RadixSort.cpp Normal file
View File

@ -0,0 +1,128 @@
// State for an index-producing radix sort: the input is never reordered,
// the sorter fills `ranks` with element indices in sorted order.
struct RadixSort {
// Most recent index ordering (valid after a pass has run, see valid_ranks).
ArrayView<u32> ranks;
// Scratch buffer each pass writes into; swapped with `ranks` after the pass.
ArrayView<u32> ranks2;
// Allocator for both buffers; radix_sort_init falls back to context_allocator() when proc is null.
Allocator allocator;
// False until a pass (or the all-equal fallback) has written an ordering into `ranks`.
bool valid_ranks;
};
// Allocates both rank buffers with room for items_to_allocate indices and
// marks the sorter as holding no ordering yet. When no allocator has been
// assigned, the context allocator is adopted first.
void radix_sort_init (RadixSort* r, u32 items_to_allocate) {
    const bool has_allocator = (r->allocator.proc != nullptr);
    if (!has_allocator) {
        r->allocator = context_allocator();
    }

    push_allocator(r->allocator);

    r->valid_ranks = false;
    r->ranks       = ArrayView<u32>(items_to_allocate);
    r->ranks2      = ArrayView<u32>(items_to_allocate);
}
// Releases both rank buffers through the sorter's allocator.
// The sorter must have an allocator assigned (radix_sort_init guarantees this).
void radix_sort_free (RadixSort* r) {
    Assert(r->allocator.proc != nullptr);
    push_allocator(r->allocator);
    array_free(r->ranks);
    array_free(r->ranks2);
    // The ordering lived in the buffers just released; do not let a later
    // sort treat it as still usable.
    r->valid_ranks = false;
}
// RadixSort provides an array of indices in sorted order.
// Returns the element index stored at sorted position i.
// With ARRAY_ENABLE_BOUNDS_CHECKING set, an out-of-range i triggers debug_break().
u32 rank (RadixSort* r, s64 i) {
    Assert(r != nullptr);
#if ARRAY_ENABLE_BOUNDS_CHECKING
    const bool in_range = (i >= 0) && (i < r->ranks.count);
    if (!in_range) { debug_break(); /*INDEX OOB*/ }
#endif
    return r->ranks[i];
}
// Builds one 256-entry byte histogram per byte of T.
//   r:         unused here; kept so existing callers compile unchanged.
//   buffer:    `count` elements of T, read byte by byte.
//   count:     number of elements in buffer.
//   histogram: 256 * sizeof(T) counters, expected to be zeroed by the caller;
//              counters for byte b of each element live at histogram[256*b .. 256*b + 255].
// Works for any sizeof(T); the inner loop has a compile-time bound, so the
// compiler is free to unroll it.
template <typename T> void create_histograms (RadixSort* r, T* buffer, u32 count, u32* histogram) {
    constexpr u32 bucket_count = sizeof(T);

    u8* p = (u8*)buffer;
    for (u32 item = 0; item < count; item += 1) {
        for (u32 b = 0; b < bucket_count; b += 1) {
            histogram[(256 * b) + *p] += 1;
            p += 1;
        }
    }
}
// LSB-first byte-wise radix sort that leaves `input` untouched and writes the
// sorted order as element indices into r->ranks.
//   r:     sorter whose rank buffers must hold at least `count` entries.
//   input: `count` elements of T, interpreted byte by byte (assumes little endian).
//   count: number of elements.
// If r->valid_ranks is already true on entry, the existing r->ranks ordering is
// used as the starting permutation instead of 0..count-1.
template <typename T> void radix_sort (RadixSort* r, T* input, u32 count) {
    constexpr u32 T_SIZE = sizeof(T);

    // Nothing to sort; also avoids reading input[0] from an empty buffer below.
    if (input == nullptr || count == 0) return;

    // Every pass writes `count` indices into the scratch buffer.
    Assert((s64)count <= r->ranks.count);
    Assert((s64)count <= r->ranks2.count);

    // Allocate histograms & offsets on the stack:
    u32  histogram [256 * T_SIZE] = {};
    u32* link [256];

    create_histograms(r, input, count, histogram);

    // Radix sort, j is the pass number, (0 = LSB, P = MSB)
    for (u32 j = 0; j < T_SIZE; j += 1) {
        u32* h = &histogram[j * 256];

        u8* input_bytes = (u8*)input;
        input_bytes += j; // Assumes little endian!

        // Every element has the same byte in this position: the pass cannot change the order.
        if (h[input_bytes[0]] == count) {
            continue;
        }

        // Create offsets
        link[0] = r->ranks2.data;
        for (u32 i = 1; i < 256; i += 1) { // 1..255
            link[i] = link[i-1] + h[i-1];
        }

        // Perform Radix Sort. Byte offsets are computed in 64 bits so that
        // i * T_SIZE cannot wrap for very large inputs.
        if (!r->valid_ranks) {
            for (u32 i = 0; i < count; i += 1) {
                u8 key = input_bytes[(s64)i * T_SIZE];
                *link[key] = i;
                link[key] += 1;
            }
            r->valid_ranks = true;
        } else {
            for (u32 i = 0; i < count; i += 1) {
                u32 idx = r->ranks[i];
                u8  key = input_bytes[(s64)idx * T_SIZE];
                *link[key] = idx;
                link[key] += 1;
            }
        }

        // Swap pointers for next pass. Valid indices - the most recent ones - are in ranks after the swap.
        ArrayView<u32> ranks2_temp = r->ranks2;
        r->ranks2 = r->ranks;
        r->ranks  = ranks2_temp;
    }

    // All values were equal; generate linear ranks
    if (!r->valid_ranks) {
        for (u32 i = 0; i < count; i += 1) {
            r->ranks[i] = i;
        }
        r->valid_ranks = true;
    }
}
// NOTE: For a small number of elements it's more efficient to use insertion sort
// Sorts `count` u64 keys, leaving the sorted index order in r->ranks.
// Does nothing for a null or empty input. The rank buffers are (re)allocated
// whenever they are too small for `count`, so one RadixSort can be reused
// across inputs of different sizes.
void radix_sort_u64 (RadixSort* r, u64* input, u32 count) {
    if (input == nullptr || count == 0) return;

    if (r->ranks.count < (s64)count) {
        // Too small (or never allocated): drop any old buffers and size them for this input.
        if (r->ranks.count > 0) {
            radix_sort_free(r);
        }
        radix_sort_init(r, count);
    }

    // Ranks left over from a previous sort may describe a different element
    // count, so always start from the identity ordering.
    r->valid_ranks = false;

    radix_sort(r, input, count);
}

View File

@ -146,4 +146,19 @@ force_inline void ReadString16 (Deserializer* ds, string& s) { // #no_alloc
ReadStringView(ds, s, (s64)str_len);
}
// ReadString_MakeCopy...
// This is specialized for filesystem storage of strings.
// Appends `count` raw bytes to the serializer without a length prefix.
//   serializer: destination byte array; grown via array_reserve when needed.
//   data:       source bytes (not required to be null-terminated).
//   count:      number of bytes to append; must not be negative.
// Returns the byte offset at which the data was written.
// NOTE(review): the offset is truncated to u32, so this assumes the serializer stays below 4 GiB.
force_inline u32 AddString_NoCount (Serializer* serializer, u8* data, s16 count) {
    Assert(count >= 0);
    u32 original_count = (u32)serializer->count;
    if (count <= 0) return original_count;

    // Required size is based on what is in use, not on what is allocated;
    // otherwise every call would trigger a reserve.
    s64 final_count = serializer->count + (s64)count;
    if (serializer->allocated < final_count) {
        array_reserve(*serializer, final_count);
    }

    // Take the destination pointer only after the reserve, in case it moved the data.
    u8* current_point = &serializer->data[original_count];
    memcpy(current_point, data, (size_t)count);
    serializer->count = final_count;

    return original_count;
}

View File

@ -70,3 +70,14 @@ struct Thread_Group {
bool started = false;
bool should_exit = false;
};
// This might be too slow.
// Linear search through the group's workers for the one whose thread.index
// equals thread_index. Returns its zero-based position in worker_info, or -1.
s32 get_thread_index (Thread_Group* group, s32 thread_index) {
    for_each(w, group->worker_info) {
        auto&& worker = group->worker_info[w];
        const bool matches = (worker.thread.index == thread_index);
        if (matches) {
            return (s32)w; // zero-indexed to thread group
        }
    }

    return -1;
}

52
lib/Base/Unicode.cpp Normal file
View File

@ -0,0 +1,52 @@
// Lookup table indexed by the first byte of a UTF-8 sequence: how many
// continuation bytes follow it. 0x00-0xBF -> 0, 0xC0-0xDF -> 1, 0xE0-0xEF -> 2,
// 0xF0-0xF7 -> 3, 0xF8-0xFB -> 4, 0xFC-0xFF -> 5.
constexpr u8 trailing_bytes_for_utf8[] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5};
// Code point written to the output when a sequence cannot be decoded.
constexpr u32 UNI_REPLACEMENT_CHAR = 0x0000FFFD;
// Indexed by continuation-byte count: mask selecting the payload bits of the first byte.
constexpr u8 utf8_inital_byte_mask[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
// Largest value accepted as a decoded code point.
constexpr u32 UNI_MAX_UTF32 = 0x7FFFFFFF;
// Decodes one UTF-8 sequence starting at data[0] (non-strict: continuation
// bytes are not validated).
//   data:              start of the sequence.
//   source_length:     bytes available at data.
//   utf32:             receives the code point, or UNI_REPLACEMENT_CHAR on failure.
//   source_length_out: receives the number of bytes consumed.
// Returns false when the input is empty/null (0 bytes consumed) or when the
// sequence is truncated (all remaining bytes consumed); true otherwise.
bool character_utf8_to_utf32 (u8* data, s64 source_length, u32* utf32, s64* source_length_out) {
    // Never touch data[0] when there is nothing to read.
    if (data == nullptr || source_length <= 0) {
        (*utf32) = UNI_REPLACEMENT_CHAR;
        (*source_length_out) = 0;
        return false;
    }

    u8  first_character    = data[0];
    s32 continuation_bytes = trailing_bytes_for_utf8[first_character];

    // The lead byte promises more bytes than the caller has.
    if ((continuation_bytes + 1) > source_length) {
        (*utf32) = UNI_REPLACEMENT_CHAR;
        (*source_length_out) = source_length;
        return false;
    }

    u32 ch = data[0] & utf8_inital_byte_mask[continuation_bytes];

    for (s64 i = 1; i < continuation_bytes + 1; i += 1) {
        ch = ch << 6;
        //if strict ...
        ch |= data[i] & 0x3F; // each continuation byte carries six payload bits
    }

    // #if strict... {}

    (*utf32) = ch;
    (*source_length_out) = continuation_bytes + 1;

    // Safety net: with the current masks the decoded value cannot exceed 31 bits.
    if (ch > UNI_MAX_UTF32) {
        (*utf32) = UNI_REPLACEMENT_CHAR;
    }

    return true;
}
// Decodes the code point at the front of `s` and advances `s` past it.
//   s:              string view, modified in place (data advanced, count reduced).
//   utf32_char_out: receives the code point, or UNI_REPLACEMENT_CHAR on failure.
// Returns false for an empty string (s is left unchanged) or a truncated sequence.
bool next_utf8_to_utf32 (string& s, u32* utf32_char_out) {
    // Nothing left to decode: report failure without reading s.data.
    if (s.data == nullptr || s.count <= 0) {
        (*utf32_char_out) = UNI_REPLACEMENT_CHAR;
        return false;
    }

    s64  codepoint_source_length = 0;
    bool success = character_utf8_to_utf32(s.data, s.count, utf32_char_out, &codepoint_source_length);

    s.data  += codepoint_source_length;
    s.count -= codepoint_source_length;
    Assert(s.count >= 0);

    return success;
}

View File

@ -1,5 +1,5 @@
// Hook for checks that run before the context exists; currently it only
// prints a start and a finish banner.
void run_pre_setup_tests() {
// #no_context: context will not be initialized at this point, so log() doesn't work
printf("Running pre-setup tests...\n");
printf("\nFinished running pre-setup tests...\n");
}

View File

@ -21,23 +21,6 @@
// if we need ordered insertions and deletes.
//
// Returns offset
force_inline u32 AddString_NoCount (Serializer* serializer, u8* data, u8 count) { // #TODO: , bool null_terminate=false
u32 original_count = (u32)serializer->count;
u8* current_point = &serializer->data[original_count];
s64 final_count = serializer->allocated + (count * sizeof(u8));
if (serializer->allocated < final_count) {
array_reserve(*serializer, final_count);
}
memcpy(current_point, data, count * sizeof(u8));
serializer->count += count * sizeof(u8);
return original_count;
}
constexpr s64 DFS_Preallocation_Count = 4194304; // 2^22
// template <typename Length_Type>

View File

@ -1,10 +1,10 @@
// #TODO: #OS_Win32
// [ ] #Exception handling code in `Win32_Exception_Filter`
// [~] #Thread cleanup: in `thread_deinit` is there any requirement to cleanup child threads?
// - I think no? Threads should handle their own lifetimes, and the parent threads should ensure child threads are complete
// - I think: no? Threads should handle their own lifetimes, and the parent threads should ensure child threads are complete
// before terminating.
// Or we can move child threads up to the parent?
#if OS_WINDOWS
constexpr s64 FILETIME_TO_UNIX = 116444736000000000i64;
f64 GetUnixTimestamp () {
FILETIME fileTime;
@ -31,7 +31,20 @@ u64 FILETIME_to_ticks (FILETIME fileTime) {
return ticks;
}
#endif
// Formats a FILETIME as "YYYY-MM-DD HH:MM:SS.mmm" in the system's local time zone.
// If the local-time conversion fails the UTC time is printed instead; if the
// FILETIME itself cannot be converted, every field prints as zero.
string format_time_datetime (FILETIME ft) {
    // Zero-initialised so a failed conversion never formats uninitialised fields.
    SYSTEMTIME stUTC = {};
    SYSTEMTIME st    = {};

    if (FileTimeToSystemTime(&ft, &stUTC)) {
        if (!SystemTimeToTzSpecificLocalTime(nullptr, &stUTC, &st)) {
            st = stUTC; // fall back to UTC rather than printing zeros
        }
    }

    return format_string("%04u-%02u-%02u %02u:%02u:%02u.%03u",
        st.wYear,
        st.wMonth,
        st.wDay,
        st.wHour,
        st.wMinute,
        st.wSecond,
        st.wMilliseconds);
}
struct OS_System_Info {
// #cpuid
@ -240,7 +253,6 @@ internal void Win32_Entry_Point (int argc, WCHAR **argv) {
// [ ] Get Working directory (info->working_path)
// [ ] GetEnvironmentStringsW
temp_reset();
printf("Hello there!\n\n");
}
C_LINKAGE DWORD OS_Windows_Thread_Entry_Point (void* parameter) {
@ -1117,323 +1129,361 @@ string os_get_machine_name () {
// [ ] get_mouse_pointer_position
// [ ] ... What APIs do I need for Keyboard
struct Enumeration_Work {
string first_directory;
s32 parent_index;
Arena* thread_arena; // pointer to relevant tctx->arena
// Directories
ArenaArray<u32>* d_offsets;
ArenaArray<s16>* d_lengths;
ArenaArray<s32>* d_parent_indices;
ArenaArray<u64>* d_sizes;
ArenaArray<u64>* d_modtime;
// Files
ArenaArray<u32>* offsets;
ArenaArray<s16>* lengths;
ArenaArray<s32>* parent_indices;
ArenaArray<u64>* sizes;
ArenaArray<u64>* modtime;
};
// #FileEnumerationST
struct Files_Combined_Results {
// ArenaArray<string> full_path;
ArenaArray<string>* name;
ArenaArray<s32>* parent_indices;
struct STFE_Results {
Serializer* strings; // Serializer?
ArenaArray<u32>* offsets;
ArenaArray<s16>* lengths;
ArenaArray<u64>* sizes;
ArenaArray<u64>* modtime;
ArenaArray<u64>* modtimes;
};
struct Drive_Enumeration { // master thread struct
Arena* arena;
// Creates the five arena-backed arrays of an STFE_Results: the string bytes
// plus per-entry offsets, lengths, sizes and modification times.
void init (STFE_Results* results) {
    constexpr auto ENTRY_CAPACITY = 1024*1024*4;    // initial entries per metadata array
    constexpr auto STRING_BYTES   = 1024*1024*4*16; // initial bytes of string storage
    constexpr auto RESERVE        = Arena_Reserve::Size_2G;

    results->strings  = (Serializer*)arena_array_new<u8> (STRING_BYTES, RESERVE);
    results->offsets  = arena_array_new<u32>(ENTRY_CAPACITY, RESERVE);
    results->lengths  = arena_array_new<s16>(ENTRY_CAPACITY, RESERVE);
    results->sizes    = arena_array_new<u64>(ENTRY_CAPACITY, RESERVE);
    results->modtimes = arena_array_new<u64>(ENTRY_CAPACITY, RESERVE);
}
// Frees every array of an STFE_Results that has been created.
// Safe to call on a result set whose init() never ran (all pointers null),
// e.g. when the state is torn down before the enumeration thread got started.
void STFE_Results_Free (STFE_Results* results) {
    if (results == nullptr) return;

    if (results->strings  != nullptr) { arena_array_free(*results->strings);  }
    if (results->offsets  != nullptr) { arena_array_free(*results->offsets);  }
    if (results->lengths  != nullptr) { arena_array_free(*results->lengths);  }
    if (results->sizes    != nullptr) { arena_array_free(*results->sizes);    }
    if (results->modtimes != nullptr) { arena_array_free(*results->modtimes); }
}
struct ST_File_Enumeration { // global state
ArrayView<OS_Drive*> drives;
Thread* master_thread;
s32 thread_count;
STFE_Results dirs;
STFE_Results files;
s32 directories_enumerated; // going sequentially
bool thread_started;
bool thread_completed;
Files_Combined_Results paths;
Files_Combined_Results files;
s32 work_added = 0;
s32 work_completed = 0;
f64 start_time;
f64 end_time;
};
void push_root (Drive_Enumeration* de, string label, s32 index) {
array_add(*de->paths.name, label);
array_add(*de->paths.parent_indices, index);
array_add(*de->paths.sizes, (u64)0);
array_add(*de->paths.modtime, (u64)0);
}
global ST_File_Enumeration* stfe;
global Drive_Enumeration* drive_enumeration;
string directory_get_full_path (Drive_Enumeration* de, s64 index) {
push_allocator(GPAllocator()); // to copy from String_Builder
Files_Combined_Results* f = &de->paths;
string dir_name = (*f->name)[index];
s32 parent_index = (*f->parent_indices)[index];
s32 next_parent = (*f->parent_indices)[parent_index];
Array<string> paths;
paths.allocator = temp();
array_add(paths, (*f->name)[parent_index]);
while (parent_index != next_parent) {
parent_index = next_parent;
next_parent = (*f->parent_indices)[parent_index];
array_add(paths, (*f->name)[parent_index]);
}
// while (parent_index > -1) { // should be while(true)
//
// s32 next_parent = (*f->parent_indices)[parent_index];
// if (parent_index == next_parent) break;
// s32 parent_index = next_parent;
// }
// go in reverse order and add together string
String_Builder* sb = new_string_builder(Arena_Reserve::Size_64K);
for (s64 i = paths.count-1; i >= 0; i -= 1) {
append(sb, paths[i]);
append(sb, "\\");
}
append(sb, dir_name);
return builder_to_string(sb);
}
void update_results (Drive_Enumeration* de, Enumeration_Work* ew) {
// merge results and release resources!
// unfortunately this is a LOT of copying!
for_each(i, (*ew->d_offsets)) {
u8* string_ptr = (ew->thread_arena->memory_base + (*ew->d_offsets)[i]);
string name = {(*ew->d_lengths)[i], string_ptr};
array_add(*de->paths.name, name);
array_add(*de->paths.parent_indices, (*ew->d_parent_indices)[i]);
array_add(*de->paths.sizes, (*ew->d_sizes)[i]);
array_add(*de->paths.modtime, (*ew->d_modtime)[i]);
}
for_each(i, (*ew->offsets)) {
u8* string_ptr = (ew->thread_arena->memory_base + (*ew->offsets)[i]);
string name = {(*ew->lengths)[i], string_ptr};
array_add(*de->files.name, name);
array_add(*de->files.parent_indices, (*ew->parent_indices)[i]);
array_add(*de->files.sizes, (*ew->sizes)[i]);
array_add(*de->files.modtime, (*ew->modtime)[i]);
}
}
void add_record (Enumeration_Work* ew, WIN32_FIND_DATAW* find_data, string name, s32 parent_index=-1) {
u32 offset = (u32)(name.data - ew->thread_arena->memory_base);
bool is_directory = (find_data->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
u64 size = ((u64)find_data->nFileSizeHigh << 32) | ((u64)find_data->nFileSizeLow & 0xFFFFFFFF);
if (is_directory) {
array_add((*ew->d_offsets), offset);
array_add((*ew->d_lengths), (s16)name.count);
array_add((*ew->d_parent_indices), parent_index); // #TODO #parent_index
array_add((*ew->d_sizes), size);
array_add((*ew->d_modtime), FILETIME_to_ticks(find_data->ftLastWriteTime));
} else {
array_add((*ew->offsets), offset);
array_add((*ew->lengths), (s16)name.count);
array_add((*ew->parent_indices), parent_index); // #TODO #parent_index
array_add((*ew->sizes), size);
array_add((*ew->modtime), FILETIME_to_ticks(find_data->ftLastWriteTime));
}
}
Thread_Continue_Status file_enumeration_thread_group_proc (Thread_Group* group, Thread* thread, void* work) {
// 1. setup userdata as an Arena*:
Arena* result_arena;
if (!thread->context->userdata) {
result_arena = next_arena(Arena_Reserve::Size_64G);
thread->context->userdata = result_arena;
} else {
result_arena = (Arena*)thread->context->userdata;
}
Enumeration_Work* enum_work = (Enumeration_Work*)work;
enum_work->thread_arena = (Arena*)thread->context->userdata;
enum_work->d_offsets = arena_array_new<u32>(4096, Arena_Reserve::Size_2M);
enum_work->d_lengths = arena_array_new<s16>(4096, Arena_Reserve::Size_2M);
enum_work->d_parent_indices = arena_array_new<s32>(4096, Arena_Reserve::Size_2M);
enum_work->d_sizes = arena_array_new<u64>(4096, Arena_Reserve::Size_2M);
enum_work->d_modtime = arena_array_new<u64>(4096, Arena_Reserve::Size_2M);
enum_work->offsets = arena_array_new<u32>(4096, Arena_Reserve::Size_2M);
enum_work->lengths = arena_array_new<s16>(4096, Arena_Reserve::Size_2M);
enum_work->parent_indices = arena_array_new<s32>(4096, Arena_Reserve::Size_2M);
enum_work->sizes = arena_array_new<u64>(4096, Arena_Reserve::Size_2M);
enum_work->modtime = arena_array_new<u64>(4096, Arena_Reserve::Size_2M);
// Validate thread context?
push_allocator(temp());
auto_release_temp();
// log("file_enumeration_thread_group_proc, thread index: %d", thread->index);
// MAKE SURE PATH IS NULL TERMINATED!
wstring wildcard_name = utf8_to_wide(format_string("%s\\*", enum_work->first_directory.data)); // #temp
WIN32_FIND_DATAW find_data;
HANDLE h = FindFirstFileExW((LPCWSTR)wildcard_name.data, FindExInfoBasic, &find_data,
FindExSearchNameMatch, nullptr, FIND_FIRST_EX_LARGE_FETCH);
if (h == INVALID_HANDLE_VALUE) {
return Thread_Continue_Status::CONTINUE;
}
while (true) {
push_arena(result_arena);
string name = wide_to_utf8((u16*)find_data.cFileName); // #NOT_TEMP
bool should_continue = (name.count == 0 || name == "." || name == "..");
if (should_continue) {
bool success = FindNextFileW(h, &find_data);
if (!success)
break;
continue;
}
add_record(enum_work, &find_data, name, enum_work->parent_index);
bool success = FindNextFileW(h, &find_data);
if (!success) break;
}
FindClose(h);
return Thread_Continue_Status::CONTINUE;
}
s64 multithreaded_file_enumeration_master_proc (Thread* thread) {
auto task = thread_task(Drive_Enumeration);
push_arena(task->arena);
Thread_Group* file_enum_thread_group = New<Thread_Group>();
s32 thread_count = os_cpu_physical_core_count();
void free_stfe_and_reset () {
push_allocator(GPAllocator());
thread_group_init(file_enum_thread_group, thread_count, file_enumeration_thread_group_proc, true);
array_free(stfe->drives);
internal_free(stfe->master_thread);
STFE_Results_Free(&stfe->dirs);
STFE_Results_Free(&stfe->files);
internal_free(stfe);
stfe = nullptr; // final step
}
// Records one enumerated entry in either the directory or the file result set.
//   stfe:         enumeration state that owns the result arrays.
//   full_path:    path of the entry; its bytes are copied into the result set's string storage.
//   is_directory: selects stfe->dirs (true) or stfe->files (false).
//   find_data:    Win32 find record supplying modification time and, for files, size.
// Directories get no entry in `sizes`; files get one.
// Returns a view of the stored copy of full_path (points into the result set's string storage).
string add_record (ST_File_Enumeration* stfe, string full_path, bool is_directory, WIN32_FIND_DATAW* find_data) {
    // Lengths are stored as s16; a longer path would be silently corrupted by the cast.
    Assert(full_path.count >= 0 && full_path.count <= 32767);

    STFE_Results* r = is_directory ? &stfe->dirs : &stfe->files;
    s16 length = (s16)full_path.count;

    u32 offset = AddString_NoCount(r->strings, full_path.data, length);
    array_add((*r->offsets), offset);
    array_add((*r->lengths), length);

    if (!is_directory) {
        // No size for directories.
        u64 size = ((u64)find_data->nFileSizeHigh << 32) | ((u64)find_data->nFileSizeLow & 0xFFFFFFFF);
        array_add((*r->sizes), size);
    }

    u64 modtime = FILETIME_to_ticks(find_data->ftLastWriteTime);
    array_add((*r->modtimes), modtime);

    // return the string copy!
    string path_copy = {full_path.count, &r->strings->data[offset]};
    return path_copy;
}
// Number of directory entries recorded so far (one offset per entry).
s32 count_paths (ST_File_Enumeration* stfe) {
    return (s32)stfe->dirs.offsets->count;
}
// Number of file entries recorded so far (one offset per entry).
s32 count_files (ST_File_Enumeration* stfe) {
    return (s32)stfe->files.offsets->count;
}
// #UI #TEMP - just for visualization!
// Returns a copy (via copy_string) of the stored path of file `index`.
// `index` must be within [0, count_files).
string get_file_copy (ST_File_Enumeration* stfe, s64 index) {
    Assert(index >= 0 && index < count_files(stfe));

    STFE_Results* files = &stfe->files;
    s64 byte_count = (*files->lengths)[index];
    u32 start      = (*files->offsets)[index];

    string view = {byte_count, &files->strings->data[start]};
    return copy_string(view);
}
// Returns a non-owning view of the stored path of file `index`; the bytes
// stay inside the file result set's string storage.
string get_file_string_view (ST_File_Enumeration* stfe, s64 index) {
    STFE_Results* files = &stfe->files;
    s64 byte_count = (*files->lengths)[index];
    u32 start      = (*files->offsets)[index];

    string view = {byte_count, &files->strings->data[start]};
    return view;
}
// Returns a copy (via copy_string) of the stored path of directory `index`.
// `index` must be within [0, count_paths).
string get_path_copy (ST_File_Enumeration* stfe, s64 index) {
    Assert(index >= 0 && index < count_paths(stfe));

    STFE_Results* dirs = &stfe->dirs;
    s64 byte_count = (*dirs->lengths)[index];
    u32 start      = (*dirs->offsets)[index];

    string view = {byte_count, &dirs->strings->data[start]};
    return copy_string(view);
}
// Stored size of file `index`, converted from u64 to s64.
s64 get_file_size_bytes (ST_File_Enumeration* stfe, s64 index) {
    u64 stored_size = (*stfe->files.sizes)[index];
    return (s64)stored_size;
}
// Rebuilds a FILETIME from the 64-bit modification time stored for file `index`
// by copying its eight bytes verbatim.
FILETIME get_file_modtime (ST_File_Enumeration* stfe, s64 index) {
    u64 ticks = (*stfe->files.modtimes)[index];

    FILETIME result;
    memcpy(&result, &ticks, sizeof(u64));
    return result;
}
// Rebuilds a FILETIME from the 64-bit modification time stored for directory
// `index` by copying its eight bytes verbatim.
FILETIME get_path_modtime (ST_File_Enumeration* stfe, s64 index) {
    u64 ticks = (*stfe->dirs.modtimes)[index];

    FILETIME result;
    memcpy(&result, &ticks, sizeof(u64));
    return result;
}
s64 win32_file_enum_thread_proc (Thread* thread) {
auto task = thread_task(ST_File_Enumeration);
init(&task->dirs);
init(&task->files);
// Allocates to thread_context->arena, which is cleaned up
// when the thread completes. see: thread_deinit
Array<string> paths_to_enumerate;
for_each(d, task->drives) {
auto work = New<Enumeration_Work>(GPAllocator()); //replace with arena bootstrap?
work->first_directory = task->drives[d]->label; // this includes the colon-slash, (e.g. `C:\`).
work->parent_index = (s32)d; // #HACK?
// add label root to combined results, so we can look it up later!
push_root(task, work->first_directory, work->parent_index);
add_work(file_enum_thread_group, work);
task->work_added += 1;
}
start(file_enum_thread_group);
// set task completed.
s64 path_index = task->drives.count;
// #TODO: Get completed work!
while (task->work_completed < task->work_added) {
auto_release_temp();
ArrayView<void*> cw = get_completed_work(file_enum_thread_group);
for_each(i, cw) {
auto ew = (Enumeration_Work*)cw[i];
update_results(task, ew);
arena_array_free(*ew->d_offsets, false);
arena_array_free(*ew->d_lengths, false);
arena_array_free(*ew->d_parent_indices, false);
arena_array_free(*ew->d_sizes, false);
arena_array_free(*ew->d_modtime, false);
arena_array_free(*ew->offsets, false);
arena_array_free(*ew->lengths, false);
arena_array_free(*ew->parent_indices, false);
arena_array_free(*ew->sizes, false);
arena_array_free(*ew->modtime, false);
string_free(ew->first_directory);
internal_free(ew);
string parent_directory = task->drives[d]->label; // includes a trailing slash
if (parent_directory.data[2] == (u8)'\\') {
parent_directory.count -= 1; //#hack to quickly remove trailing slash.
}
task->work_completed += (s32)cw.count;
array_add(paths_to_enumerate, parent_directory);
// For each new directory:
// s64 dirs_to_enumerate = task->paths.name->count - path_index;
for (s64 i = path_index; i < task->paths.name->count; i += 1) {
auto work = New<Enumeration_Work>(GPAllocator());
work->first_directory = directory_get_full_path(task, i);// need full name here!
work->parent_index = (s32)i;
while (paths_to_enumerate.count > 0) {
push_allocator(temp());
auto_release_temp();
// This needs to be null-terminated:
// #TODO: Replace this #LIFO array with an arena-backed FIFO stack (singly linked-list).
string next_directory = copy_string(pop(paths_to_enumerate)); // LIFO. maybe not the best way?
wstring wildcard_name = utf8_to_wide(format_string("%s\\*", next_directory.data));
add_work(file_enum_thread_group, work);
WIN32_FIND_DATAW find_data;
HANDLE h = FindFirstFileExW((LPCWSTR)wildcard_name.data, FindExInfoBasic, &find_data,
FindExSearchNameMatch, nullptr, FIND_FIRST_EX_LARGE_FETCH);
if (h == INVALID_HANDLE_VALUE) {
log_error("FindFirstFileExW failed for %s", wide_to_utf8(wildcard_name.data, (s32)wildcard_name.count).data);
os_log_error();
continue;
}
task->work_added += 1;
}
path_index = task->paths.name->count;
Sleep(1);
log("work completed: %d/%d",task->work_completed, task->work_added);
}
while (true) { auto_release_temp();
string name = wide_to_utf8((u16*)find_data.cFileName);
bool should_continue = (name.count == 0 || name == "." || name == "..");
if (should_continue) {
bool success = FindNextFileW(h, &find_data);
if (!success) { break; }
continue;
}
bool is_directory = (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
string full_path = format_string("%s\\%s", next_directory.data, name.data);
string full_path_copy = add_record(task, full_path, is_directory, &find_data);
if (is_directory) {
array_add(paths_to_enumerate, full_path_copy);
}
bool success = FindNextFileW(h, &find_data);
if (!success) break;
} // while (true) -> FindNextFileW
FindClose(h);
} // while (parent_directory)
} // for_each(d, drives)
shutdown(file_enum_thread_group);
task->end_time = GetUnixTimestamp();
task->thread_completed = true;
return 0;
}
void initialize (Files_Combined_Results* fcr) {
fcr->name = arena_array_new<string>(4194304, Arena_Reserve::Size_2G); // 2GB @ 16-byte strings => 134.2M entries. 64 might be better here for really large file collections!
fcr->parent_indices = arena_array_new<s32>(4194304, Arena_Reserve::Size_2G);
fcr->sizes = arena_array_new<u64>(4194304, Arena_Reserve::Size_2G);
fcr->modtime = arena_array_new<u64>(4194304, Arena_Reserve::Size_2G);
}
void run_multithreaded_enumeration_thread () {
// Need some struct to track the state of this operation.
Arena* arena = next_arena(Arena_Reserve::Size_64K);
push_arena(arena);
void os_run_file_enumeration_single_threaded () {
push_allocator(GPAllocator());
drive_enumeration = New<Drive_Enumeration>();
(*drive_enumeration) = {
arena,
os_get_available_drives(),
stfe = New<ST_File_Enumeration>();
(*stfe) = {
os_get_available_drives(),
New<Thread>(),
os_cpu_physical_core_count(),
0, false, false, {}, {},
0, 0
STFE_Results(), STFE_Results(),
0, true, false, GetUnixTimestamp(), 0
};
initialize(&drive_enumeration->paths);
initialize(&drive_enumeration->files);
// We start 1 thread to run the thread group and track the threads
string thread_name = "Multithreaded Enumeration: Master Thread";
bool success = thread_init(drive_enumeration->master_thread,
multithreaded_file_enumeration_master_proc, thread_name);
string thread_name = "Single Thread Enumeration - Master Thread";
bool success = thread_init(stfe->master_thread, win32_file_enum_thread_proc, thread_name);
if (!success) {
log_error("Failed to initialize thread (stft->master_thread)");
os_log_error();
}
Assert(success);
thread_start(drive_enumeration->master_thread, drive_enumeration);
drive_enumeration->thread_started = true;
thread_start(stfe->master_thread, stfe);
}
bool file_enum_multithreading_started () {
if (drive_enumeration == nullptr) return false;
return drive_enumeration->thread_started;
constexpr u32 STFE_Magic_Number = 0x19075fee;
// Writes the global `stfe` enumeration results to `file_path`.
// Stream layout, in order:
//   u32 STFE_Magic_Number, s32 drive count,
//   dirs : strings, offsets, lengths, modtimes
//   files: strings, offsets, lengths, sizes, modtimes
// Returns false when the file cannot be opened; otherwise returns the result of file_write.
bool Serialize_ST_File_Enumeration (string file_path) {
    Timed_Block_Print("Serialize_ST_File_Enumeration");

    File f = file_open(file_path, true, false, true);
    if (!file_is_valid(f)) { return false; }

    // #TODO #Serialization Unfortunately, there's a lot of needless copying here
    // it would be a lot nicer if we could just write-file in place. idk how to do that though ;_;
    Serializer* s = new_serializer(Arena_Reserve::Size_64G);

    // Header:
    Add(s, (u32)STFE_Magic_Number);
    Add(s, (s32)stfe->drives.count);

    // Directories (no sizes array):
    STFE_Results* dirs = &stfe->dirs;
    AddArray(s, to_view(*dirs->strings));
    AddArray(s, to_view(*dirs->offsets));
    AddArray(s, to_view(*dirs->lengths));
    AddArray(s, to_view(*dirs->modtimes));

    // Files:
    STFE_Results* files = &stfe->files;
    AddArray(s, to_view(*files->strings));
    AddArray(s, to_view(*files->offsets));
    AddArray(s, to_view(*files->lengths));
    AddArray(s, to_view(*files->sizes));
    AddArray(s, to_view(*files->modtimes));

    bool written = file_write(&f, to_view(*s));

    reset_serializer(s);
    file_close(&f);
    free_serializer(s);

    return written;
}
bool file_enum_multithreading_active () {
if (drive_enumeration == nullptr) return false;
if (drive_enumeration->thread_completed) {
return false;
// Loads enumeration results previously written by Serialize_ST_File_Enumeration
// from `file_path` into the global `stfe` (allocating it on first use).
//
// The global is reset before the file is read, so on failure `stfe` is left in
// its freshly-reset state. Returns false when the file is empty/unreadable or
// when it does not start with STFE_Magic_Number; returns true once all arrays
// have been read, after marking the enumeration as started and completed.
bool Deserialize_ST_File_Enumeration (string file_path) {
    Timed_Block_Print("Deserialize_ST_File_Enumeration");
    push_allocator(GPAllocator());
    if (!stfe) stfe = New<ST_File_Enumeration>();
    (*stfe) = {
        {},
        {},
        STFE_Results(), STFE_Results(),
        0, false, false, GetUnixTimestamp(), 0
    };

    // The raw file contents only need to live for the duration of this call.
    push_allocator(temp());
    auto_release_temp();
    Deserializer deserializer = read_entire_file(file_path, true);
    if (deserializer.count == 0) return false;
    auto d = &deserializer;

    u32 magic_number = 0; s32 drive_count = 0;
    Read(d, &magic_number);
    // A wrong magic number means this is not one of our files (or it is corrupt).
    // Report failure through the return value instead of only asserting, so a bad
    // file cannot be parsed as array data when assertions are compiled out.
    if (magic_number != STFE_Magic_Number) {
        log_error("Deserialize_ST_File_Enumeration: magic number mismatch (got 0x%X, expected 0x%X)",
                  magic_number, STFE_Magic_Number);
        return false;
    }
    Read(d, &drive_count); // read to advance the stream; the value is not used here.

    init(&stfe->dirs);
    init(&stfe->files);

    // Directories (no sizes array) — must mirror the order used when serializing.
    STFE_Results* r = &stfe->dirs;
    ReadToArenaArray(d, r->strings);
    ReadToArenaArray(d, r->offsets);
    ReadToArenaArray(d, r->lengths);
    ReadToArenaArray(d, r->modtimes);

    // Files:
    r = &stfe->files;
    ReadToArenaArray(d, r->strings);
    ReadToArenaArray(d, r->offsets);
    ReadToArenaArray(d, r->lengths);
    ReadToArenaArray(d, r->sizes);
    ReadToArenaArray(d, r->modtimes);

    stfe->thread_started = true;
    stfe->thread_completed = true;
    stfe->end_time = GetUnixTimestamp();
    return true;
}
// #USNJrnl stuff:
// This should work even if our other indices are not ready yet!
// True when the drive holds a usable volume handle, i.e. jrnl.hVol is neither
// nullptr nor INVALID_HANDLE_VALUE.
bool USN_Journal_Monitoring_Ready(OS_Drive* drive) {
    HANDLE volume = drive->jrnl.hVol;
    if (volume == nullptr) { return false; }
    return volume != INVALID_HANDLE_VALUE;
}
void Win32_Enable_USN_Journal_Monitoring (ArrayView<OS_Drive*> drives) {
push_allocator(temp());
// #TODO: Put any relevant data into Win32_Drive.
for_each(d, drives) {
OS_Drive* drive = drives[d];
if (drive->jrnl.no_permission) continue;
if (USN_Journal_Monitoring_Ready(drive)) continue;
string drive_letter = Win32_drive_letter(drive->label);
string create_file_target = format_string("\\\\.\\%s:", drive_letter.data);
drive->jrnl.hVol = CreateFileA((LPCSTR)create_file_target.data, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, nullptr,
OPEN_EXISTING, 0, nullptr);
if (drive->jrnl.hVol == INVALID_HANDLE_VALUE) {
log_error("CreateFileA failed on target %s", create_file_target.data);
os_log_error();
drive->jrnl.no_permission = true;
}
}
if (drive_enumeration->thread_started) {
return true;
}
return false;
}
// if (drive_enumeration != nullptr) {
// // Check if task is completed, clean up thread.
// discard arena and zero drive_enumeration.
// }
// Queries and logs the USN journal header (journal ID and first USN) of every
// drive in `drives` whose volume handle is ready.
//
// Win32_Enable_USN_Journal_Monitoring is called first so handles are opened on
// demand. Drives without a usable handle are skipped. A DeviceIoControl failure
// on one drive is logged and the loop moves on to the next drive, so a single
// failing volume does not prevent the remaining drives from being queried.
void Query_USN_Journal (ArrayView<OS_Drive*> drives) {
    Win32_Enable_USN_Journal_Monitoring(drives);
    for_each(d, drives) {
        OS_Drive* drive = drives[d];
        if (!USN_Journal_Monitoring_Ready(drive)) continue;

        USN_JOURNAL_DATA_V0 usn_jd;
        DWORD bytes_returned;
        BOOL ok = DeviceIoControl(drive->jrnl.hVol, FSCTL_QUERY_USN_JOURNAL,
                                  nullptr, 0,
                                  &usn_jd, sizeof(usn_jd),
                                  &bytes_returned,
                                  nullptr);
        if (!ok) {
            log_error("DeviceIoControl failed on target %s", drive->label.data);
            os_log_error();
            continue; // keep going: the other drives may still be queryable.
        }

        log("[DeviceIoControl] target %s", drive->label.data);
        log(" > Journal ID: %llu", usn_jd.UsnJournalID);
        log(" > First USN: %llu", usn_jd.FirstUsn);
        debug_break(); // #TODO #continue
    }
}

View File

@ -176,6 +176,13 @@ File_System Win32_filesystem_from_string (string s) {
return File_System::Unknown;
}
struct Dense_FS; // #hack forward declare!
// Per-drive state for USN journal access (stored as Win32_Drive::jrnl).
struct NTFS_USN_Journal {
bool no_permission; // set to true when opening the volume with CreateFileA failed; such drives are skipped afterwards.
HANDLE hVol; // volume handle returned by CreateFileA; nullptr / INVALID_HANDLE_VALUE means "not ready".
// ArrayView<USN_Journal_Change> changes;
};
struct Win32_Drive {
string label;
string volume_name;
@ -192,6 +199,8 @@ struct Win32_Drive {
s64 file_count;
f32 time_to_enumerate;
Dense_FS* data;
NTFS_USN_Journal jrnl;
};
typedef Win32_Drive OS_Drive;

View File

@ -0,0 +1,334 @@
// Locates a directory record across worker threads: which worker's results hold
// the record, and the record's index inside that worker's directory arrays.
// A parent_index of -1 is treated as "no parent" by path_from_parent_index.
// Field order matters: values are built positionally, e.g. {thread_index, -1}.
struct Parent_Index {
s32 thread_index; // group->worker_info[thread_index].thread
s32 parent_index; // index into d_*offsets/lengths/etc.
};
// Results accumulated by one worker thread; stored in thread->context->userdata.
// Directory (d_*) and file (f_*) records are kept as parallel arrays: entry i of
// every d_* array describes the same directory, likewise for f_*.
// Names are not stored inline; each record keeps an offset (relative to
// arena->memory_base) and a length.
struct File_Enumeration_Thread_Results { // #userdata
Arena* arena; // for strings
// Directories
ArenaArray<u32>* d_offsets; // name offset from arena->memory_base
ArenaArray<s16>* d_lengths; // name length (stored as s16)
ArenaArray<Parent_Index>* d_parent_indices;
ArenaArray<u64>* d_modtime; // from FILETIME_to_ticks(ftLastWriteTime)
// s64 dirs_enumerated = 0;
// Files
ArenaArray<u32>* f_offsets; // name offset from arena->memory_base
ArenaArray<s16>* f_lengths; // name length (stored as s16)
ArenaArray<Parent_Index>* f_parent_indices;
ArenaArray<u64>* f_sizes; // (nFileSizeHigh << 32) | nFileSizeLow
ArenaArray<u64>* f_modtime; // from FILETIME_to_ticks(ftLastWriteTime)
};
// Sets up one worker's result storage: a 2GB-reserve arena for the name strings
// plus the directory (d_*) and file (f_*) parallel arrays, each created with
// 4194304 (2^22) as its first argument and a 2GB reserve.
void initialize (File_Enumeration_Thread_Results* fcr) { // Preallocate for 2^22 files:
    constexpr int entries_2_pow_22 = 4194304;

    fcr->arena = next_arena(Arena_Reserve::Size_2G);

    // Directories
    fcr->d_offsets        = arena_array_new<u32>(entries_2_pow_22, Arena_Reserve::Size_2G);
    fcr->d_lengths        = arena_array_new<s16>(entries_2_pow_22, Arena_Reserve::Size_2G);
    fcr->d_parent_indices = arena_array_new<Parent_Index>(entries_2_pow_22, Arena_Reserve::Size_2G);
    fcr->d_modtime        = arena_array_new<u64>(entries_2_pow_22, Arena_Reserve::Size_2G);

    // Files
    fcr->f_offsets        = arena_array_new<u32>(entries_2_pow_22, Arena_Reserve::Size_2G);
    fcr->f_lengths        = arena_array_new<s16>(entries_2_pow_22, Arena_Reserve::Size_2G);
    fcr->f_parent_indices = arena_array_new<Parent_Index>(entries_2_pow_22, Arena_Reserve::Size_2G);
    fcr->f_sizes          = arena_array_new<u64>(entries_2_pow_22, Arena_Reserve::Size_2G);
    fcr->f_modtime        = arena_array_new<u64>(entries_2_pow_22, Arena_Reserve::Size_2G);
}
// One unit of work for the enumeration thread group: enumerate the immediate
// children of a single directory.
struct Enumeration_Work {
string first_directory; // directory to enumerate; the worker appends "\*" to it for FindFirstFileExW.
Parent_Index parent; // {-1, -1} for the per-drive root work items.
bool is_root = false; // true for the per-drive work items created by the master thread.
Array<Enumeration_Work*> next; // work discovered for sub-directories; the master thread queues and then frees these.
};
// State shared between the caller and the enumeration master thread.
// NOTE: instances are filled with a positional brace list, so the field order
// below must stay in sync with os_run_file_enumeration_multithreaded.
struct Drive_Enumeration { // master thread struct
ArrayView<OS_Drive*> drives; // drives to enumerate (one root work item per drive)
Thread* master_thread; // thread running multithreaded_file_enumeration_master_proc
s32 thread_count;
bool thread_started; // set right after thread_start() by the launching code
bool thread_completed; // set by the master thread just before it returns
// Files_Combined_Results paths;
// Files_Combined_Results files;
s32 work_added = 0; // number of work items handed to the thread group so far
s32 work_completed = 0; // number of work items collected back from the thread group
};
// void push_root (Drive_Enumeration* de, string label, s32 index) {
// array_add(*de->paths.name, label);
// array_add(*de->paths.parent_indices, index);
// array_add(*de->paths.sizes, (u64)0);
// array_add(*de->paths.modtime, (u64)0);
// }
// State of the multithreaded enumeration; assigned by os_run_file_enumeration_multithreaded
// and compared against nullptr by the file_enum_multithreading_* queries.
global Drive_Enumeration* drive_enumeration;
// File_Enumeration_Thread_Results* results_from_thread_index (Thread_Group* group, s32 thread_index) {
// return ;
// }
// Looks up the directory record addressed by `pid` and returns a view of its
// name (pointing into the owning worker's string arena). The record's own
// parent is written to `*next_pid`.
// When pid.parent_index is -1 an empty string is returned and `*next_pid` is
// left untouched.
string path_from_parent_index (Thread_Group* group, Parent_Index pid, Parent_Index* next_pid) {
    if (pid.parent_index == -1) { return ""; }

    // Each worker keeps its results in its thread context's userdata.
    auto owner = (File_Enumeration_Thread_Results*)group->worker_info[pid.thread_index].thread.context->userdata;

    u8* name_start  = (owner->arena->memory_base + (*owner->d_offsets)[pid.parent_index]);
    u32 name_length = (*owner->d_lengths)[pid.parent_index];

    (*next_pid) = (*owner->d_parent_indices)[pid.parent_index];

    return {(s64)name_length, name_start};
}
// This is much stupider and more complicated than I would like, unfortunately.
// Builds "<ancestor>\<ancestor>\...\<dir_name>" by following the parent chain
// that starts at `pid`, collecting each ancestor's name, and then joining the
// names from the outermost ancestor inwards.
// At least one lookup is always performed, even if pid.parent_index is -1.
string directory_get_full_path (Thread_Group* group, Parent_Index pid, string dir_name) {
    push_allocator(GPAllocator()); // to copy from String_Builder

    // Ancestor names, innermost first. Scratch storage only.
    Array<string> ancestors;
    ancestors.allocator = temp();

    Parent_Index cursor = pid;
    do {
        Parent_Index parent_of_cursor = {};
        string segment = path_from_parent_index(group, cursor, &parent_of_cursor);
        array_add(ancestors, segment);
        cursor = parent_of_cursor;
    } while (cursor.parent_index != -1);

    // Emit outermost ancestor first, each followed by a backslash.
    String_Builder* sb = new_string_builder(Arena_Reserve::Size_64K);
    for (s64 remaining = ancestors.count; remaining > 0; remaining -= 1) {
        append(sb, ancestors[remaining - 1]);
        append(sb, "\\");
    }
    append(sb, dir_name);

    return builder_to_string(sb);
}
/*void update_results (Drive_Enumeration* de, Enumeration_Work* ew) {
// merge results and release resources!
// unfortunately this is a LOT of copying!
for_each(i, (*ew->d_offsets)) {
u8* string_ptr = (ew->thread_arena->memory_base + (*ew->d_offsets)[i]);
string name = {(*ew->d_lengths)[i], string_ptr};
array_add(*de->paths.name, name);
array_add(*de->paths.parent_indices, (*ew->d_parent_indices)[i]);
array_add(*de->paths.sizes, (*ew->d_sizes)[i]);
array_add(*de->paths.modtime, (*ew->d_modtime)[i]);
}
for_each(i, (*ew->offsets)) {
u8* string_ptr = (ew->thread_arena->memory_base + (*ew->offsets)[i]);
string name = {(*ew->lengths)[i], string_ptr};
array_add(*de->files.name, name);
array_add(*de->files.parent_indices, (*ew->parent_indices)[i]);
array_add(*de->files.sizes, (*ew->sizes)[i]);
array_add(*de->files.modtime, (*ew->modtime)[i]);
}
}*/
// Appends one directory or file record to `results`.
//   find_data    : source of the size (high/low 32-bit halves) and last-write time.
//   name         : entry name; stored as an offset from results->arena->memory_base
//                  plus an s16 length.
//   parent_index : stored verbatim in the matching *_parent_indices array.
//   is_directory : selects the d_* arrays (no size stored) or the f_* arrays.
void add_record (File_Enumeration_Thread_Results* results,
                 WIN32_FIND_DATAW* find_data,
                 string name,
                 Parent_Index parent_index,
                 bool is_directory) {
    u32 name_offset = (u32)(name.data - results->arena->memory_base);

    u64 size_high  = (u64)find_data->nFileSizeHigh << 32;
    u64 size_low   = (u64)find_data->nFileSizeLow & 0xFFFFFFFF;
    u64 byte_size  = size_high | size_low;

    u64 write_ticks = FILETIME_to_ticks(find_data->ftLastWriteTime);

    if (!is_directory) {
        array_add((*results->f_offsets), name_offset);
        array_add((*results->f_lengths), (s16)name.count);
        array_add((*results->f_parent_indices), parent_index); // #parent_index
        array_add((*results->f_sizes), byte_size);
        array_add((*results->f_modtime), write_ticks);
        return;
    }

    array_add((*results->d_offsets), name_offset);
    array_add((*results->d_lengths), (s16)name.count);
    array_add((*results->d_parent_indices), parent_index); // #parent_index
    array_add((*results->d_modtime), write_ticks);
}
// Thread-group work procedure. `work` is an Enumeration_Work* naming one directory.
// Enumerates that directory's immediate children with FindFirstFileExW/FindNextFileW,
// records every entry (except "", "." and "..") in this thread's
// File_Enumeration_Thread_Results, and appends one new Enumeration_Work per
// sub-directory to enum_work->next for the master thread to queue.
// Always returns Thread_Continue_Status::CONTINUE, including when the directory
// cannot be opened.
Thread_Continue_Status file_enumeration_thread_group_proc (Thread_Group* group, Thread* thread, void* work) {
// 1. Lazily create this thread's results struct and keep it in thread->context->userdata:
// #TODO: replace userdata with a struct that manages the thread-local data for this
// particular problem.
// NOTE(review): the original comment trailed off here ("This data can be rescued before we").
File_Enumeration_Thread_Results* results;
if (!thread->context->userdata) {
thread->context->userdata = New<File_Enumeration_Thread_Results>(GPAllocator());
initialize((File_Enumeration_Thread_Results*)thread->context->userdata);
}
results = (File_Enumeration_Thread_Results*)thread->context->userdata;
Enumeration_Work* enum_work = (Enumeration_Work*)work;
// Validate thread context?
push_allocator(temp());
auto_release_temp();
// log("file_enumeration_thread_group_proc, thread index: %d", thread->index);
// MAKE SURE PATH IS NULL TERMINATED!
wstring wildcard_name = utf8_to_wide(format_string("%s\\*", enum_work->first_directory.data)); // #temp
WIN32_FIND_DATAW find_data;
HANDLE h = FindFirstFileExW((LPCWSTR)wildcard_name.data, FindExInfoBasic, &find_data,
FindExSearchNameMatch, nullptr, FIND_FIRST_EX_LARGE_FETCH);
if (h == INVALID_HANDLE_VALUE) {
// Directory could not be opened: nothing is recorded for it (not even the root record below).
return Thread_Continue_Status::CONTINUE;
}
s32 thread_index = get_thread_index(group, (s32)thread->index); // zero-indexed to thread group
Parent_Index pi = enum_work->parent;
// NOTE(review): presumably this makes the string allocations below (copy_string, wide_to_utf8)
// land in results->arena, which the offset arithmetic relies on — TODO confirm how push_arena
// interacts with the push_allocator(temp()) above.
push_arena(results->arena);
if (enum_work->is_root) { // see add_record
// Root work items get their own directory record, with parent_index -1.
string name = copy_string(enum_work->first_directory);
u32 offset = (u32)(name.data - results->arena->memory_base);
s32 current_index = (s32)(*results->d_offsets).count;
pi = {thread_index, current_index};
array_add((*results->d_offsets), offset);
array_add((*results->d_lengths), (s16)name.count);
Parent_Index root_pi = {thread_index, -1};
array_add((*results->d_parent_indices), root_pi); // #parent_index
array_add((*results->d_modtime), (u64)0);
// results->dirs_enumerated += 1;
}
while (true) {
string name = wide_to_utf8((u16*)find_data.cFileName);
// Skip empty names and the "." / ".." pseudo-entries.
bool should_continue = (name.count == 0 || name == "." || name == "..");
if (should_continue) {
bool success = FindNextFileW(h, &find_data);
if (!success)
break;
continue;
}
bool is_directory = (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
// NOTE(review): this uses enum_work->parent.parent_index, not pi.parent_index. For a root work
// item enum_work->parent is {-1, -1}, so entries directly under a root are recorded with
// parent_index -1 rather than the root record's index — confirm this is intended.
Parent_Index next_index = {thread_index, enum_work->parent.parent_index};
add_record(results, &find_data, name, next_index, is_directory);
if (is_directory) {
// Queue the sub-directory as follow-up work; allocated with the general-purpose allocator
// because the master thread later frees it (string_free / array_free / internal_free).
push_allocator(GPAllocator());
auto new_work = New<Enumeration_Work>(false);
new_work->first_directory = directory_get_full_path(group, pi, name);
new_work->parent = next_index;
new_work->is_root = false;
new_work->next = {};
array_add(enum_work->next, new_work);
}
bool success = FindNextFileW(h, &find_data);
if (!success) break;
}
FindClose(h);
return Thread_Continue_Status::CONTINUE;
}
// Master thread for the multithreaded enumeration. The thread's task data is a
// Drive_Enumeration.
//
// 1. Creates a thread group with one worker per physical core.
// 2. Seeds it with one root Enumeration_Work per drive.
// 3. Polls for completed work: every sub-directory work item discovered by a
//    worker (ew->next) is queued, then the finished item is freed.
// 4. Stops once every work item that was ever added has been completed, shuts
//    the group down and marks the task as completed.
//
// Always returns 0.
s64 multithreaded_file_enumeration_master_proc (Thread* thread) {
    auto task = thread_task(Drive_Enumeration);
    Thread_Group* file_enum_thread_group = New<Thread_Group>();
    s32 thread_count = os_cpu_physical_core_count();
    push_allocator(GPAllocator());
    thread_group_init(file_enum_thread_group, thread_count, file_enumeration_thread_group_proc, true);

    for_each(d, task->drives) {
        auto work = New<Enumeration_Work>(GPAllocator(), false); //replace with arena bootstrap?
        work->first_directory = task->drives[d]->label; // this includes the colon-slash, (e.g. `C:\`).
        work->parent = {-1, -1}; // #HACK: (s32)d
        work->is_root = true;
        work->next = {};
        add_work(file_enum_thread_group, work);
        task->work_added += 1;
    }
    start(file_enum_thread_group);

    while (true) {
        auto_release_temp();
        ArrayView<void*> cw = get_completed_work(file_enum_thread_group);
        task->work_completed += (s32)cw.count;
        for_each(i, cw) {
            auto ew = (Enumeration_Work*)cw[i];
            // Queue the follow-up work before counting it, so work_added is never behind.
            for_each(w, ew->next) {
                auto new_work = ew->next[w];
                add_work(file_enum_thread_group, new_work);
            }
            task->work_added += (s32)ew->next.count;
            string_free(ew->first_directory);
            array_free(ew->next);
            internal_free(ew);
        }
        log("work completed: %d/%d",task->work_completed, task->work_added);
        // All follow-up work from this batch has already been added above, so once the
        // counters meet there is nothing in flight and nothing left to discover.
        // Without this exit the loop never ends and the shutdown below is unreachable.
        if (task->work_completed >= task->work_added) break;
        Sleep(1);
    }

    shutdown(file_enum_thread_group);
    task->thread_completed = true;
    return 0;
}
void os_run_file_enumeration_multithreaded () {
// Need some struct to track the state of this operation.
drive_enumeration = New<Drive_Enumeration>();
(*drive_enumeration) = {
os_get_available_drives(),
New<Thread>(),
os_cpu_physical_core_count(),
0, 0, 0, 0
};
// initialize(&drive_enumeration->paths);
// initialize(&drive_enumeration->files);
// We start 1 thread to run the thread group and track the threads
string thread_name = "Multithreaded Enumeration: Master Thread";
bool success = thread_init(drive_enumeration->master_thread,
multithreaded_file_enumeration_master_proc, thread_name);
Assert(success);
thread_start(drive_enumeration->master_thread, drive_enumeration);
drive_enumeration->thread_started = true;
}
// True once the enumeration state exists and its master thread has been started.
bool file_enum_multithreading_started () {
    return drive_enumeration != nullptr && drive_enumeration->thread_started;
}
// True while the enumeration is running: the state exists, the master thread
// has been started, and it has not yet reported completion.
bool file_enum_multithreading_active () {
    return drive_enumeration != nullptr
        && !drive_enumeration->thread_completed
        && drive_enumeration->thread_started;
}

View File

@ -63,7 +63,7 @@ struct NTFS_ResidentAttributeHeader : NTFS_AttributeHeader {
};
struct NTFS_FileNameAttributeHeader : NTFS_ResidentAttributeHeader {
u64 parentRecordNumber : 48;
u64 parentRecordNumber : 48; // low 48 bits
u64 sequenceNumber : 16;
u64 creationTime;
u64 modificationTime;
@ -97,7 +97,7 @@ struct NTFS_RunHeader {
#pragma pack(pop)
struct NTFS_File {
u32 parent_id;
u32 parent_id; // #TODO: FRNs should be 64-bit!
u32 record_id;
u16* name_data;
u64 file_modtime; // FILETIME
@ -585,3 +585,183 @@ bool Serialize_Win32_Drives (ArrayView<Win32_Drive*> drives, string file_path) {
return true;
}
// One-time setup of the NTFS enumeration threads held in `ex1_ntfs`:
// allocates `thread_count` Thread slots and initializes each of them to run
// ntfs_enumeration_thread_proc. Calls after the first are no-ops.
void ntfs_create_enumeration_threads (s32 thread_count) {
    if (ex1_ntfs.initialized) { return; }

    Timed_Block_Print("Thread initialization (ntfs)");
    ex1_ntfs.initialized = true;
    ex1_ntfs.threads = ArrayView<Thread>(thread_count);
    ex1_ntfs.threads_in_flight.allocator = GPAllocator();

    for_each(t, ex1_ntfs.threads) {
        string thread_name = format_string("ntfs_enumeration_thread#%d", t);
        Thread* slot = &ex1_ntfs.threads[t];
        bool success = thread_init(slot, ntfs_enumeration_thread_proc, thread_name);
        Assert(success);
    }
}
/*
void Ex1_show_ntfs_workspace () { using namespace ImGui;
SliderInt("Select path index", &ex1w.path_select, 0, count_paths(stfe)-1);
Text("%s", get_path_copy(stfe, ex1w.path_select).data);
Text("time modified: %s", format_time_datetime(get_path_modtime(stfe, ex1w.path_select)).data);
// #TODO: modtime (to indextime)
SliderInt("Select file index", &ex1w.file_select, 0, count_files(stfe)-1);
Text("%s", get_file_copy(stfe, ex1w.file_select).data);
Text("size: %s", format_bytes(get_file_size_bytes(stfe, ex1w.file_select)).data);
Text("time modified: %s", format_time_datetime(get_file_modtime(stfe, ex1w.file_select)).data);
push_allocator(temp());
for_each(d, ntfs_workspace.drives) {
OS_Drive* drive = ntfs_workspace.drives[d];
Text("%d. %s paths: %lld, files: %lld",
d, drive->label.data,
drive->data->paths.offsets->count,
drive->data->files.offsets->count);
}
// SliderInt("Results to Show", &ntfs_workspace.results_to_show, 0, 50);
for_each(d, ntfs_workspace.drives) {
OS_Drive* drive = ntfs_workspace.drives[d];
// #TODO: Radio button for choosing between paths, files
char* rb1 = format_cstring("paths##%s", drive->label.data);
RadioButton(rb1, &ntfs_workspace.supplementary[d].radio_button, 1);
SameLine();
char* rb2 = format_cstring("files##%s", drive->label.data);
RadioButton(rb2, &ntfs_workspace.supplementary[d].radio_button, 0);
SameLine();
s32 max_count = (s32)drive->data->paths.offsets->count;
if (ntfs_workspace.supplementary[d].radio_button == 0) {
max_count = (s32)drive->data->files.offsets->count;
}
char* slider_label = format_cstring("%s index", drive->label.data);
if (SliderInt(slider_label, &ntfs_workspace.supplementary[d].index, 0, max_count)) { }
}
for_each(d, ntfs_workspace.drives) {
if (ntfs_workspace.supplementary[d].radio_button == 0) { // files
OS_Drive* drive = ntfs_workspace.drives[d];
Dense_FS* dfs = drive->data;
DFS_Array* dfsa = &drive->data->files;
s64 file_index = ntfs_workspace.supplementary[d].index;
DFS_Value v = get_value(dfs, dfsa, file_index);
// #TODO NOTE: v.full_path is NOT the full path #rename
Text("Filename: %s, parent_id: %d", copy_string(v.full_path).data, v.parent_index);
string full_path = get_full_path_from_index(drive, dfsa, file_index);
Text("Full path: %s", full_path.data);
bool success = file_length(full_path, &v.size); // temp, obviously we don't wanna call this every frame lol
Text(" > size: %lld B", v.size);
Text(" > size: %s", format_bytes(v.size).data);
// Text(" > modtime: %s",
} else {
// DFS_Array* dfsa = &ntfs_workspace.drives[d]->data->paths;
}
}
}
*/
/*SeparatorText("ex1_ntfs");
Text("Threads in flight count: %d", ex1_ntfs.threads_in_flight.count);
for_each(i, ex1_ntfs.threads) {
Text(" [%d] initialized: %d, has_context: %d, has_data: %d",
i, ex1_ntfs.threads[i].proc != nullptr, ex1_ntfs.threads[i].context != nullptr, ex1_ntfs.threads[i].data != nullptr);
}*/
/*// #NTFS_MFT_RAW
push_allocator(GPAllocator());
Array<ArrayView<OS_Drive*>> drive_split;
drive_split.allocator = temp(); // this is only needed for this frame
if (drives.count > os_cpu_physical_core_count()) {
s32 thread_count = os_cpu_physical_core_count();
array_resize(drive_split, thread_count);
ntfs_create_enumeration_threads(thread_count);
s32 threads_to_create = thread_count;
s64 drives_per_thread = (drives.count / thread_count);
s64 remainder = drives.count % thread_count;
s64 current_drive = 0;
for_each(d, drive_split) {
if (d == drive_split.count) {
drive_split[d] = ArrayView<OS_Drive*>(remainder);
} else {
drive_split[d] = ArrayView<OS_Drive*>(drives_per_thread);
}
for (s64 i = 0; i < drive_split[d].count; i += 1) {
drive_split[d][i] = drives[current_drive];
current_drive += 1;
}
}
debug_break(); // #TODO: Check that the work has been distributed correctly.
} else { // more threads than drives, or same amount
s32 thread_count = (s32)drives.count;
array_resize(drive_split, drives.count);
ntfs_create_enumeration_threads(thread_count);
for_each(d, drives) {
auto drive = drives[d];
drive_split[d] = ArrayView<OS_Drive*>(1); // Arrays of size one are sad :pensive:
drive_split[d][0] = drive;
}
}
s64 active_thread_count = drive_split.count;
ex1_ntfs.threads_started = true;
for (s64 t = 0; t < active_thread_count; t += 1) {
Thread* thread = &ex1_ntfs.threads[t];
Arena* thread_arena = next_arena(Arena_Reserve::Size_64K);
push_arena(thread_arena);
auto thread_data = New<NTFS_Enumeration_Task>();
thread_data->pool = thread_arena;
thread_data->drives = drive_split[t];
thread_start(thread, thread_data);
array_add(ex1_ntfs.threads_in_flight, thread);
}*/
/* #NTFS_MFT_RAW
if (ex1_ntfs.threads_in_flight.count) {
Text("Threads in flight: %d", ex1_ntfs.threads_in_flight.count);
for_each(t, ex1_ntfs.threads_in_flight) {
if (thread_is_done(ex1_ntfs.threads_in_flight[t])) {
push_allocator(GPAllocator());
Thread* thread = ex1_ntfs.threads_in_flight[t];
auto task = thread_task(NTFS_Enumeration_Task);
array_free(task->drives);
// make sure to retrieve any data you need from here!
release_arena(task->pool);
thread_deinit(ex1_ntfs.threads_in_flight[t], false);
array_unordered_remove_by_index(ex1_ntfs.threads_in_flight, t);
t -= 1; // check this element index again!
}
}
}*/
/* #NTFS_MFT_RAW
if (ex1_ntfs.threads_started && !ex1_ntfs.threads_in_flight.count) {
// All threads are complete, we're free to clean up remaining memory
push_allocator(GPAllocator());
array_free(ex1_ntfs.threads);
array_free(ex1_ntfs.threads_in_flight);
// Instead maybe we should just memset this to zero.
reset_struct(&ex1_ntfs);
}
// How do I tell when all files are enumerated?
// check drives[i]->data.paths.wstrings.count count?
if (all_drives_enumerated && Button("Save drive data")) {
string file_path = format_string_temp("%s_DriveData.bin", os_get_machine_name().data);
bool success = Serialize_Win32_Drives(drives, file_path);
if (!success) { log_error("Failed to save Win32_Drive data"); }
}
if (all_drives_enumerated && Button("Clear all drive data")) {
os_clear_drive_data();
}*/

View File

@ -1,6 +1,6 @@
#pragma once
constexpr const char* MUSA_LIB_VERSION = "0.1a";
const char* MUSA_LIB_VERSION = "0.2";
#define BUILD_DEBUG 1
#define OS_WINDOWS 1
#define OS_LINUX 0

View File

@ -36,6 +36,8 @@
#include "lib/Base/Arena.cpp"
#include "lib/Base/String.cpp"
#include "lib/Base/Unicode.cpp"
#include "lib/Base/RadixSort.cpp"
#include "lib/Base/Base_Thread_Context.cpp"
#include "lib/Base/ErrorType.cpp"
@ -52,7 +54,7 @@
#if OS_WINDOWS
# include "lib/OS/OS_Win32.cpp"
# include "lib/OS/OS_Win32_NTFS.cpp"
// # include "lib/OS/OS_Win32_NTFS.cpp"
#endif
#include "lib/Graphics.cpp"

View File

@ -17,7 +17,9 @@ internal void Main_Entry_Point (int argc, WCHAR **argv);
#endif
internal void Main_Entry_Point (int argc, WCHAR **argv) { // #entry_point
set_cpu_base_frequency(3200); // REQUIRED FOR TIMING MODULE! will depend on CPU
// #TODO: Check if base frequency is even available.
u32 base_frequency = (u32)CPU_Base_Frequency();
set_cpu_base_frequency(base_frequency); // REQUIRED FOR TIMING MODULE! will depend on CPU
#if BASE_RUN_TESTS
run_pre_setup_tests(); // #no_context: context will not be initialized at this point.

View File

@ -1,6 +1,6 @@
struct ExplorerUI {
u8 search_input[64];
u8 secondary_input[64];
// u8 search_input[64];
// u8 secondary_input[64];
};
struct Explorer {
@ -22,19 +22,6 @@ global ExplorerUI explorer_ui;
global Explorer explorer;
global Ex1_NTFS_Enumeration ex1_ntfs;
// One-time setup of the NTFS enumeration threads held in `ex1_ntfs`:
// allocates `thread_count` Thread slots and initializes each of them to run
// ntfs_enumeration_thread_proc. Calls after the first are no-ops.
void ntfs_create_enumeration_threads (s32 thread_count) {
    if (ex1_ntfs.initialized) { return; }

    Timed_Block_Print("Thread initialization (ntfs)");
    ex1_ntfs.initialized = true;
    ex1_ntfs.threads = ArrayView<Thread>(thread_count);
    ex1_ntfs.threads_in_flight.allocator = GPAllocator();

    for_each(t, ex1_ntfs.threads) {
        string thread_name = format_string("ntfs_enumeration_thread#%d", t);
        Thread* slot = &ex1_ntfs.threads[t];
        bool success = thread_init(slot, ntfs_enumeration_thread_proc, thread_name);
        Assert(success);
    }
}
#define HOTKEY_ID_BRING_TO_FOREGROUND 1
#define VK_SPACE_KEY_CODE 0x20
// #define HOTKEY_ID_HIDE_TITLEBAR
@ -76,55 +63,112 @@ bool Ex1_check_key_combinations() {
return false;
}
void Ex1_show_ntfs_workspace () { using namespace ImGui;
push_allocator(temp());
for_each(d, ntfs_workspace.drives) {
OS_Drive* drive = ntfs_workspace.drives[d];
Text("%d. %s paths: %lld, files: %lld",
d, drive->label.data,
drive->data->paths.offsets->count,
drive->data->files.offsets->count);
// #Workspaces are FOR DEVELOPMENT ONLY.
// UI/debug state for the "Enumerated Data Workspace" window.
struct Ex1_Workspace {
s32 path_select;
s32 file_select; // index chosen with the "Select file index" slider; indexes the sorted arrays below.
// Radix sorts over the enumerated u64 columns (built once, see sort_completed):
RadixSort file_size_radix;
RadixSort file_modtime_radix;
RadixSort dir_modtime_radix;
bool sort_completed; // true once the sorts and the reordered arrays below have been built.
// Reordered strings:
ArrayView<string> files_sorted_by_size;
ArrayView<string> files_sorted_by_modtime;
};
global Ex1_Workspace ex1w;
void free_ex1_workspace_and_reset () {
if (ex1w.sort_completed) {
push_allocator(GPAllocator());
radix_sort_free(&ex1w.file_size_radix);
radix_sort_free(&ex1w.file_modtime_radix);
radix_sort_free(&ex1w.dir_modtime_radix);
array_free(ex1w.files_sorted_by_size);
array_free(ex1w.files_sorted_by_modtime);
zero_struct(&ex1w);
}
// SliderInt("Results to Show", &ntfs_workspace.results_to_show, 0, 50);
for_each(d, ntfs_workspace.drives) {
OS_Drive* drive = ntfs_workspace.drives[d];
// #TODO: Radio button for choosing between paths, files
char* rb1 = format_cstring("paths##%s", drive->label.data);
RadioButton(rb1, &ntfs_workspace.supplementary[d].radio_button, 1);
SameLine();
char* rb2 = format_cstring("files##%s", drive->label.data);
RadioButton(rb2, &ntfs_workspace.supplementary[d].radio_button, 0);
SameLine();
s32 max_count = (s32)drive->data->paths.offsets->count;
if (ntfs_workspace.supplementary[d].radio_button == 0) {
max_count = (s32)drive->data->files.offsets->count;
}
char* slider_label = format_cstring("%s index", drive->label.data);
if (SliderInt(slider_label, &ntfs_workspace.supplementary[d].index, 0, max_count)) { }
}
// #TODO: Move all sort stuff to OS_Win32?
// Make a general version of this that takes two ArrayView<T> and reorders.
// There's no need to do this until we have the filtered results.
void os_win32_reorder_files_by_radix (RadixSort* r, ArrayView<string>* files, bool reverse_order=false) {
Timed_Block_Print("os_win32_reorder_files_by_radix");
// Where are my source files!?
(*files) = ArrayView<string>(r->ranks.count);
for_each(f, (*files)) {
// (*files)[f] = get_file_copy(stfe, r->ranks[f]);
(*files)[f] = get_file_string_view(stfe, r->ranks[f]);
}
for_each(d, ntfs_workspace.drives) {
if (ntfs_workspace.supplementary[d].radio_button == 0) { // files
OS_Drive* drive = ntfs_workspace.drives[d];
Dense_FS* dfs = drive->data;
DFS_Array* dfsa = &drive->data->files;
s64 file_index = ntfs_workspace.supplementary[d].index;
DFS_Value v = get_value(dfs, dfsa, file_index);
// #TODO NOTE: v.full_path is NOT the full path #rename
Text("Filename: %s, parent_id: %d", copy_string(v.full_path).data, v.parent_index);
string full_path = get_full_path_from_index(drive, dfsa, file_index);
Text("Full path: %s", full_path.data);
bool success = file_length(full_path, &v.size); // temp, obviously we don't wanna call this every frame lol
Text(" > size: %lld B", v.size);
Text(" > size: %s", format_bytes(v.size).data);
// Text(" > modtime: %s", idk how to convert FILETIME to calendar time
} else {
// DFS_Array* dfsa = &ntfs_workspace.drives[d]->data->paths;
}
}
void Ex1_show_enumeration_workspace () { using namespace ImGui;
push_imgui_window("Enumerated Data Workspace");
if (!ex1w.sort_completed) {
push_allocator(GPAllocator());
Timed_Block_Print("radix_sort_u64: file sizes, file modtimes, directory modtimes");
ArrayView<u64> sizes = to_view(*stfe->files.sizes);
radix_sort_u64(&ex1w.file_size_radix, sizes.data, (u32)sizes.count);
ArrayView<u64> file_modtimes = to_view(*stfe->files.modtimes);
radix_sort_u64(&ex1w.file_modtime_radix, file_modtimes.data, (u32)file_modtimes.count);
ArrayView<u64> dirs_modtimes = to_view(*stfe->dirs.modtimes);
radix_sort_u64(&ex1w.dir_modtime_radix, dirs_modtimes.data, (u32)dirs_modtimes.count);
// Create ArrayView<string>, ArrayView<u64> sizes, and ArrayView<u64> modtimes
os_win32_reorder_files_by_radix(&ex1w.file_size_radix, &ex1w.files_sorted_by_size);
os_win32_reorder_files_by_radix(&ex1w.file_modtime_radix, &ex1w.files_sorted_by_modtime);
// reordering by the rank permutations generated by RadixSort.
ex1w.sort_completed = true;
}
if (!ex1w.sort_completed) { return; }
SeparatorText("Files ordered by modtime");
s32 file_count = (s32)ex1w.files_sorted_by_modtime.count;
SliderInt("Select file index", &ex1w.file_select, 0, file_count-1);
string file_name = copy_string(ex1w.files_sorted_by_modtime[ex1w.file_select]);
Text("%s", file_name.data);
u32 radix_index = rank(&ex1w.file_modtime_radix, ex1w.file_select);
Text("date modified: %s", format_time_datetime(get_file_modtime(stfe, radix_index)).data);
SeparatorText("Files ordered by size");
file_count = (s32)ex1w.files_sorted_by_size.count;
// SliderInt("Select file index", &ex1w.file_select, 0, file_count-1);
file_name = copy_string(ex1w.files_sorted_by_size[ex1w.file_select]);
Text("%s", file_name.data);
radix_index = rank(&ex1w.file_size_radix, ex1w.file_select);
Text("size: %s", format_bytes(get_file_size_bytes(stfe, radix_index)).data);
// Text("date modified: %s", format_time_datetime(get_file_modtime(stfe, radix_index)).data);
if (Button("Count unique UTF-8 characters")) {
count_unique_utf8_chars();
}
Text("unique_codepoints_utf32.count: %", unique_codepoints_utf32.count);
for_each(u, unique_codepoints_utf32) {
Text("[%d] Code point as hex: 0x%X", u, unique_codepoints_utf32[u]);
}
Text("files_sorted_by_size size in bytes: %lld", ex1w.files_sorted_by_size.count * sizeof(string));
Text("files_sorted_by_modtime size in bytes: %lld", ex1w.files_sorted_by_modtime.count * sizeof(string));
for (s64 i = 1; i < 128; i += 1) {
u8 cstring[2] = {};
cstring[0] = (u8)i;
cstring[1] = 0;
Text("codepoint[0x%X]: %s, count: %lld", i, cstring, count_ascii_codepoints[i]);
}
}
void Ex1_Control_Panel () { using namespace ImGui;
f64 frame_time = GetUnixTimestamp();
Table<string, OS_Drive*>* drive_table = get_drive_table();
push_imgui_window("Control Panel");
@ -133,28 +177,39 @@ void Ex1_Control_Panel () { using namespace ImGui;
if (/*Button("Discover drives") ||*/!table_is_valid(drive_table)) { Win32_Discover_Drives(); }
// Text("ntfs_workspace_files_loaded: %s", ntfs_workspace_files_loaded()? "true": "false");
if (ntfs_workspace_files_loaded()) {
Ex1_show_ntfs_workspace();
return;
}
// if (ntfs_workspace_files_loaded()) {
// Ex1_show_ntfs_workspace();
// return;
// }
Text("drive_table is valid: %d", table_is_valid(drive_table));
bool all_drives_enumerated = stfe && stfe->thread_completed;
push_allocator(temp());
ArrayView<OS_Drive*> drives = os_get_available_drives(); // only includes drives that are ready.
for_each(i, drives) {
OS_Drive* drive = drives[i];
Text(" > [%d] drive letter: %s (is_present: %d)", i + 1, drive->label.data, drive->is_present);
if (drive->time_to_enumerate != 0) {
SameLine();
Text("Enumerated in %.2f seconds", drive->time_to_enumerate);
if (!USN_Journal_Monitoring_Ready(drives[0]) && Button("Enable USN Monitoring for all drives")) {
Win32_Enable_USN_Journal_Monitoring(drives);
}
if (USN_Journal_Monitoring_Ready(drives[0]) && Button("Query USN Journal")) {
Query_USN_Journal(drives);
}
if (!all_drives_enumerated) {
// Text("drive_table is valid: %d", table_is_valid(drive_table));
for_each(i, drives) {
OS_Drive* drive = drives[i];
Text(" > [%d] drive letter: %s (is_present: %d)", i + 1, drive->label.data, drive->is_present);
if (drive->time_to_enumerate != 0) {
SameLine();
Text("Enumerated in %.2f seconds", drive->time_to_enumerate);
}
// SameLine();
// if (Button(format_cstring("Read NTFS MFT Raw##%s", drive->label.data))) {
// push_arena(thread_context()->arena);
// Error* error = NTFS_MFT_read_raw(drive);
// }
}
// SameLine();
// if (Button(format_cstring("Read NTFS MFT Raw##%s", drive->label.data))) {
// push_arena(thread_context()->arena);
// Error* error = NTFS_MFT_read_raw(drive);
// }
}
s32 drives_enumerated = 0;
@ -167,128 +222,79 @@ void Ex1_Control_Panel () { using namespace ImGui;
drives_enumerated += 1;
}
}
bool all_drives_enumerated = !ex1_ntfs.threads_in_flight.count
&& (drives_enumerated == drives.count);
// bool all_drives_enumerated = !ex1_ntfs.threads_in_flight.count
// && (drives_enumerated == drives.count);
string file_path = format_string_temp("%s_DriveData.bin", os_get_machine_name().data);
// string file_path = format_string_temp("%s_DriveData.bin", os_get_machine_name().data);
string file_path = "D:/Projects/Cpp/Musa-Cpp-Lib-V2/bin/MUSA-PC3_DriveData.bin";// FIXED path.
Text("fixed file_path: %s", file_path.data);
if (!all_drives_enumerated && file_exists(file_path)) { // #autoload
Deserialize_ST_File_Enumeration(file_path);
}
if (drives.count > 0 && !all_drives_enumerated && file_exists(file_path) && Button("Load from file (this machine)")) {
Deserialize_Win32_Drives(file_path);
Deserialize_ST_File_Enumeration(file_path);
// Deserialize_Win32_Drives(file_path);
}
if (file_enum_multithreading_started()) {
if (thread_is_done(drive_enumeration->master_thread)) {
push_allocator(GPAllocator());
// Thread* thread = drive_enumeration->master_thread;
// auto task = thread_task(Drive_Enumeration);
// Nothing to free?
thread_deinit(drive_enumeration->master_thread, true);
// if (file_enum_multithreading_started()) {
// if (thread_is_done(drive_enumeration->master_thread)) {
// push_allocator(GPAllocator());
// // Thread* thread = drive_enumeration->master_thread;
// // auto task = thread_task(Drive_Enumeration);
// // Nothing to free?
// thread_deinit(drive_enumeration->master_thread, true);
// }
// }
// #FileEnumerationST
if (stfe && stfe->thread_started) {
// print dirs enumerated, etc
if (!stfe->thread_completed) Text("Enumeration Thread Active (elapsed: %s)", format_time_seconds(frame_time-stfe->start_time).data);
if (stfe->dirs.offsets) {
s64 dirs_enumerated = stfe->dirs.offsets->count;
Text("Dirs enumerated: %lld", dirs_enumerated);
// if (!stfe->thread_completed) Text("Current dir: %s", get_last_path_copy(stfe).data);
}
if (stfe->files.offsets) {
s64 files_enumerated = stfe->files.offsets->count;
Text("Files enumerated: %lld", files_enumerated);
// if (!stfe->thread_completed) Text("Current file: %s", get_last_file_copy(stfe).data);
}
if (stfe->thread_completed) {
Text("String bytes stored: %s", format_bytes(stfe->dirs.strings->count + stfe->files.strings->count).data);
Text("Elapsed time: %s", format_time_seconds(stfe->end_time-stfe->start_time).data);
}
}
if (stfe && stfe->thread_started && !stfe->thread_completed) {
Assert(stfe->master_thread != nullptr);
if (thread_is_done(stfe->master_thread)) {
thread_deinit(stfe->master_thread, true);
stfe->thread_completed = true;
}
}
if (all_drives_enumerated && Button("Save enumerated data")) {
if (!Serialize_ST_File_Enumeration(file_path)) {
log_error("Failed to write enumerated files");
os_log_error();
}
}
if (all_drives_enumerated && Button("Reset State")) {
free_ex1_workspace_and_reset();
free_stfe_and_reset();
return;
}
if (all_drives_enumerated) {
Ex1_show_enumeration_workspace();
}
if (drives.count > 0 && !all_drives_enumerated && Button("Enumerate all NTFS drives")) { // && ex1_ntfs.initialized
// if drive count exceeds the number of threads, we need to group them so each thread
// can enumerate multiple drives.
// We need to distribute the drives across our available threads:
{ run_multithreaded_enumeration_thread();
return;
}
push_allocator(GPAllocator());
Array<ArrayView<OS_Drive*>> drive_split;
drive_split.allocator = temp(); // this is only needed for this frame
if (drives.count > os_cpu_physical_core_count()) {
s32 thread_count = os_cpu_physical_core_count();
array_resize(drive_split, thread_count);
ntfs_create_enumeration_threads(thread_count);
s32 threads_to_create = thread_count;
s64 drives_per_thread = (drives.count / thread_count);
s64 remainder = drives.count % thread_count;
s64 current_drive = 0;
for_each(d, drive_split) {
if (d == drive_split.count) {
drive_split[d] = ArrayView<OS_Drive*>(remainder);
} else {
drive_split[d] = ArrayView<OS_Drive*>(drives_per_thread);
}
for (s64 i = 0; i < drive_split[d].count; i += 1) {
drive_split[d][i] = drives[current_drive];
current_drive += 1;
}
}
debug_break(); // #TODO: Check that the work has been distributed correctly.
} else { // more threads than drives, or same amount
s32 thread_count = (s32)drives.count;
array_resize(drive_split, drives.count);
ntfs_create_enumeration_threads(thread_count);
for_each(d, drives) {
auto drive = drives[d];
drive_split[d] = ArrayView<OS_Drive*>(1); // Arrays of size one are sad :pensive:
drive_split[d][0] = drive;
}
}
s64 active_thread_count = drive_split.count;
ex1_ntfs.threads_started = true;
for (s64 t = 0; t < active_thread_count; t += 1) {
Thread* thread = &ex1_ntfs.threads[t];
Arena* thread_arena = next_arena(Arena_Reserve::Size_64K);
push_arena(thread_arena);
auto thread_data = New<NTFS_Enumeration_Task>();
thread_data->pool = thread_arena;
thread_data->drives = drive_split[t];
thread_start(thread, thread_data);
array_add(ex1_ntfs.threads_in_flight, thread);
}
}
if (ex1_ntfs.threads_in_flight.count) {
Text("Threads in flight: %d", ex1_ntfs.threads_in_flight.count);
for_each(t, ex1_ntfs.threads_in_flight) {
if (thread_is_done(ex1_ntfs.threads_in_flight[t])) {
push_allocator(GPAllocator());
Thread* thread = ex1_ntfs.threads_in_flight[t];
auto task = thread_task(NTFS_Enumeration_Task);
array_free(task->drives);
// make sure to retreive any data you need to from here!
release_arena(task->pool);
thread_deinit(ex1_ntfs.threads_in_flight[t], false);
array_unordered_remove_by_index(ex1_ntfs.threads_in_flight, t);
t -= 1; // check this element index again!
}
}
}
if (ex1_ntfs.threads_started && !ex1_ntfs.threads_in_flight.count) {
// All threads are complete, we're free to clean up remaining memory
push_allocator(GPAllocator());
array_free(ex1_ntfs.threads);
array_free(ex1_ntfs.threads_in_flight);
// Instead maybe we should just memset this to zero.
reset_struct(&ex1_ntfs);
}
// How do I tell when all files are enumerated?
// check drives[i]->data.paths.wstrings.count count?
if (all_drives_enumerated && Button("Save drive data")) {
string file_path = format_string_temp("%s_DriveData.bin", os_get_machine_name().data);
bool success = Serialize_Win32_Drives(drives, file_path);
if (!success) { log_error("Failed to save Win32_Drive data"); }
}
if (all_drives_enumerated && Button("Clear all drive data")) {
os_clear_drive_data();
os_run_file_enumeration_single_threaded();
// os_run_file_enumeration_multithreaded(); // #disabled for now
return;
}
}
@ -296,14 +302,8 @@ void ImGui_Debug_Panel () { using namespace ImGui;
push_allocator(temp());
Begin("Debug Panel");
SeparatorText("ex1_ntfs");
Text("Threads in flight count: %d", ex1_ntfs.threads_in_flight.count);
for_each(i, ex1_ntfs.threads) {
Text(" [%d] initialized: %d, has_context: %d, has_data: %d",
i, ex1_ntfs.threads[i].proc != nullptr, ex1_ntfs.threads[i].context != nullptr, ex1_ntfs.threads[i].data != nullptr);
}
// #cpuid
// Text("[cpus] physical: %d, logical: %d, primary: %d, secondary: %d", os_cpu_physical_core_count(), os_cpu_logical_core_count(), os_cpu_primary_core_count(), os_cpu_secondary_core_count());
Text("[cpus] physical: %d, logical: %d, primary: %d, secondary: %d", os_cpu_physical_core_count(), os_cpu_logical_core_count(), os_cpu_primary_core_count(), os_cpu_secondary_core_count());
{ SeparatorText("Arena In-Use List");
lock_guard(&arena_free_list->mutex);
for (u8 i = 0; i < Arena_Reserve_Count; i += 1) {
@ -338,6 +338,10 @@ void ImGui_Debug_Panel () { using namespace ImGui;
SeparatorText("Child Threads");
SeparatorText("Errors");
ArrayView<Error*> errors = get_all_errors(thread_context());
if (errors.count && Button("Clear all errors")) {
clear_errors(thread_context());
errors.count = 0;
}
for_each(e, errors) {
auto button_label = format_cstring("Clear##%d", e);
if (Button(button_label)) {

50
src/String_Analysis.cpp Normal file
View File

@ -0,0 +1,50 @@
// 1. Count unique occurrences:
// Occurrence tally for single-byte (ASCII) characters, indexed by codepoint value (0..127).
// Incremented by count_unique_chars_from_string for every codepoint that decodes from one byte.
s64 count_ascii_codepoints[128] = {};
// s64 extended_codepoints[65536] = {};
// Multi-byte codepoints (stored as UTF-32) seen so far. Entries are added through
// array_add_if_unique, so only membership is tracked here, not occurrence counts.
Array<u32> unique_codepoints_utf32;
// Thin wrapper: forwards `unique` to array_add_if_unique on the file-level
// unique_codepoints_utf32 table, so call sites do not need to name the table.
force_inline void unique_codepoints_utf32_add_unique (u32 unique) { array_add_if_unique(unique_codepoints_utf32, unique); }
// Walks the UTF-8 bytes of `s` and records every decoded character in the file-level tables:
//   - codepoints that decode from a single byte bump count_ascii_codepoints[codepoint];
//   - codepoints that decode from several bytes are added to unique_codepoints_utf32.
// Backslash bytes (0x5C) are skipped and never counted.
// `s` is taken by value, so the caller's view is not modified; nothing is returned.
void count_unique_chars_from_string (string s) {
    string s_copy = s; // cursor over the remaining bytes
    while (s_copy.count > 0) {
        if (s_copy.data[0] == 0x5C) { // `\` character
            s_copy.data  += 1;
            s_copy.count -= 1;
            // Go back to the loop condition: the backslash may have been the last byte,
            // in which case there is nothing left to hand to the decoder.
            continue;
        }

        // Both outputs are zero-initialized so a decoder that leaves them untouched on
        // failure cannot make us advance by an indeterminate amount.
        u32 utf32_codepoint = 0;
        s64 codepoint_source_length = 0;
        bool success = character_utf8_to_utf32(s_copy.data, s_copy.count, &utf32_codepoint, &codepoint_source_length);

        if (success) {
            if (codepoint_source_length == 1) {
                Assert(utf32_codepoint <= 127);
                // Add to table
                count_ascii_codepoints[utf32_codepoint] += 1;
            } else {
                unique_codepoints_utf32_add_unique(utf32_codepoint);

                push_allocator(temp());
                string codepoint = copy_string(string_view(s_copy, 0, codepoint_source_length));
                // %s needs a character pointer, not the string struct itself.
                log_todo("#TODO: keep track of unique codepoints. Codepoint: %s", codepoint.data);
            }
        }

        // Always make forward progress, even on a malformed sequence, so the loop terminates.
        if (codepoint_source_length <= 0) {
            codepoint_source_length = 1;
        }
        s_copy.data  += codepoint_source_length;
        s_copy.count -= codepoint_source_length;
    }
}
void count_unique_utf8_chars () { Timed_Block_Print("count_unique_utf8_chars");
unique_codepoints_utf32.allocator = GPAllocator();
Assert(stfe != nullptr);
for (s64 i = 0; i < stfe->dirs.offsets->count; i += 1) {
string sample = get_file_string_view(stfe, i);
count_unique_chars_from_string(sample);
}
}

View File

@ -44,7 +44,7 @@ void Explorer_ImGui_Application_Win32 () {
// Setup Dear ImGui context
IMGUI_CHECKVERSION();
printf("ImGui Version %s \n", ImGui::GetVersion());
log("ImGui Version %s \n", ImGui::GetVersion());
imgui_context = ImGui::CreateContext();