Implement RadixSort and sort by modification time and size.

This commit is contained in:
Musa Mahmood 2025-12-11 16:43:25 -05:00
parent d1d775075d
commit bd8729e8ae
7 changed files with 279 additions and 7 deletions

View File

@ -254,3 +254,59 @@ struct Auto_Release {
}
}
};
// #FixedArena procedures:
// Creates a FixedArena that lives inside its own backing block.
//   size:              total byte size of the block, including the FixedArena header itself.
//   backing_allocator: allocator pushed while the block is created; it is also stored in the
//                      arena so destroy_arena can hand the block back to it.
// Returns a pointer to the header, which sits at the very start of the block. The cursor starts
// just past the header so the first allocation cannot overwrite it.
FixedArena* bootstrap_fixed_arena (s64 size, Allocator backing_allocator) {
    push_allocator(backing_allocator);

    // Compare as signed: sizeof() is unsigned, so without the cast a negative `size` would be
    // converted to a huge unsigned value and slip through this check.
    Assert(size >= (s64)sizeof(FixedArena));

    ArrayView<u8> memory = ArrayView<u8>(size);
    Assert(memory.data != nullptr); // the header is written into this block right below

    FixedArena* result = (FixedArena*)memory.data;
    result->memory     = memory;
    result->cursor     = sizeof(FixedArena);
    result->allocator  = backing_allocator;

    return result;
}
// Hands the arena back to the allocator recorded in its header.
// The allocator handle is copied out first because the header itself is what gets deleted.
force_inline void destroy_arena (FixedArena* arena) {
    Allocator backing = arena->allocator;
    Delete(backing, arena);
}
// Wraps a FixedArena in an Allocator handle: the proc is fixed_arena_allocator_proc and the
// data pointer is the arena itself.
Allocator allocator (FixedArena* arena) {
    Allocator handle = { fixed_arena_allocator_proc, arena };
    return handle;
}
void* fixed_arena_allocator_proc (Allocator_Mode mode, s64 requested_size, s64 old_size, void* old_memory, void* allocator_data) {
constexpr s64 DEFAULT_ALIGNMENT = 16; // maybe make this modifiable as part of FixedArena struct?
FixedArena* arena = (FixedArena*)allocator_data;
Assert(arena != nullptr);
switch (mode) {
case Allocator_Mode::ALLOCATE: {
arena->cursor = Align<s64>(arena->cursor, DEFAULT_ALIGNMENT);
void* result = &arena->memory[arena->cursor];
arena->cursor += requested_size;
Assert(arena->cursor <= arena->memory.count);
return result;
} break;
case Allocator_Mode::RESIZE: {
arena->cursor = Align<s64>(arena->cursor, DEFAULT_ALIGNMENT);
void* result = &arena->memory[arena->cursor];
arena->cursor += requested_size;
Assert(arena->cursor <= arena->memory.count);
s64 size_to_copy = old_size < requested_size ? old_size : requested_size;
if (result && size_to_copy) { memcpy(result, old_memory, size_to_copy); }
return result;
} break;
case Allocator_Mode::DEALLOCATE: {
return nullptr; // unused
} break;
case Allocator_Mode::DETAILS: {
if (allocator_data == nullptr) {
return "fixed_arena_allocator_proc: data pointer is null!";
}
return "fixed_arena_allocator_proc: with valid data";
} break;
}
return nullptr;
}

View File

@ -138,4 +138,20 @@ struct Push_Alignment { // #rename to Arena_Push_Alignment?
// Do this later:
// arena_lock_pages :: (using arena: *Arena, start_address: *u8, byte_count: s64)
// arena_unlock_pages :: (using arena: *Arena, start_address: *u8, byte_count: s64)
// arena_unlock_pages :: (using arena: *Arena, start_address: *u8, byte_count: s64)
// #FixedArena is a super simple arena where you allocate a fixed block up front (fully committed),
// and use it as-is.
// #NOTE: we can save space by always backing with a known allocator (e.g. GPAllocator()).
// Header of a fixed-size arena. bootstrap_fixed_arena places this header at the start of the
// block described by `memory`.
struct FixedArena {
ArrayView<u8> memory; // the whole backing block, header included
s64 cursor; // byte offset into `memory` where the next allocation is carved; starts at sizeof(FixedArena)
Allocator allocator; // backing allocator; destroy_arena passes it to Delete
};
// Allocator procedure for FixedArena; `allocator_data` is the FixedArena*.
void* fixed_arena_allocator_proc (Allocator_Mode mode, s64 requested_size, s64 old_size, void* old_memory, void* allocator_data);
// #FixedArena API
// Creates an arena of `size` total bytes (header included) from `backing_allocator`.
FixedArena* bootstrap_fixed_arena (s64 size, Allocator backing_allocator);
// Deletes the arena through the allocator stored in it.
force_inline void destroy_arena (FixedArena* arena);
// Returns an Allocator handle { fixed_arena_allocator_proc, arena }.
Allocator allocator (FixedArena* arena);

View File

@ -107,6 +107,7 @@ Allocator allocator (ExpandableArena* arena_ex) {
}
// #TODO: currently this keeps the final arena's memory. Fix this!
// This is not implemented correctly!
void arena_reset_to (ExpandableArena* arena_ex, Arena* last_arena, u8* starting_point) {
// going backwards from end of arena list

128
lib/Base/RadixSort.cpp Normal file
View File

@ -0,0 +1,128 @@
// State for an index-producing radix sort: the input is never moved, only a list of indices
// into it is ordered.
struct RadixSort {
ArrayView<u32> ranks; // most recent index ordering; after radix_sort returns, ranks[i] is the input index at sorted position i
ArrayView<u32> ranks2; // destination buffer for the current pass; swapped with `ranks` after each pass
Allocator allocator; // allocator for both buffers; radix_sort_init falls back to context_allocator() when proc is null
bool valid_ranks; // false until `ranks` holds a usable ordering (set by the first executed pass or the linear fill)
};
// Allocates both rank buffers with room for `items_to_allocate` indices and marks the sorter
// as having no valid ordering yet. If no allocator was set on `r`, the current context
// allocator is adopted and remembered for radix_sort_free.
void radix_sort_init (RadixSort* r, u32 items_to_allocate) {
    bool has_allocator = (r->allocator.proc != nullptr);
    if (!has_allocator) {
        r->allocator = context_allocator();
    }

    push_allocator(r->allocator);

    r->valid_ranks = false;
    r->ranks       = ArrayView<u32>(items_to_allocate);
    r->ranks2      = ArrayView<u32>(items_to_allocate);
}
// Releases both rank buffers through the sorter's own allocator and returns the sorter to its
// "not initialized" state, so a later radix_sort_u64 call sees ranks.count == 0 and
// re-allocates instead of writing into freed memory.
void radix_sort_free (RadixSort* r) {
    Assert(r->allocator.proc != nullptr);
    push_allocator(r->allocator);

    array_free(r->ranks);
    array_free(r->ranks2);

    // Clear the views explicitly; harmless if array_free already did so.
    r->ranks.data   = nullptr;
    r->ranks.count  = 0;
    r->ranks2.data  = nullptr;
    r->ranks2.count = 0;
    r->valid_ranks  = false;
}
// RadixSort provides an array of indices in sorted order.
// Returns the input index stored at sorted position `i`.
// With ARRAY_ENABLE_BOUNDS_CHECKING set, an out-of-range `i` triggers debug_break() before the
// lookup is performed.
u32 rank (RadixSort* r, s64 i) {
    Assert(r != nullptr);
#if ARRAY_ENABLE_BOUNDS_CHECKING
    bool in_range = (i >= 0) && (i < r->ranks.count);
    if (!in_range) { debug_break(); /*INDEX OOB*/ }
#endif
    return r->ranks[i];
}
// Counts, for every byte position of T, how often each byte value occurs in `buffer`.
//   r:         unused here; kept so existing callers do not change.
//   buffer:    `count` elements of T, read as raw bytes.
//   histogram: sizeof(T) consecutive tables of 256 counters; table b belongs to byte b of each
//              element. Counters are incremented, so the caller must pass them zeroed.
// Works for any sizeof(T): the per-byte loop has a compile-time trip count, so no special
// casing per element size is needed.
template <typename T> void create_histograms (RadixSort* r, T* buffer, u32 count, u32* histogram) {
    constexpr u32 bucket_count = sizeof(T);
    (void)r;

    u8* element_bytes = (u8*)buffer;
    for (u32 i = 0; i < count; i += 1) {
        for (u32 b = 0; b < bucket_count; b += 1) {
            histogram[256 * b + element_bytes[b]] += 1;
        }
        element_bytes += bucket_count;
    }
}
// Byte-wise LSD radix sort that orders indices instead of moving the input.
//   r:     sorter whose `ranks`/`ranks2` buffers must each hold at least `count` entries.
//   input: `count` elements of T, compared as unsigned bytes from least to most significant.
//          Assumes little endian! Signed or floating-point T will not order correctly.
//   count: number of elements.
// On return r->ranks[i] is the index into `input` of the element at sorted position i.
// If r->valid_ranks is already true on entry, the existing contents of r->ranks are used as
// the starting order for the first pass.
template <typename T> void radix_sort (RadixSort* r, T* input, u32 count) {
    constexpr u32 T_SIZE = sizeof(T);

    Assert(r != nullptr);
    if (input == nullptr || count == 0) return; // nothing to order; also avoids reading input[0]
    Assert(r->ranks.count >= (s64)count && r->ranks2.count >= (s64)count);

    // Allocate histograms & offsets on the stack:
    u32  histogram [256 * T_SIZE] = {};
    u32* link [256];

    create_histograms(r, input, count, histogram);

    // Radix sort, j is the pass number, (0 = LSB, P = MSB)
    for (u32 j = 0; j < T_SIZE; j += 1) {
        u32* h = &histogram[j * 256];

        // If every element has the same byte at position j, this pass cannot change the order.
        u8 first_key = ((u8*)input)[j];
        if (h[first_key] == count) {
            continue;
        }

        // Create offsets: link[v] points at the next free slot for byte value v in ranks2.
        link[0] = r->ranks2.data;
        for (u32 i = 1; i < 256; i += 1) { // 1..255
            link[i] = link[i-1] + h[i-1];
        }

        // Perform Radix Sort. Keys are fetched through element pointers (input + idx) rather
        // than a 32-bit `idx * T_SIZE` byte offset, which would wrap for large counts.
        if (!r->valid_ranks) {
            for (u32 i = 0; i < count; i += 1) {
                u8 key = ((u8*)(input + i))[j];
                *link[key] = i;
                link[key] += 1;
            }
            r->valid_ranks = true;
        } else {
            for (u32 i = 0; i < count; i += 1) {
                u32 idx = r->ranks[i];
                u8  key = ((u8*)(input + idx))[j];
                *link[key] = idx;
                link[key] += 1;
            }
        }

        // Swap pointers for next pass. Valid indices - the most recent ones - are in ranks after the swap.
        ArrayView<u32> ranks2_temp = r->ranks2;
        r->ranks2 = r->ranks;
        r->ranks  = ranks2_temp;
    }

    // All values were equal; generate linear ranks
    if (!r->valid_ranks) {
        for (u32 i = 0; i < count; i += 1) {
            r->ranks[i] = i;
        }
        r->valid_ranks = true;
    }
}
// NOTE: For a small number of elements it's more efficient to use insertion sort
// Sorts `count` u64 values by producing an index ordering in r->ranks.
// Does nothing when `input` is null or `count` is zero.
// The rank buffers are (re)allocated whenever their size does not match `count`: a sorter that
// was sized for an earlier, different count would otherwise be overrun (count grew) or keep
// stale trailing entries that rank() and r->ranks.count would still expose (count shrank).
void radix_sort_u64 (RadixSort* r, u64* input, u32 count) {
    if (input == nullptr || count == 0) return;

    if (r->ranks.count != (s64)count) {
        if (r->ranks.count != 0) {
            radix_sort_free(r);
        }
        radix_sort_init(r, count); // also resets valid_ranks, so no stale ordering is reused
    }

    radix_sort(r, input, count);
}

View File

@ -1214,6 +1214,16 @@ string get_file_copy (ST_File_Enumeration* stfe, s64 index) {
string file = {strlength, string_ptr};
return copy_string(file);
}
// Returns a non-owning string for file `index`: the length comes from the files' `lengths`
// array and the data pointer addresses the shared `strings` buffer at the file's offset.
// No copy is made and `index` is not range-checked here.
// NOTE(review): the view presumably dangles if the strings buffer is reallocated or freed —
// confirm the enumeration is final before holding on to the result.
string get_file_string_view (ST_File_Enumeration* stfe, s64 index) {
    STFE_Results* files = &stfe->files;

    u32 byte_offset = (*files->offsets)[index];
    s64 byte_count  = (*files->lengths)[index];

    string view = {byte_count, &files->strings->data[byte_offset]};
    return view;
}
string get_path_copy (ST_File_Enumeration* stfe, s64 index) {
STFE_Results* r = &stfe->dirs;
Assert(index >= 0 && index < count_paths(stfe));
@ -1261,6 +1271,7 @@ s64 win32_file_enum_thread_proc (Thread* thread) {
push_allocator(temp());
auto_release_temp();
// This needs to be null-terminated:
// #TODO: Replace this #LIFO array with an arena-backed FIFO stack (singly linked-list).
string next_directory = copy_string(pop(paths_to_enumerate)); // LIFO. maybe not the best way?
wstring wildcard_name = utf8_to_wide(format_string("%s\\*", next_directory.data));

View File

@ -36,6 +36,7 @@
#include "lib/Base/Arena.cpp"
#include "lib/Base/String.cpp"
#include "lib/Base/RadixSort.cpp"
#include "lib/Base/Base_Thread_Context.cpp"
#include "lib/Base/ErrorType.cpp"

View File

@ -124,15 +124,37 @@ void Ex1_show_ntfs_workspace () { using namespace ImGui;
}
}
// #Workspaces are FOR DEVELOPMENT ONLY.
// UI state for the enumeration workspace: slider selections, one RadixSort per sorted key, and
// the file-name arrays rebuilt in sorted order.
struct Ex1_Workspace {
s32 path_select; // selected path index (only referenced by currently commented-out UI code)
s32 file_select; // selected position in the sorted file lists; driven by the "Select file index" slider
RadixSort file_size_radix; // ordering of files by size
RadixSort file_modtime_radix; // ordering of files by modification time
RadixSort dir_modtime_radix; // ordering of directories by modification time
bool sort_completed; // set once the sorts and reordered arrays below have been produced
// Reordered strings:
ArrayView<string> files_sorted_by_size; // file names arranged by file_size_radix
ArrayView<string> files_sorted_by_modtime; // file names arranged by file_modtime_radix
};
// Rebuilds `*files` as the file names of the global `stfe` enumeration arranged in the order
// given by `r->ranks`.
//   r:             sorter whose ranks were produced from an array parallel to stfe's files.
//   files:         output array; any previous contents are freed first, then it is re-allocated
//                  with r->ranks.count entries from the current context allocator.
//                  NOTE(review): freeing assumes the previous array came from the same context
//                  allocator that is active now — true for the visible caller, confirm for others.
//   reverse_order: when true, the result runs from the last rank to the first (descending).
// Entries are views into the enumeration's string storage (get_file_string_view), not copies.
void reorder_files_by_radix (RadixSort* r, ArrayView<string>* files, bool reverse_order=false) {
    Timed_Block_Print("reorder_files_by_radix");

    // Release the array from an earlier sort so repeated sorting does not leak it.
    if (files->data != nullptr) {
        array_free(*files);
    }

    (*files) = ArrayView<string>(r->ranks.count);

    s64 last_position = (s64)r->ranks.count - 1;
    for_each(f, (*files)) {
        s64 sorted_position = reverse_order ? (last_position - (s64)f) : (s64)f;
        (*files)[f] = get_file_string_view(stfe, r->ranks[sorted_position]);
    }
}
// Single shared instance of the enumeration workspace state used by the UI code below.
global Ex1_Workspace ex1w;
void Ex1_show_enumeration_workspace () { using namespace ImGui;
push_imgui_window("Enumerated Data Workspace");
// #TODO SliderInt for each
/*
SliderInt("Select path index", &ex1w.path_select, 0, count_paths(stfe)-1);
Text("%s", get_path_copy(stfe, ex1w.path_select).data);
Text("time modified: %s", format_time_datetime(get_path_modtime(stfe, ex1w.path_select)).data);
@ -141,7 +163,44 @@ void Ex1_show_enumeration_workspace () { using namespace ImGui;
Text("%s", get_file_copy(stfe, ex1w.file_select).data);
Text("size: %s", format_bytes(get_file_size_bytes(stfe, ex1w.file_select)).data);
Text("time modified: %s", format_time_datetime(get_file_modtime(stfe, ex1w.file_select)).data);
*/
// #TODO: size, modtime
if (!ex1w.sort_completed || Button("sort file sizes")) {
push_allocator(GPAllocator());
Timed_Block_Print("radix_sort_u64: file sizes, file modtimes, directory modtimes");
ArrayView<u64> sizes = to_view(*stfe->files.sizes);
radix_sort_u64(&ex1w.file_size_radix, sizes.data, (u32)sizes.count);
ArrayView<u64> file_modtimes = to_view(*stfe->files.modtimes);
radix_sort_u64(&ex1w.file_modtime_radix, file_modtimes.data, (u32)file_modtimes.count);
ArrayView<u64> dirs_modtimes = to_view(*stfe->dirs.modtimes);
radix_sort_u64(&ex1w.dir_modtime_radix, dirs_modtimes.data, (u32)dirs_modtimes.count);
// Create ArrayView<string>, ArrayView<u64> sizes, and ArrayView<u64> modtimes
reorder_files_by_radix(&ex1w.file_size_radix, &ex1w.files_sorted_by_size);
reorder_files_by_radix(&ex1w.file_modtime_radix, &ex1w.files_sorted_by_modtime);
// reordering by the rank permutations generated by RadixSort.
ex1w.sort_completed = true;
}
if (ex1w.sort_completed) {
SeparatorText("Files ordered by modtime");
s32 file_count = (s32)ex1w.files_sorted_by_modtime.count;
SliderInt("Select file index", &ex1w.file_select, 0, file_count-1);
string file_name = copy_string(ex1w.files_sorted_by_modtime[ex1w.file_select]);
Text("%s", file_name.data);
u32 radix_index = rank(&ex1w.file_modtime_radix, ex1w.file_select);
Text("date modified: %s", format_time_datetime(get_file_modtime(stfe, radix_index)).data);
}
if (ex1w.sort_completed) {
SeparatorText("Files ordered by size");
s32 file_count = (s32)ex1w.files_sorted_by_size.count;
// SliderInt("Select file index", &ex1w.file_select, 0, file_count-1);
string file_name = copy_string(ex1w.files_sorted_by_size[ex1w.file_select]);
Text("%s", file_name.data);
u32 radix_index = rank(&ex1w.file_size_radix, ex1w.file_select);
Text("size: %s", format_bytes(get_file_size_bytes(stfe, radix_index)).data);
// Text("date modified: %s", format_time_datetime(get_file_modtime(stfe, radix_index)).data);
}
}
void Ex1_Control_Panel () { using namespace ImGui;
@ -195,9 +254,9 @@ void Ex1_Control_Panel () { using namespace ImGui;
// && (drives_enumerated == drives.count);
string file_path = format_string_temp("%s_DriveData.bin", os_get_machine_name().data);
if (!all_drives_enumerated && file_exists(file_path)) {
Deserialize_ST_File_Enumeration(file_path);
}
// if (!all_drives_enumerated && file_exists(file_path)) {
// Deserialize_ST_File_Enumeration(file_path);
// }
if (drives.count > 0 && !all_drives_enumerated && file_exists(file_path) && Button("Load from file (this machine)")) {
Deserialize_ST_File_Enumeration(file_path);
// Deserialize_Win32_Drives(file_path);
@ -368,12 +427,12 @@ void ImGui_Debug_Panel () { using namespace ImGui;
push_allocator(temp());
Begin("Debug Panel");
SeparatorText("ex1_ntfs");
/*SeparatorText("ex1_ntfs");
Text("Threads in flight count: %d", ex1_ntfs.threads_in_flight.count);
for_each(i, ex1_ntfs.threads) {
Text(" [%d] initialized: %d, has_context: %d, has_data: %d",
i, ex1_ntfs.threads[i].proc != nullptr, ex1_ntfs.threads[i].context != nullptr, ex1_ntfs.threads[i].data != nullptr);
}
}*/
// #cpuid
// Text("[cpus] physical: %d, logical: %d, primary: %d, secondary: %d", os_cpu_physical_core_count(), os_cpu_logical_core_count(), os_cpu_primary_core_count(), os_cpu_secondary_core_count());
{ SeparatorText("Arena In-Use List");