From 680f735b72f767afcaafbcf392497146cbf443db Mon Sep 17 00:00:00 2001
From: Musa Mahmood <Musasmahmood@gmail.com>
Date: Fri, 21 Nov 2025 08:53:55 -0500
Subject: [PATCH] Update Array type to use custom/context allocator.

---
 cpp_lib.todo                           |  34 ++++-
 lib/Base/Allocator.cpp                 |  11 +-
 lib/Base/Allocator.h                   |  44 +++++--
 lib/Base/Arena.h                       |  12 +-
 lib/Base/Arena_Array.h                 |  22 ++--
 lib/Base/Arena_Table.cpp               |   3 +
 lib/Base/Array.h                       | 105 +++++++++------
 lib/Base/Base.cpp                      |  31 +----
 lib/Base/Base.h                        |   7 +-
 lib/Base/Base_String.h                 |  14 +-
 lib/Base/Base_Thread_Context.cpp       |  19 +++
 lib/Base/Base_Thread_Context.h         |   9 +-
 lib/Base/CPU_X64.cpp                   |  25 ++++
 lib/Base/ErrorCodes.cpp                |  11 +-
 lib/Base/General_Purpose_Allocator.cpp |  16 ++-
 lib/Base/General_Purpose_Allocator.h   |   7 -
 lib/Base/String.cpp                    |  60 ++++++---
 src/Base_Entry_Point.cpp               |  47 +++++++
 src/OS_Win32.cpp                       | 172 +++++++++++++++++++++----
 unity_build_exe.cpp                    |   4 +-
 20 files changed, 476 insertions(+), 177 deletions(-)
 create mode 100644 lib/Base/CPU_X64.cpp
 create mode 100644 src/Base_Entry_Point.cpp

diff --git a/cpp_lib.todo b/cpp_lib.todo
index e3567a6..207e44e 100644
--- a/cpp_lib.todo
+++ b/cpp_lib.todo
@@ -1,15 +1,37 @@
 [#TODO]
-
-[*] This is the order I want to follow, because I need them in thread context.
-    - Allocator interface
-    - Thread context (Base_Thread_Context.h)
-    - Arenas (copy from Ex1)
+  [ ] Thread primitives (Mutex, Semaphore, etc.)
+  [ ] Thread Creation / Deletion
+  [ ] CreateWindow
+  [ ] ... Mouse / Keyboard inputs
+  [*] This is the order I want to follow, because I need them in thread context.
+    
     - Entry point(s) `Entry_Point_Main.cpp`
         -> We can have a lib_init() instead of a clear entry point for now.
         -> Switch from library to application once I add an entry point
         -> See how rjf abstracts his entry points for each platform with TCTX.
-[ ] 1. setup thread-local storage via thread_static (see raddbg, base_core.h, 
+  [ ] 1. setup thread-local storage via thread_static (see raddbg, base_core.h, 
     C_LINKAGE thread_static TCTX *tctx_thread_local;
         >> Must be assigned at entry point (arena_alloc())
         >> TCTX defined in base_thread_context.h
     > See Base_Thread.h
+[Design Stuff]
+  [ ] "Primary Thread" <- Main entry point thread
+  [ ] "Secondary Thread" <- does all the heavy lifting for the first thread and synchronizes with the first.
+    [ ] Tertiary threads (launched by secondary thread and awaiting work as assigned by Primary and Secondary threads)
+  [ ] Multi-threaded by default with sync points
+    [ ] Need bindings for Semaphore and Mutex
+  [ ] Does calling SetProcessWorkingSetSize(.., -1, -1) cause the program to crash? Why tho.
+    [ ] Maybe we just shouldn't do this?
+    [ ] *our* program memory usage can be calculated by Stack Size + allocations from GPAllocator + allocations from arena.
+[TODO - Low priority]
+    [ ] Implement Secure Arenas (with VirtualLock, wiping memory with RtlSecureZeroMemory)
+    [ ] 
+[Project Ideas]
+    [ ](Graphics): How do I create a basic overlay that shows the time, date, cpu/gpu temps, frequency, memory usage all the time and responds to global hotkey to show more info
+[Documentation Notes]
+  Main inspirations for this codebase:
+    (1) Jonathan Blow & the Jai compiler project
+    (2) Casey Muratori and the "handmade" approach to software
+    (3) Ryan Fleury and the raddebugger codebase
+    
+Always contemplate your impulses
\ No newline at end of file
diff --git a/lib/Base/Allocator.cpp b/lib/Base/Allocator.cpp
index bd5fc0f..2a67819 100644
--- a/lib/Base/Allocator.cpp
+++ b/lib/Base/Allocator.cpp
@@ -1,15 +1,6 @@
 #include "Allocator.h"
 #include "Base_Thread_Context.h"
 
-force_inline Allocator get_temp_allocator() {
-  return get_allocator(get_thread_context()->temp);
-}
-
-force_inline Allocator get_context_allocator() {
-  Thread_Context* context = get_thread_context();
-  return context->allocator;
-}
-
 void* internal_alloc    (s64 size) {
   Allocator allocator = get_context_allocator();
   void* result = allocator.proc(Allocator_Mode::ALLOCATE, size, 0, nullptr, allocator.data);
@@ -18,7 +9,7 @@ void* internal_alloc    (s64 size) {
 
 void* internal_realloc  (void* memory, s64 size, s64 old_size) {
   Allocator allocator = get_context_allocator();
-  void* result = allocator.proc(Allocator_Mode::RESIZE, size, 0, nullptr, allocator.data);
+  void* result = allocator.proc(Allocator_Mode::RESIZE, size, old_size, memory, allocator.data); // old_memory must be `memory`; nullptr dropped the caller's block
   return result;
 }
 
diff --git a/lib/Base/Allocator.h b/lib/Base/Allocator.h
index 9e573eb..fa7abec 100644
--- a/lib/Base/Allocator.h
+++ b/lib/Base/Allocator.h
@@ -17,20 +17,19 @@ typedef void* (*Allocator_Proc)(Allocator_Mode mode, s64 requested_size, s64 old
 struct Allocator {
     Allocator_Proc proc;
     void* data;
+    // True when the allocator is unusable: every operation dispatches through `proc`.
+    bool operator ! () const {
+        return proc == nullptr;
+    }
 };
 
 // Public Allocator API:
-// Thread-local allocators:
-PROTOTYPING_API Allocator get_temp_allocator();
-PROTOTYPING_API Allocator get_context_allocator();
-
 // Note that alignment is handled on a per-allocator basis.
 void* internal_alloc    (s64 size);
 void  internal_free     (void* memory);
 void* internal_realloc  (void* memory, s64 size, s64 old_size);
 
-template <typename T>
-void Initialize (T* memory) { (*memory) = T(); }
+template <typename T> force_inline void Initialize (T* memory) { (*memory) = T(); }
 
 template <typename T> T* New (Allocator allocator, bool initialize=true) {
   auto memory = (T*)allocator.proc(Allocator_Mode::ALLOCATE, sizeof(T), 0, nullptr, allocator.data);
@@ -52,6 +51,35 @@ template <typename T> T* New (bool initialize=true) {
   return memory;
 }
 
-void Delete (void* object) { internal_free(object); }
+// For raw-pointer arrays.
+template <typename T> T* NewArray (Allocator allocator, s64 count, bool initialize=true) {
+  auto memory = (T*)allocator.proc(Allocator_Mode::ALLOCATE, count * sizeof(T), 0, nullptr, allocator.data);
+  
+  if (initialize) {
+    for (s64 i = 0; i < count; i += 1) {
+      memory[i] = T();
+    }
+  }
+  
+  return memory;
+}
 
-// template <typename T> T* NewArray(s64 count, bool initialize) // should be in Array.jai
\ No newline at end of file
+template <typename T> T* NewArray (s64 count, bool initialize=true) {
+  auto memory = (T*)internal_alloc(count * sizeof(T));
+
+  if (initialize) {
+    for (s64 i = 0; i < count; i += 1) {
+      memory[i] = T();
+    }
+  }
+
+  return memory;
+}
+
+// For Resizes and Deletes, use internal_realloc and internal_free.
+// template typename<T> void reset_struct(T* src) { (*src) = T(); }
+// template typename<T> void zero_struct(T* src) { memset(src, 0, sizeof(T)); }
+// template typename<T> T* copy_struct(T* src) {
+//   T* dst = New<T>(false);
+//   memcpy(dst, src, sizeof(T));
+// }
diff --git a/lib/Base/Arena.h b/lib/Base/Arena.h
index c9b0d0f..cfd2bba 100644
--- a/lib/Base/Arena.h
+++ b/lib/Base/Arena.h
@@ -24,7 +24,7 @@ enum class Arena_Flags: u8 {
   Chained = 0x01, 
   Is_Bootstrapped = 0x02,
   Large_Pages = 0x40, 
-  // Secure_Arena = 0xF0 // #NOTE: Secure Arenas are not implemented here!
+  // Secure_Arena = 0xF0 // #NOTE: Secure Arenas are not implemented yet!
 };
 
 force_inline Arena_Flags operator | (Arena_Flags a, Arena_Flags b) {
@@ -99,6 +99,8 @@ void arena_delete (Arena* arena);
 
 // Scoped Macros/Functions for auto_reset and auto_release
 // usage `Auto_Reset guard(arena);` within a scope.
+#define auto_reset(x) \
+    Auto_Reset Concat(_auto_reset_guard_, __LINE__)(x)
 struct Auto_Reset {
   Arena* arena;
   u8* starting_point;
@@ -114,6 +116,8 @@ struct Auto_Reset {
   }
 };
 
+#define auto_release(x) \
+    Auto_Release Concat(_auto_release_guard_, __LINE__)(x)
 struct Auto_Release {
   Arena* arena;
   u8* starting_point;
@@ -130,7 +134,9 @@ struct Auto_Release {
   }
 };
 
-struct Push_Alignment {
+#define push_alignment(x, y) \
+    Push_Alignment Concat(_push_align_guard_, __LINE__)(x, y)
+struct Push_Alignment { // #rename to Arena_Push_Alignment?
   Arena* arena;
   u16 original_alignment;
   
@@ -146,6 +152,8 @@ struct Push_Alignment {
   }
 };
 
+#define push_arena(x) \
+    Push_Arena Concat(_push_arena_guard_, __LINE__)(x) // own prefix so it cannot collide with push_allocator's guard
 struct Push_Arena {
   Thread_Context* context;
   Allocator original_allocator;
diff --git a/lib/Base/Arena_Array.h b/lib/Base/Arena_Array.h
index ee98e89..1b8aa72 100644
--- a/lib/Base/Arena_Array.h
+++ b/lib/Base/Arena_Array.h
@@ -28,8 +28,8 @@ struct ArenaArray { // downcasts to an ArrayView.
 template <typename T>
 ArenaArray<T>* arena_array_new (s64 preallocate_count, Arena_Reserve reserve_size) {
   Arena* arena = next_arena(reserve_size);
-  Push_Arena push_arena(arena);
-  Push_Alignment push_alignment(arena, 1);
+  push_arena(arena);
+  push_alignment(arena, 1);
   ArenaArray<T>* array = New<ArenaArray<T>>(true);
   array->arena = arena;
   
@@ -65,7 +65,7 @@ template <typename T> void array_free (ArenaArray<T>& array) {
   release_arena(array.arena, delete_extra_pages=true);
 }
 
-template <typename T> ArrayView<T> array_view (Array<T> array) {
+template <typename T> ArrayView<T> array_view (ArenaArray<T> array) {
   ArrayView<T> av;
   av.count = array.count;
   av.data = array.data;
@@ -136,14 +136,6 @@ template <typename T> void array_resize (ArenaArray<T>& array, s64 desired_item_
   }
 }
 
-void reserve_internal (ArenaArray<void>& array, s64 desired_item_count, s64 element_size) {
-  if (desired_item_count <= array.allocated) return;
-  
-  array_arena_realloc(array, desired_item_count * element_size, array.allocated * element_size);
-  
-  array.allocated = desired_item_count;
-}
-
 s64 max_array_size (ArenaArray<void>& array) {
   return reserve_size(array.arena) - sizeof(Arena) - sizeof(ArenaArray<void>);
 }
@@ -167,6 +159,14 @@ void array_arena_realloc (ArenaArray<void>& array, s64 new_size, s64 old_size) {
   }
 }
 
+void reserve_internal (ArenaArray<void>& array, s64 desired_item_count, s64 element_size) {
+  if (desired_item_count <= array.allocated) return;
+  
+  array_arena_realloc(array, desired_item_count * element_size, array.allocated * element_size);
+  
+  array.allocated = desired_item_count;
+}
+
 template <typename T> void init_range (T* ptr, s64 start_offset, s64 end_offset) {
   for (s64 i = start_offset; i < end_offset; i += 1) {
     T* current_item = ptr + i;
diff --git a/lib/Base/Arena_Table.cpp b/lib/Base/Arena_Table.cpp
index eb646c7..c1790dd 100644
--- a/lib/Base/Arena_Table.cpp
+++ b/lib/Base/Arena_Table.cpp
@@ -1,6 +1,7 @@
 // API in Arena.h
 #include "Arena.h"
 #include "Array.h"
+#include "General_Purpose_Allocator.h"
 #include <mutex>
 
 global std::mutex arena_table_mutex;
@@ -12,6 +13,8 @@ global Array<Arena*> arenas_in_flight[6];
 
 void initialize_arena_table () {
   for (s32 i = 0; i < 6; i += 1) {
+    arena_free_table[i].allocator = GPAllocator();
+    arenas_in_flight[i].allocator = GPAllocator();
     array_reserve(arena_free_table[i], 64);
     array_reserve(arenas_in_flight[i], 64);
   }
diff --git a/lib/Base/Array.h b/lib/Base/Array.h
index 6aaf241..c06cf0e 100644
--- a/lib/Base/Array.h
+++ b/lib/Base/Array.h
@@ -4,10 +4,14 @@
 #include "Base.h"
 #include "Allocator.h"
 
-#define DEFAULT_ARRAY_ALIGNMENT 16
-
-// #NOTE: This uses `General_Purpose_Allocator` for simplicity.
+// #TODO: Array.h
+  // [x] Set allocations to use context.allocator interface
+  // For now, I'm just disabling alignment:
+  // [ ] Add back alignment, and make sure there's a way to pass alignment to NewArray, which gets passed to allocator.proc. 
+  // [ ] Make versions of ArrayView initializer that takes allocator as a param
+  // [ ] Make version of array_free (ArrayView&) that takes allocator as a param
 // For Arena-Backed arrays use ArenaArray
+// #define DEFAULT_ARRAY_ALIGNMENT 16
 
 MSVC_RUNTIME_CHECKS_OFF
 
@@ -17,39 +21,42 @@ struct Array { // downcasts to an ArrayView.
   s64 count;
   T* data;
   s64 allocated;
-  s64 alignment = DEFAULT_ARRAY_ALIGNMENT;
+  Allocator allocator;
+  // s64 alignment = DEFAULT_ARRAY_ALIGNMENT;
 
   Array() { 
     memset(this, 0, sizeof(*this));
-    alignment = DEFAULT_ARRAY_ALIGNMENT;
   }
 
-  Array(s64 new_count, s64 _alignment, bool zero_memory=true) {
+  Array(s64 new_count, bool initialize=false) { // old: NewArray ::, array_new :
     count = new_count;
-    data = (T*)GPAllocator_New(new_count * sizeof(T), _alignment);
-    if (zero_memory) { memset(data, 0, new_count * sizeof(T)); }
-    alignment = _alignment;
+    allocator = get_context_allocator();
+    data = NewArray<T>(new_count, initialize);
     allocated = new_count;
   }
 
-  // Use constructor delegation to pass params to above constructor
-  Array(s64 new_count, bool zero_memory=true)
-    : Array(new_count, DEFAULT_ARRAY_ALIGNMENT, zero_memory) {}
-  
   // initializer-list type instantiation: `Array<T> new_array = {count, data}`
-  // This is essentially an arrayview.
-  // (Musa) Ok, but this array cannot then be resized.
-  Array(s64 new_count, void* new_data) {
-    count = new_count;
-    data = (T*)new_data;
-    
-    allocated = new_count;
-    alignment = DEFAULT_ARRAY_ALIGNMENT;
-  }
+  // (Musa) This array cannot then be resized. Why do I even have this? Do I need it?
+  // Array(s64 new_count, void* new_data) {
+  //   count = new_count;
+  //   data = (T*)new_data;
+  //   allocator = { nullptr, nullptr }; // NOT RESIZABLE.
+  //   allocated = new_count;
+  // }
   
   // Used by array_zero, array_copy, etc.
-  Array(s64 new_count, void* new_data, s64 _allocated, s64 _alignment) {
-    count = new_count; data = (T*)new_data; allocated = _allocated; alignment = _alignment;
+  Array(s64 new_count, void* new_data, s64 _allocated) {
+    count = new_count;
+    data = (T*)new_data;
+    allocated = _allocated;
+    allocator = get_context_allocator();
+  }
+  
+  Array(s64 new_count, void* new_data, s64 _allocated, Allocator _allocator) {
+    count = new_count;
+    data = (T*)new_data;
+    allocated = _allocated;
+    allocator = _allocator;
   }
   
   T& operator[](s64 index) {
@@ -60,13 +67,17 @@ struct Array { // downcasts to an ArrayView.
   }
 };
 
+template <typename T> bool is_resizable (Array<T>& src) {
+  // If we have a valid allocator, we assume this is resizeable.
+  return src.allocator.proc != nullptr;
+}
+
 template <typename T>
 bool is_valid(Array<T> src) {
   if (src.count == 0) return true;
   if (src.count < 0) return false;
   if (src.data == nullptr) return false;
   if (src.allocated < src.count) return false;
-  // if ((src.alignment % 8) != 0) return false; Dubious - we could want an alignment of 1
   return true;
 }
 
@@ -76,10 +87,10 @@ Array<T> array_copy_zero(const Array<T>& src) {
     return Array<T>(); // Return an empty array
   }
   
-  void* new_data = GPAllocator_New(src.count * sizeof(T), src.alignment);
+  T* new_data = NewArray<T>(src.count, false);
   memset(new_data, 0, src.count * sizeof(T));
   
-  return Array<T>(src.count, new_data, src.allocated, src.alignment);
+  return Array<T>(src.count, new_data, src.count); // capacity = items actually allocated above, not src.allocated
 }
 
 template <typename T>
@@ -88,20 +99,26 @@ Array<T> array_copy(const Array<T>& src) {
     return Array<T>(); // Return an empty array
   }
   
-  void* new_data = GPAllocator_New(src.count * sizeof(T), src.alignment);
+  T* new_data = NewArray<T>(src.count, false);
   memcpy(new_data, src.data, src.count * sizeof(T));
   
-  return Array<T>(src.count, new_data, src.allocated, src.alignment);
+  return Array<T>(src.count, new_data, src.count); // capacity = items actually allocated above, not src.allocated
 }
 
 template <typename T>
-void array_reset_count(Array<T>& src) {
+void array_reset_keeping_memory(Array<T>& src) {
   src.count = 0;
 }
 
 template <typename T>
 void array_free(Array<T>& src) {
-  GPAllocator_Delete(src.data);
+  if (!src.data) return;
+  if (src.allocated == 0) return;
+  if (src.allocator.proc != nullptr) {
+    src.allocator.proc(Allocator_Mode::DEALLOCATE, 0, 0, src.data, src.allocator.data);
+  } else {
+    internal_free(src.data);
+  }
   src.count = 0;
   src.data = nullptr;
   src.allocated = 0;
@@ -120,7 +137,15 @@ template <typename T>
 void array_reserve(Array<T>& src, s64 desired_items) {
   if (desired_items <= src.allocated) return;
 
-  src.data = (T*)GPAllocator_Resize(src.allocated * sizeof(T), src.data, desired_items * sizeof(T), src.alignment);
+  // An array with no allocator yet (e.g. zeroed by Array()) adopts the context allocator on first growth.
+  if (src.allocator.proc == nullptr) {
+    src.allocator = get_context_allocator();
+  }
+  
+  Assert(src.allocator.proc != nullptr);
+  
+  src.data = (T*)src.allocator.proc(Allocator_Mode::RESIZE, desired_items * sizeof(T), src.allocated * sizeof(T), src.data, src.allocator.data); // pass the old block so existing items are kept
+  
   Assert(src.data != nullptr);
 
   src.allocated = desired_items;  
@@ -150,7 +175,7 @@ force_inline void array_maybe_grow(Array<T>& src) {
 
 template <typename T>
 T pop(Array<T>& src) {
-  auto result = src[src.count-1]; // how do I dereference?
+  auto result = src[src.count-1];
   src.count -= 1;
   return result;
 }
@@ -239,11 +264,9 @@ struct ArrayView {
   
   ArrayView() { count = 0; data = nullptr; }
   
-  // If we don't need reallocation or alignments
-  ArrayView(s64 new_count, s64 alignment=DEFAULT_ARRAY_ALIGNMENT, bool zero_memory=true) {
+  ArrayView(s64 new_count, bool initialize=true) {
     count = new_count;
-    data = (T*)GPAllocator_New(new_count * sizeof(T), alignment);
-    if (zero_memory) { memset(data, 0, new_count * sizeof(T)); }
+    data = NewArray<T>(new_count, initialize);
   }
   
   // #Note: use array_view to create slices or to downcast to ArrayView!
@@ -315,7 +338,7 @@ ArrayView<T> array_view(Array<T> array, s64 start_index, s64 view_count) {
 }
 
 template <typename T>
-void array_reset_count(ArrayView<T>& src) {
+void array_reset_keeping_memory(ArrayView<T>& src) {
   src.count = 0;
 }
 
@@ -326,7 +349,7 @@ ArrayView<T> array_copy(const ArrayView<T>& src) {
     return ArrayView<T>(); // Return an empty array
   }
   
-  void* new_data = GPAllocator_New(src.count * sizeof(T), DEFAULT_ARRAY_ALIGNMENT);
+  T* new_data = NewArray<T>(src.count, false); // skip per-element init: the memcpy below overwrites every item
   memcpy(new_data, src.data, src.count * sizeof(T));
   
   return ArrayView<T>(src.count, (T*)new_data);
@@ -335,7 +358,9 @@ ArrayView<T> array_copy(const ArrayView<T>& src) {
 template <typename T>
 void array_free(ArrayView<T>& src) {
   if (!src.data || src.count == 0) { return; }
-  GPAllocator_Delete(src.data);
+  
+  internal_free(src.data); // we just have to trust that the context.allocator is correct for this guy!
+  
   src.count = 0;
   src.data  = nullptr;
 }
diff --git a/lib/Base/Base.cpp b/lib/Base/Base.cpp
index c9a91ee..07c6828 100644
--- a/lib/Base/Base.cpp
+++ b/lib/Base/Base.cpp
@@ -1,28 +1,3 @@
-#include "Base.h"
-
-// #TODO Split these up into platform_base (e.g. Windows_Base, Unix_Base...)
-#if OS_WINDOWS
-#include <intrin.h>
-
-int CPU_Base_Frequency() {
-  int cpuInfo[4] = {0};
-    
-  // Call CPUID with EAX = 0x16 (Base CPU Frequency)
-  __cpuid(cpuInfo, 0x16);
-  
-  return cpuInfo[0];
-}
-#endif
-
-#if OS_IS_UNIX
-#include <cpuid.h>
-
-  int CPU_Base_Frequency() {
-    unsigned int eax, ebx, ecx, edx;
-    if (__get_cpuid(0x16, &eax, &ebx, &ecx, &edx)) {
-      return eax;
-    }
-    
-    return 0; // not found or supported
-  }
-#endif
+#include "Base.h"
+
+// #TODO Split these up into platform_base (e.g. Windows_Base, Unix_Base...)
diff --git a/lib/Base/Base.h b/lib/Base/Base.h
index 2816536..7f4c507 100644
--- a/lib/Base/Base.h
+++ b/lib/Base/Base.h
@@ -6,6 +6,7 @@
 #define BUILD_CONSOLE_INTERFACE BUILD_DEBUG
 
 #if ARCH_CPU_X64
+  #include "CPU_X64.cpp"
   #define PLATFORM_MEMORY_PAGE_SIZE 4096
   #define PLATFORM_MEMORY_LARGE_PAGE_SIZE 2097152
   #define CPU_REGISTER_WIDTH_BYTES 8
@@ -128,6 +129,9 @@ force_inline s64 Next_Power_Of_Two(s64 v) {
 #define Stringify_(S) #S
 #define Stringify(S) Stringify_(S)
 
+#define Concat_(A,B) A##B
+#define Concat(A,B) Concat_(A,B)
+
 #if COMPILER_MSVC
 # define debug_break() __debugbreak()
 #elif COMPILER_CLANG || COMPILER_GCC
@@ -171,6 +175,3 @@ force_inline s64 Next_Power_Of_Two(s64 v) {
 #define ForArrayStartingAt(_it_, _array_, _start_) for (s64 _it_ = _start_; _it_ < (_array_).count; _it_ += 1)
 #define ForUpTo(_it_, _end_) for (s64 _it_ = 0; _it_ < _end_; _it_ += 1)
 
-// #MOVE TO CPU_X64.cpp
-PROTOTYPING_API int CPU_Base_Frequency();
-
diff --git a/lib/Base/Base_String.h b/lib/Base/Base_String.h
index 533d83a..d1b80d5 100644
--- a/lib/Base/Base_String.h
+++ b/lib/Base/Base_String.h
@@ -10,22 +10,28 @@ struct string {
   s64 count;
   u8* data;
   // Construct from a string literal or C-string
-  string() { // default constructor
+  string () { // default constructor
     count = 0;
     data = nullptr;
   }
   
-  string(const char* cstr) {
+  string (const char* cstr) { // const so literals bind; `string s = {0}` also lands here with a null cstr
-    count = strlen(cstr);
+    count = cstr ? strlen(cstr) : 0; // null yields an empty string instead of strlen(nullptr)
     data = (u8*)cstr;
   }
+  
+  string (s64 _count, char* str) { count = _count; data = (u8*)str; }
+  string (s64 _count, u8* str) { count = _count; data = str; }
 };
 
+// ~ API ~ #TODO
+string copy_string (string str);
 bool strings_match(string first_string, string second_string);
 
-// ~ API ~ #TODO
+// Unicode stuff
+string wide_to_utf8 (u16* source, s32 length);
+
 // string string_view(string n_string, int start_index, int view_count);
-// string copy_string(string original_string);
 // string copy_string(char* c_string);
 // void free(string& n_string);
 
diff --git a/lib/Base/Base_Thread_Context.cpp b/lib/Base/Base_Thread_Context.cpp
index 968ff57..00f5b0b 100644
--- a/lib/Base/Base_Thread_Context.cpp
+++ b/lib/Base/Base_Thread_Context.cpp
@@ -20,4 +20,23 @@ void init_thread_context(Thread_Context* tctx) {
 
 Thread_Context* get_thread_context() {
   return (Thread_Context*)thread_local_context;
+}
+
+force_inline Allocator get_temp_allocator() {
+  return get_allocator(get_thread_context()->temp);
+}
+
+force_inline Allocator get_context_allocator() {
+  Thread_Context* context = get_thread_context();
+  return context->allocator;
+}
+
+void temp_reset_keeping_memory() {
+  Thread_Context* context = get_thread_context();
+  arena_reset_keeping_memory(context->temp);
+}
+
+void temp_reset() {
+  Thread_Context* context = get_thread_context();
+  arena_reset(context->temp);
 }
\ No newline at end of file
diff --git a/lib/Base/Base_Thread_Context.h b/lib/Base/Base_Thread_Context.h
index 59e35e6..6305eb7 100644
--- a/lib/Base/Base_Thread_Context.h
+++ b/lib/Base/Base_Thread_Context.h
@@ -24,7 +24,9 @@ struct Thread_Context {
 
 Thread_Context* get_thread_context();
 
-#define push_allocator(x) Push_Allocator guard(x) // maybe should append line number to guard?
+// The guard variable gets __LINE__ appended so several guards can coexist in one scope.
+// #TODO: consider generating these scoped-guard macros with a metaprogram instead.
+#define push_allocator(x) Push_Allocator Concat(_push_alloc_guard_, __LINE__)(x)
 struct Push_Allocator {
   Thread_Context* context;
   Allocator old_allocator;
@@ -41,4 +43,9 @@ struct Push_Allocator {
 };
 
 
+// Thread-local allocators and temp-arena resets (names match the definitions in Base_Thread_Context.cpp):
+PROTOTYPING_API Allocator get_temp_allocator();
+PROTOTYPING_API Allocator get_context_allocator();
+PROTOTYPING_API void temp_reset_keeping_memory();
+PROTOTYPING_API void temp_reset();
 
diff --git a/lib/Base/CPU_X64.cpp b/lib/Base/CPU_X64.cpp
new file mode 100644
index 0000000..6a09f2c
--- /dev/null
+++ b/lib/Base/CPU_X64.cpp
@@ -0,0 +1,25 @@
+#if OS_WINDOWS
+#include <intrin.h>
+
+int CPU_Base_Frequency() { // EAX of CPUID leaf 0x16, or 0 when that leaf is not supported
+  int cpuInfo[4] = {0};
+  __cpuid(cpuInfo, 0);             // leaf 0: EAX reports the highest supported basic leaf
+  if (cpuInfo[0] < 0x16) return 0; // leaf 0x16 unavailable: return 0 like the Unix path does
+  // Call CPUID with EAX = 0x16 (Base CPU Frequency)
+  __cpuid(cpuInfo, 0x16);
+  return cpuInfo[0];
+}
+#endif
+
+#if OS_IS_UNIX
+#include <cpuid.h>
+
+  int CPU_Base_Frequency() {
+    unsigned int eax, ebx, ecx, edx;
+    if (__get_cpuid(0x16, &eax, &ebx, &ecx, &edx)) {
+      return eax;
+    }
+    
+    return 0; // not found or supported
+  }
+#endif
diff --git a/lib/Base/ErrorCodes.cpp b/lib/Base/ErrorCodes.cpp
index 26d79d5..548ec5f 100644
--- a/lib/Base/ErrorCodes.cpp
+++ b/lib/Base/ErrorCodes.cpp
@@ -4,7 +4,6 @@
 #include <stdio.h> // vsnprintf, printf
 #include <cstdarg> // va_list...
 
-// Should always be false when making python bindings.
 #define BREAK_ON_WARNINGS 0
 #define BREAK_ON_ERRORS 0
 #define BREAK_ON_FATAL_ERROR BUILD_DEBUG
@@ -12,9 +11,11 @@
 
 Native_Error* Create_New_Native_Error_Internal(char* format, va_list args) {
   constexpr s64 ERROR_BUFFER_COUNT = 512;
+  
+  push_allocator(GPAllocator());
 
-  auto error  = ALLOCATE(Native_Error);
-  error->data = ALLOCATE_RAW_ARRAY(ERROR_BUFFER_COUNT, u8);
+  auto error  = New<Native_Error>(false);
+  error->data = (u8*)GPAllocator_New(ERROR_BUFFER_COUNT);
 
   // You MUST copy the va_list before using it more than once
   va_list args_copy;
@@ -43,12 +44,14 @@ Native_Error* New_Fatal_Error_Internal(char* format, ...) {
 }
 
 Native_Error* Native_Error_Callstack(Native_Error* new_error, Native_Error* old_error, ErrorSeverity severity) {
+  push_allocator(GPAllocator());
+  
   auto error_message = format_string("%s\n > %s", new_error->data, old_error->data).data;
 
   Cleanup_Error(new_error);
   Cleanup_Error(old_error);
 
-  Native_Error* error_merged = ALLOCATE(Native_Error);
+  Native_Error* error_merged = New<Native_Error>(false);
   error_merged->data = (u8*)error_message;
   error_merged->count = strlen((char*)error_merged->data);
   error_merged->severity = severity;
diff --git a/lib/Base/General_Purpose_Allocator.cpp b/lib/Base/General_Purpose_Allocator.cpp
index c265686..d10adbb 100644
--- a/lib/Base/General_Purpose_Allocator.cpp
+++ b/lib/Base/General_Purpose_Allocator.cpp
@@ -1,17 +1,15 @@
-// #TODO: Make this not MSVC-centric with alias for std::aligned_alloc / _aligned_malloc
-// #TODO: Define GPAllocator_Proc
 #include "General_Purpose_Allocator.h"
 
 #include <string.h>
 
 #if GP_ALLOCATOR_TRACK_ALLOCATIONS
   #include <mutex>
-  General_Allocator gAllocator; // @Shared
-  std::mutex allocator_mutex;
+  global General_Allocator gAllocator; // @Shared
+  global std::mutex allocator_mutex;
 #endif
 
 #if !COMPILER_MSVC
-// Note: There is *no* aligned_realloc. Must implement manually if needed.
+// Note: There is *no* std::aligned_realloc. Must implement manually if needed.
 force_inline void* gp_aligned_realloc(u64 old_size, void* ptr, u64 new_size, u64 alignment) {
     if (!ptr || old_size == 0) return std::aligned_alloc(alignment, new_size);
     if (new_size == 0) { std::free(ptr); return nullptr; }
@@ -49,7 +47,7 @@ void GPAllocator_Initialize_Allocation_Tracker() {
     s64 item_count_max = 64 * 4096;
     s64 total_allocation_size = item_count_max * sizeof(Allocation);
     auto memory = Aligned_Alloc(total_allocation_size, alignment); // @MemoryLeak (intentional)
-    gAllocator.allocations = Array<Allocation>(item_count_max, memory, item_count_max, alignment);
+    gAllocator.allocations = Array<Allocation>(item_count_max, memory, item_count_max, GPAllocator());
     gAllocator.allocations.count = 0; // Init to zero. 
 #endif
 }
@@ -139,7 +137,11 @@ Allocator GPAllocator() {
 }
 
 void* GPAllocator_Proc (Allocator_Mode mode, s64 requested_size, s64 old_size, void* old_memory, void* allocator_data) {
-    u16 alignment = get_thread_context()->GPAllocator_alignment;
+    u16 alignment = 16; // default alignment
+
+    Thread_Context* tctx = get_thread_context();
+    if (tctx) alignment = tctx->GPAllocator_alignment;
+
     switch (mode) {
     case Allocator_Mode::ALLOCATE: {
       return GPAllocator_New(requested_size, alignment);
diff --git a/lib/Base/General_Purpose_Allocator.h b/lib/Base/General_Purpose_Allocator.h
index a68cd94..503d5c6 100644
--- a/lib/Base/General_Purpose_Allocator.h
+++ b/lib/Base/General_Purpose_Allocator.h
@@ -43,13 +43,6 @@ General_Allocator* get_general_allocator_data();
 
 constexpr u16 GPAllocator_Default_Alignment = 16;
 
-// #TODO: #REMOVE these:
-#define ALLOCATE(type) \
-    (type*)GPAllocator_New(sizeof(type))
-
-#define ALLOCATE_RAW_ARRAY(length, type) \
-    (type*)GPAllocator_New(sizeof(type) * (length))
-
 Allocator GPAllocator();
 
 void* GPAllocator_Proc (Allocator_Mode mode, s64 requested_size, s64 old_size, void* old_memory, void* allocator_data);
diff --git a/lib/Base/String.cpp b/lib/Base/String.cpp
index 345a4f8..2fe2358 100644
--- a/lib/Base/String.cpp
+++ b/lib/Base/String.cpp
@@ -14,12 +14,23 @@ bool is_valid(string n_string) {
   return (n_string.data != nullptr && n_string.count > 0);
 }
 
-string format_string(char* format, ...) {
-  constexpr s64 BUFFER_SIZE = 1024;
+// Copies `str` into memory from internal_alloc; the copy is null-terminated so `.data` is usable as a C string.
+string copy_string (string str) {
+  string new_str = {};
+  new_str.count = str.count;
+  new_str.data  = (u8*)internal_alloc(str.count + 1); // +1 for the terminator, which is not part of count
+  
+  if (str.count > 0) memcpy(new_str.data, str.data, str.count); // an empty string may carry a null data pointer
+  new_str.data[str.count] = 0;
+  return new_str;
+}
+
+string format_string (char* format, ...) {
+  constexpr s64 BUFFER_SIZE = 4096;
   
   string str = {0};
   
-  str.data = ALLOCATE_RAW_ARRAY(BUFFER_SIZE, u8);
+  str.data = NewArray<u8>(BUFFER_SIZE);
   
   va_list args;
   va_start(args, format);
@@ -30,21 +41,11 @@ string format_string(char* format, ...) {
   return str;
 }
 
-string copy_string(string original_string) {
-  string str = {0};
-
-  str.data = ALLOCATE_RAW_ARRAY(original_string.count + 1, u8);
-  memcpy(str.data, original_string.data, original_string.count);
-  str.count = original_string.count;
-  
-  return str;
-}
-
 string copy_string(char* c_string) {
   string str = {0};
   s64 string_length = strlen(c_string);
   
-  str.data = ALLOCATE_RAW_ARRAY(string_length + 1, u8);
+  str.data = NewArray<u8>(string_length + 1);
   memcpy(str.data, c_string, string_length);
   str.count = string_length;
   
@@ -74,9 +75,36 @@ string string_from_literal(char* literal) {
 }
 
 void free(string& n_string) {
-  // #TODO: Get context allocator?
-  GPAllocator_Delete(n_string.data);
+  internal_free(n_string.data);
   
   n_string.data = nullptr;
   n_string.count = 0;
+}
+
+// Converts `length` UTF-16 code units to a freshly allocated, null-terminated UTF-8 string via WideCharToMultiByte.
+string wide_to_utf8 (u16* source, s32 length) {
+  if (length == 0) return { };
+  
+  s32 query_result = WideCharToMultiByte(CP_UTF8, 0, (LPCWCH)source, length, 
+                        nullptr, 0, nullptr, nullptr);
+  
+  if (query_result <= 0) return { };
+  
+  // Make room for a null terminator (NewArray value-initializes, so that byte is already 0):
+  query_result += 1;
+  
+  u8* memory = NewArray<u8>(query_result);
+  
+  string utf8_string;
+  utf8_string.count = query_result - 1; // count excludes the terminator, as in copy_string(char*)
+  utf8_string.data = memory;
+  
+  s32 result = WideCharToMultiByte(CP_UTF8, 0, (LPCWCH)source, length, 
+                  (LPSTR)memory, query_result, nullptr, nullptr);
+  if (result <= 0) {  
+    internal_free(memory);
+    return { };
+  }
+  
+  return utf8_string;
 }
\ No newline at end of file
diff --git a/src/Base_Entry_Point.cpp b/src/Base_Entry_Point.cpp
new file mode 100644
index 0000000..2110362
--- /dev/null
+++ b/src/Base_Entry_Point.cpp
@@ -0,0 +1,47 @@
+internal void Bootstrap_Main_Thread_Context () {
+  // Builds thread_local_context for the main thread (thread_idx 0); called from Win32_Entry_Point.
+  GPAllocator_Initialize_Allocation_Tracker(); // 0. Set up the general purpose allocator's tracker.
+  // 1. Set up the arena table (done before the first next_arena() call below).
+  initialize_arena_table();
+  // 2. Set up the thread-local context; the context object itself is allocated from `arena`.
+  Arena* arena = next_arena(Arena_Reserve::Size_64G);
+  thread_local_context = New<Thread_Context>(get_allocator(arena));
+  thread_local_context->temp        = next_arena(Arena_Reserve::Size_64G); // A second arena, distinct from `arena`.
+  thread_local_context->arena       = arena;
+  thread_local_context->allocator   = get_allocator(arena);
+  thread_local_context->thread_idx  = 0;
+  thread_local_context->thread_name = "Main Thread";
+  // thread_local_context->logger     = init_logger();
+
+  // debug_break();
+  
+}
+
+// #include "lib/Base/Arena_Array.h"
+void run_arena_array_tests () { // Smoke test: allocates via the temp arena, then via GPAllocator(); checks nothing.
+  { push_arena(thread_local_context->temp); // NOTE(review): presumably scoped to this brace block — confirm.
+    push_alignment(thread_local_context->temp, 1);
+    auto_reset(thread_local_context->temp);
+    auto something = New<Thread_Context>();
+    auto something2 = New<Array<s64>>();
+    auto something3 = internal_alloc(5000);
+  }
+  // NOTE(review): the two objects allocated below are never released here — confirm that is intended.
+  { push_allocator(GPAllocator());
+    auto something = New<Thread_Context>();
+    auto something2 = New<Array<s64>>();
+  }
+  // The arena-array case this function is named after is still disabled:
+  // { auto na = arena_array_new<s64>(64000, Arena_Reserve::Size_64G); 
+    // array_add(...)
+  // }
+}
+
+internal void Main_Entry_Point (int argc, WCHAR **argv) {
+  run_arena_array_tests(); // The only work done so far; argc and argv are not read yet.
+  // NOTE(review): presumably invoked by the platform entry point after bootstrap — no caller is visible here.
+  // #TODO:
+  // [ ] Launch second thread
+  // [ ] Setup Mouse and Keyboard Inputs 
+  // OS_Create_Window();
+}
diff --git a/src/OS_Win32.cpp b/src/OS_Win32.cpp
index 5db4972..c3ed3e8 100644
--- a/src/OS_Win32.cpp
+++ b/src/OS_Win32.cpp
@@ -1,39 +1,155 @@
 // Move into src/Win32.cpp
 
-// #MOVE TO Base_Entry_Point.cpp
-internal void Bootstrap_Entry_Point () {
-  // 0. Setup general purpose allocator
-  GPAllocator_Initialize_Allocation_Tracker();
-  // 1. Setup arena table
-  initialize_arena_table();
-  // 2. Setup thread local context
-  Arena* arena = next_arena(Arena_Reserve::Size_64G);
-  thread_local_context = New<Thread_Context>(get_allocator(arena));
-  thread_local_context->temp        = next_arena(Arena_Reserve::Size_64G);
-  thread_local_context->arena       = arena;
-  thread_local_context->allocator   = get_allocator(arena);
-  thread_local_context->thread_idx  = 0;
-  thread_local_context->thread_name = "Main Thread";
-  // thread_local_context.logger     = init_logger();
-  
-  // push_allocator(GPAllocator());
-  // auto something = New<Thread_Context>();
-  // auto something2 = New<Array<s64>>();
-  
-}
+struct OS_System_Info { // Machine-wide facts; filled in by Win32_Entry_Point.
+  s32 logical_processor_count; // From SYSTEM_INFO.dwNumberOfProcessors.
+  s32 physical_core_count;     // Not populated yet: the code that would set it is commented out.
+  s32 primary_core_count;      // Not populated yet: same disabled code path.
+  s32 secondary_core_count; // Any weaker or "Efficiency" cores.
+  u64 page_size;               // From SYSTEM_INFO.dwPageSize.
+  u64 large_page_size;         // From GetLargePageMinimum().
+  u64 allocation_granularity;  // From SYSTEM_INFO.dwAllocationGranularity.
+  string machine_name;         // Copy of the name returned by GetComputerNameA().
+};
 
-// #MOVE TO Base_Entry_Point.cpp
-internal void Main_Entry_Point (int argc, WCHAR **argv) {
-  debug_break();
+struct OS_Process_Info { // Facts about this process; partly filled in by Win32_Entry_Point.
+  u32 process_id;          // From GetCurrentProcessId().
+  b32 large_pages_allowed; // Currently always false: the privilege request is commented out.
+  string binary_path;      // Not populated by the visible startup code.
+  string working_path;     // UTF-8 conversion of the GetCurrentDirectoryW() result.
+  string user_program_data_path;   // Not populated by the visible startup code.
+  Array<string> module_load_paths; // Not populated by the visible startup code.
+  Array<string> environment_paths; // Not populated by the visible startup code.
+};
+
+struct OS_State_Win32 { // Win32 OS-layer state; instantiated as the global `os_state_w32`.
+  Arena* arena; // NOTE(review): not assigned by the visible startup code — confirm who sets it.
   
-  // #TODO: Run Tests?
+  OS_System_Info system_info;   // Written by Win32_Entry_Point.
+  OS_Process_Info process_info; // Written by Win32_Entry_Point.
+};
+
+global OS_State_Win32 os_state_w32; // Filled in by Win32_Entry_Point during startup.
+internal b32 win32_g_is_quiet = 0; // No console output; when set, Win32_Exception_Filter calls ExitProcess(1) at once.
+
+internal LONG WINAPI Win32_Exception_Filter (EXCEPTION_POINTERS* exception_ptrs) {
+  // Top-level unhandled-exception filter. In quiet mode just exit with code 1.
+  if (win32_g_is_quiet) ExitProcess(1);
+
+  // Only the first crashing thread gets past this gate. Any later one is parked in a
+  // sleep loop so it cannot pop up the same message box; we are terminating anyway.
+  static volatile LONG gate_taken = 0;
+  const LONG previous_value = InterlockedCompareExchange(&gate_taken, 1, 0);
+  while (previous_value != 0) { Sleep(1000); }
+
+  // #TODO: Exception handling code.
+  (void)exception_ptrs; // Not inspected until the handling code exists.
+
+  return EXCEPTION_CONTINUE_SEARCH; // Numerically 0.
 }
 
 internal void Win32_Entry_Point (int argc, WCHAR **argv) {
   // See: w32_entry_point_caller(); (raddebugger)
-  // [ ] SetUnhandledExceptionFilter
-  // [ ] Setup Thread Context:
-  Bootstrap_Entry_Point();
+  SetUnhandledExceptionFilter(&Win32_Exception_Filter);
+  
+  SYSTEM_INFO sysinfo = {0};
+  GetSystemInfo(&sysinfo);
+  
+  // Try to allow large pages if we can.
+  // b32 large_pages_allowed = 0;
+  // {
+  //   HANDLE token;
+  //   if(OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token))
+  //   {
+  //     LUID luid;
+  //     if(LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid))
+  //     {
+  //       TOKEN_PRIVILEGES priv;
+  //       priv.PrivilegeCount           = 1;
+  //       priv.Privileges[0].Luid       = luid;
+  //       priv.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+  //       large_pages_allowed = !!AdjustTokenPrivileges(token, 0, &priv, sizeof(priv), 0, 0);
+  //     }
+  //     CloseHandle(token);
+  //   }
+  // }
+  
+  Bootstrap_Main_Thread_Context();
+  
+  push_arena(get_thread_context()->arena);
+  
+  // #TODO: Need to write Win32 abstraction layer first.
+  { OS_System_Info* info = &os_state_w32.system_info;
+    info->logical_processor_count = (s32)sysinfo.dwNumberOfProcessors;
+    info->page_size               = sysinfo.dwPageSize;
+    info->large_page_size         = GetLargePageMinimum();
+    info->allocation_granularity  = sysinfo.dwAllocationGranularity;
+  }
+  { OS_Process_Info* info = &os_state_w32.process_info;
+    info->large_pages_allowed = false;
+    info->process_id = GetCurrentProcessId();
+  }
+  /*{ OS_System_Info* info = &os_state_w32.system_info;
+    // [ ] Extract input args 
+    u32 length;
+    GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, (PDWORD)&length);
+    u8* cpu_information_buffer = NewArray<u8>(length);
+    GetLogicalProcessorInformationEx(RelationProcessorCore, // *sigh* 
+      (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)cpu_information_buffer, (PDWORD)&length);
+    
+    u32 offset = 0;
+    
+    u32 all_cpus_count = 0;
+    u32 max_performance = 0;
+    u32 performance_core_count = 0;
+    // u32 efficient_core_count;
+    
+    while (offset < length) {
+      SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* cpu_information 
+        = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(cpu_information_buffer + offset);
+      
+      offset += cpu_information->Size;
+      
+      u32 count_per_group_physical = 1;
+      u32 value = (u32)cpu_information->Processor.GroupMask->Mask;
+      u32 count_per_group = __popcnt(value); // logical
+      if (cpu_information->Relationship != RelationProcessorCore) continue;
+      
+      if (cpu_information->Processor.EfficiencyClass > max_performance) {
+        max_performance = cpu_information->Processor.EfficiencyClass;
+        performance_core_count = count_per_group_physical;
+      } else if (cpu_information->Processor.EfficiencyClass == max_performance) {
+        performance_core_count += count_per_group_physical;
+      }
+      
+      all_cpus_count += count_per_group;  
+    }
+  
+    info->physical_core_count = (s32)all_cpus_count;
+    info->primary_core_count  = (s32)performance_core_count;
+  } 
+  // info->secondary_core_count = #TODO;
+  */
+  { OS_System_Info* info = &os_state_w32.system_info;
+    u8 buffer[MAX_COMPUTERNAME_LENGTH + 1] = {0};
+    DWORD size = MAX_COMPUTERNAME_LENGTH + 1;
+    if(GetComputerNameA((char*)buffer, &size)) {
+      string machine_name_temp = string(size, buffer);
+      info->machine_name = copy_string(machine_name_temp);
+    }
+  }
+  
+  debug_break();
+  
+  { OS_Process_Info* info = &os_state_w32.process_info;
+    DWORD length = GetCurrentDirectoryW(0, 0);
+    // This can be freed later when we call temp_reset();
+    u16* memory = NewArray<u16>(get_temp_allocator(), length + 1);
+    length = GetCurrentDirectoryW(length + 1, (WCHAR*)memory);
+    info->working_path = wide_to_utf8(memory, length);
+    Assert(is_valid(info->working_path));
+  }
+  // [ ] Get Working directory (info->working_path)
+  // [ ] GetEnvironmentStringsW
   
   printf("Hello there!\n\n");
   // See: main_thread_base_entry_point
diff --git a/unity_build_exe.cpp b/unity_build_exe.cpp
index 443e2fc..e3c3126 100644
--- a/unity_build_exe.cpp
+++ b/unity_build_exe.cpp
@@ -1,10 +1,10 @@
 #include "unity_build_lib.cpp"
+#include "src/Base_Entry_Point.cpp" // Before OS_Win32.cpp, which calls Bootstrap_Main_Thread_Context().
+// #include "imgui-docking.cpp"
 #include "src/OS_Win32.cpp"
 // #include "src/OS_Linux.cpp"
 // #include "src/OS_MacOS.cpp"
 
-// #include "imgui-docking.cpp"
-
 
 #if OS_WINDOWS
  #if BUILD_CONSOLE_INTERFACE