// really #hacky forward declares. struct Work_Entry; struct Worker_Info; struct Work_List; struct Thread_Group; void init(Work_List* list); void destroy(Work_List* list); s64 thread_group_run (Thread* thread); struct Thread { Thread_Context* context; Thread_Proc proc; void* data; s64 index; OS_Thread os_thread; // Used by Thread_Group Worker_Info* worker_info; }; global u32 next_thread_index = 1; // Thread Group API (Copied from Jonathan Blow's implementation - I did not come up with this.) struct Work_Entry { Work_Entry* next; void* work; s64 thread_index; // Thread.index for the thread that handled this work // string logging_name; f64 issue_time; s32 work_list_index; }; struct Work_List { Semaphore semaphore; Mutex mutex; Work_Entry* first; Work_Entry* last; s32 count; }; struct Worker_Info { Thread thread; Work_List available; Work_List completed; Thread_Group* group; s32 worker_index; u8 padding0[44]; // Work steal indices should be on another cache line: ArrayView work_steal_indices; u8 padding1[48]; }; static_assert(sizeof(Worker_Info) % 64 == 0); // This MUST be padded to cache line! enum class Thread_Continue_Status: s32 { STOP = 0, CONTINUE = 1 }; typedef Thread_Continue_Status (*Thread_Group_Proc)(Thread_Group* group, Thread* thread, void* work); struct Thread_Group { void* data; Thread_Group_Proc proc; string name; Allocator allocator; // for allocating work indices ArrayView worker_info; // only alloc'd once with allocator?? s32 next_worker_index; bool initialized = false; bool started = false; bool should_exit = false; }; // This might be too slow. s32 get_thread_index (Thread_Group* group, s32 thread_index) { for_each(w, group->worker_info) { if (group->worker_info[w].thread.index == thread_index) { return (s32)w; // zero-indexed to thread group } } return -1; }