// NOTE(review): extraction artifact — the leading numerals ("20", "23", ...)
// are original line numbers fused into the text, and gaps in that numbering
// mean source lines are missing from this view (e.g. the if-conditions that
// guard the throws, the tail of the parameter list, closing braces). Code is
// kept byte-identical; comments only. Purpose (hedged): one-time init of a
// per-GPU pool of aggregation executors — confirm against the full file.
20 static void init(
size_t number_of_executors,
size_t slices_per_executor,
// Double-initialization guard: the condition line is missing from this view,
// but the thrown message shows init must not be called twice per pool.
23 throw std::runtime_error(
24 std::string(
"Trying to initialize cppuddle aggregation pool twice") +
25 " Agg pool name: " + std::string(kernelname));
// Device-count guard: rejects num_devices above the compile-time GPU maximum
// (condition line missing from this view).
28 throw std::runtime_error(
30 "Trying to initialize aggregation with more devices than the "
31 "maximum number of GPUs given at compiletime") +
32 " Agg pool name: " + std::string(kernelname));
34 number_devices = num_devices;
// Per GPU: take that GPU's pool mutex, assert the pool starts empty, then
// construct number_of_executors executors in place.
35 for (
size_t gpu_id = 0; gpu_id < number_devices; gpu_id++) {
37 std::lock_guard<aggregation_mutex_t> guard(instance()[gpu_id].pool_mutex);
38 assert(instance()[gpu_id].aggregation_executor_pool.empty());
39 for (
int i = 0; i < number_of_executors; i++) {
// emplace_back's remaining arguments are on a missing line; presumably the
// mode and gpu_id, matching the growing-pool emplace later in the file.
40 instance()[gpu_id].aggregation_executor_pool.emplace_back(slices_per_executor,
// Store per-GPU configuration; read again when the pool grows on demand.
43 instance()[gpu_id].slices_per_executor = slices_per_executor;
44 instance()[gpu_id].mode = mode;
46 is_initialized =
true;
// NOTE(review): the enclosing function's signature is not visible in this
// chunk (and the body runs past it); presumably the entry point that hands
// out an executor slice for a given gpu_id — confirm against the full file.
// Leading numerals are fused original line numbers; numbering gaps mark
// missing source lines. Code kept byte-identical; comments only.
51 if (!is_initialized) {
52 throw std::runtime_error(
53 std::string(
"ERROR: Trying to use cppuddle aggregation pool without first calling init!\n") +
// Hold this GPU's pool mutex for the remainder of the request.
58 std::lock_guard<aggregation_mutex_t>
guard(instance()[gpu_id].pool_mutex);
59 assert(!instance()[gpu_id].aggregation_executor_pool.empty());
// Return slot: an optional future (template arguments on a missing line).
60 std::optional<hpx::lcos::future<
// Attempt 1: ask the current round-robin executor for a slice.
63 size_t local_id = (instance()[gpu_id].current_interface) %
64 instance()[gpu_id].aggregation_executor_pool.size();
65 ret = instance()[gpu_id].aggregation_executor_pool[
local_id].request_executor_slice();
67 if (
ret.has_value()) {
// Attempt 2 (loop header on missing lines): retry the other executors,
// advancing current_interface each pass; abort_number (pool size + 1)
// presumably bounds the retries.
72 const size_t abort_number = instance()[gpu_id].aggregation_executor_pool.size() + 1;
74 local_id = (++(instance()[gpu_id].current_interface)) %
75 instance()[gpu_id].aggregation_executor_pool.size();
77 instance()[gpu_id].aggregation_executor_pool[
local_id].request_executor_slice();
78 if (
ret.has_value()) {
// Attempt 3: if this pool is allowed to grow, append a new executor built
// from the per-GPU config stored at init, point current_interface at it,
// and request a slice from the fresh executor.
85 if (instance()[gpu_id].growing_pool) {
86 instance()[gpu_id].aggregation_executor_pool.emplace_back(
87 instance()[gpu_id].slices_per_executor, instance()[gpu_id].mode, gpu_id);
88 instance()[gpu_id].current_interface =
89 instance()[gpu_id].aggregation_executor_pool.size() - 1;
// Sanity cap on runaway pool growth (assert-only, debug builds).
90 assert(instance()[gpu_id].aggregation_executor_pool.size() < 20480);
91 ret = instance()[gpu_id]
92 .aggregation_executor_pool[instance()[gpu_id].current_interface]
93 .request_executor_slice();