Executor Class that aggregates function calls for specific kernels. More...

#include <aggregation_executors_and_allocators.hpp>

Classes
class	executor_slice
	Slice class - meant as a scope interface to the aggregated executor. More...

Public Types
using	Executor_Slice = executor_slice

using	buffer_entry_t = std::tuple< void *, const size_t, std::atomic< size_t >, bool, const size_t, size_t >
	Data entry for a buffer allocation, in tuple order: void* pointer, size_t for the buffer size, atomic for the slice counter, a bool flag (its meaning is not documented here), location_id, gpu_id.

Public Member Functions
template<typename T , typename Host_Allocator >
T *	get (const size_t size, const size_t slice_alloc_counter)
	Get new buffer OR get buffer already allocated by different slice.

template<typename T , typename Host_Allocator >
void	mark_unused (T *p, const size_t size)
	Notify buffer list that one slice is done with the buffer.

bool	sync_aggregation_slices (const size_t slice_launch_counter)
	Only meant to be accessed by the slice executors.

template<typename F , typename... Ts>
void	post (const size_t slice_launch_counter, F &&f, Ts &&...ts)
	Only meant to be accessed by the slice executors.

template<typename F , typename... Ts>
hpx::lcos::future< void >	async (const size_t slice_launch_counter, F &&f, Ts &&...ts)
	Only meant to be accessed by the slice executors.

template<typename F , typename... Ts>
hpx::lcos::shared_future< void >	wrap_async (const size_t slice_launch_counter, F &&f, Ts &&...ts)
	Only meant to be accessed by the slice executors.

bool	slice_available (void)

std::optional< hpx::lcos::future< executor_slice > >	request_executor_slice ()

void	reduce_usage_counter (void)

	~aggregated_executor (void)

	aggregated_executor (const size_t number_slices, aggregated_executor_modes mode, const size_t gpu_id=0)

	aggregated_executor (const aggregated_executor &other)=delete

aggregated_executor &	operator= (const aggregated_executor &other)=delete

	aggregated_executor (aggregated_executor &&other)=delete

aggregated_executor &	operator= (aggregated_executor &&other)=delete

Public Attributes
size_t	gpu_id

hpx::lcos::local::promise< void >	slices_full_promise

std::vector< hpx::lcos::local::promise< executor_slice > >	executor_slices
	Promises with the slice executors – to be set when the starting criterion is met.

std::deque< aggregated_function_call< Executor > >	function_calls
	List of aggregated function calls – a function call is launched once all slices have called it.

aggregation_mutex_t	mut
	For synchronizing the access to the function calls list.

std::deque< buffer_entry_t >	buffer_allocations
	Keeps track of the aggregated buffer allocations done in all the slices.

std::unordered_map< void *, size_t >	buffer_allocations_map
	Maps a buffer pointer to its deque index for fast access during deallocation.

aggregation_mutex_t	buffer_mut
	For synchronizing the access to the buffer_allocations.

std::atomic< size_t >	buffer_counter = 0

hpx::lcos::future< void >	current_continuation

hpx::lcos::future< void >	last_stream_launch_done

std::atomic< size_t >	overall_launch_counter = 0

size_t	launched_slices

Detailed Description

template<typename Executor>
class cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >

Executor Class that aggregates function calls for specific kernels.

Executor is not meant to be used directly. Instead, it yields multiple executor_slice objects, which serve as its interfaces. Slices from the same aggregated_executor are meant to execute the same function calls but on different data (i.e., different tasks).

Classes

Public Types

Public Member Functions

Public Attributes

Detailed Description

Member Typedef Documentation

◆ buffer_entry_t

◆ Executor_Slice

Constructor & Destructor Documentation

◆ ~aggregated_executor()

◆ aggregated_executor() [1/3]

◆ aggregated_executor() [2/3]

◆ aggregated_executor() [3/3]

Member Function Documentation

◆ async()

◆ get()

◆ mark_unused()

◆ operator=() [1/2]

◆ operator=() [2/2]

◆ post()

◆ reduce_usage_counter()

◆ request_executor_slice()

◆ slice_available()

◆ sync_aggregation_slices()

◆ wrap_async()

Member Data Documentation

◆ buffer_allocations

◆ buffer_allocations_map

◆ buffer_counter

◆ buffer_mut

◆ current_continuation

◆ executor_slices

◆ function_calls

◆ gpu_id

◆ last_stream_launch_done

◆ launched_slices

◆ mut

◆ overall_launch_counter

◆ slices_full_promise