CPPuddle
Loading...
Searching...
No Matches
Classes | Public Types | Public Member Functions | Public Attributes | List of all members
cppuddle::kernel_aggregation::detail::aggregated_executor< Executor > Class Template Reference

Executor Class that aggregates function calls for specific kernels. More...

#include <aggregation_executors_and_allocators.hpp>

Classes

class  executor_slice
 Slice class - meant as a scope interface to the aggregated executor. More...
 

Public Types

using Executor_Slice = executor_slice
 
using buffer_entry_t = std::tuple< void *, const size_t, std::atomic< size_t >, bool, const size_t, size_t >
 Data entry for a buffer allocation: void* pointer, size_t for buffer-size, atomic for the slice counter, a bool flag, location_id, gpu_id.
 

Public Member Functions

template<typename T , typename Host_Allocator >
T * get (const size_t size, const size_t slice_alloc_counter)
 Get new buffer OR get buffer already allocated by different slice.
 
template<typename T , typename Host_Allocator >
void mark_unused (T *p, const size_t size)
 Notify buffer list that one slice is done with the buffer.
 
bool sync_aggregation_slices (const size_t slice_launch_counter)
 Only meant to be accessed by the slice executors.
 
template<typename F , typename... Ts>
void post (const size_t slice_launch_counter, F &&f, Ts &&...ts)
 Only meant to be accessed by the slice executors.
 
template<typename F , typename... Ts>
hpx::lcos::future< void > async (const size_t slice_launch_counter, F &&f, Ts &&...ts)
 Only meant to be accessed by the slice executors.
 
template<typename F , typename... Ts>
hpx::lcos::shared_future< void > wrap_async (const size_t slice_launch_counter, F &&f, Ts &&...ts)
 Only meant to be accessed by the slice executors.
 
bool slice_available (void)
 
std::optional< hpx::lcos::future< executor_slice > > request_executor_slice ()
 
void reduce_usage_counter (void)
 
 ~aggregated_executor (void)
 
 aggregated_executor (const size_t number_slices, aggregated_executor_modes mode, const size_t gpu_id=0)
 
 aggregated_executor (const aggregated_executor &other)=delete
 
aggregated_executor & operator= (const aggregated_executor &other)=delete
 
 aggregated_executor (aggregated_executor &&other)=delete
 
aggregated_executor & operator= (aggregated_executor &&other)=delete
 

Public Attributes

size_t gpu_id
 
hpx::lcos::local::promise< void > slices_full_promise
 
std::vector< hpx::lcos::local::promise< executor_slice > > executor_slices
 Promises with the slice executors – to be set when the starting criteria are met.
 
std::deque< aggregated_function_call< Executor > > function_calls
 List of aggregated function calls - function will be launched when all slices have called it.
 
aggregation_mutex_t mut
 For synchronizing the access to the function calls list.
 
std::deque< buffer_entry_t > buffer_allocations
 Keeps track of the aggregated buffer allocations done in all the slices.
 
std::unordered_map< void *, size_t > buffer_allocations_map
 Map pointer to deque index for fast access in the deallocations.
 
aggregation_mutex_t buffer_mut
 For synchronizing the access to the buffer_allocations.
 
std::atomic< size_t > buffer_counter = 0
 
hpx::lcos::future< void > current_continuation
 
hpx::lcos::future< void > last_stream_launch_done
 
std::atomic< size_t > overall_launch_counter = 0
 
size_t launched_slices
 

Detailed Description

template<typename Executor>
class cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >

Executor Class that aggregates function calls for specific kernels.

The executor is not meant to be used directly. Instead, it yields multiple executor_slice objects that serve as interfaces. Slices from the same aggregated_executor are meant to execute the same function calls but on different data (i.e. different tasks).

Member Typedef Documentation

◆ buffer_entry_t

Data entry for a buffer allocation: void* pointer, size_t for buffer-size, atomic for the slice counter, a bool flag, location_id, gpu_id.

◆ Executor_Slice

Constructor & Destructor Documentation

◆ ~aggregated_executor()

◆ aggregated_executor() [1/3]

template<typename Executor >
cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::aggregated_executor ( const size_t  number_slices,
aggregated_executor_modes  mode,
const size_t  gpu_id = 0 
)
inline

◆ aggregated_executor() [2/3]

◆ aggregated_executor() [3/3]

Member Function Documentation

◆ async()

template<typename Executor >
template<typename F , typename... Ts>
hpx::lcos::future< void > cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::async ( const size_t  slice_launch_counter,
F &&  f,
Ts &&...  ts 
)
inline

Only meant to be accessed by the slice executors.

◆ get()

template<typename Executor >
template<typename T , typename Host_Allocator >
T * cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::get ( const size_t  size,
const size_t  slice_alloc_counter 
)
inline

Get new buffer OR get buffer already allocated by different slice.

◆ mark_unused()

Notify buffer list that one slice is done with the buffer.

◆ operator=() [1/2]

◆ operator=() [2/2]

◆ post()

template<typename Executor >
template<typename F , typename... Ts>
void cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::post ( const size_t  slice_launch_counter,
F &&  f,
Ts &&...  ts 
)
inline

Only meant to be accessed by the slice executors.

◆ reduce_usage_counter()

◆ request_executor_slice()

template<typename Executor >
std::optional< hpx::lcos::future< executor_slice > > cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::request_executor_slice ( )
inline

◆ slice_available()

◆ sync_aggregation_slices()

template<typename Executor >
bool cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::sync_aggregation_slices ( const size_t  slice_launch_counter)
inline

Only meant to be accessed by the slice executors.

◆ wrap_async()

template<typename Executor >
template<typename F , typename... Ts>
hpx::lcos::shared_future< void > cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::wrap_async ( const size_t  slice_launch_counter,
F &&  f,
Ts &&...  ts 
)
inline

Only meant to be accessed by the slice executors.

Member Data Documentation

◆ buffer_allocations

Keeps track of the aggregated buffer allocations done in all the slices.

◆ buffer_allocations_map

template<typename Executor >
std::unordered_map<void*,size_t> cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::buffer_allocations_map

Map pointer to deque index for fast access in the deallocations.

◆ buffer_counter

◆ buffer_mut

For synchronizing the access to the buffer_allocations.

◆ current_continuation

template<typename Executor >
hpx::lcos::future<void> cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::current_continuation

◆ executor_slices

template<typename Executor >
std::vector<hpx::lcos::local::promise<executor_slice> > cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::executor_slices

Promises with the slice executors – to be set when the starting criteria are met.

◆ function_calls

List of aggregated function calls - function will be launched when all slices have called it.

◆ gpu_id

◆ last_stream_launch_done

template<typename Executor >
hpx::lcos::future<void> cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::last_stream_launch_done

◆ launched_slices

◆ mut

For synchronizing the access to the function calls list.

◆ overall_launch_counter

template<typename Executor >
std::atomic<size_t> cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::overall_launch_counter = 0

◆ slices_full_promise

template<typename Executor >
hpx::lcos::local::promise<void> cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::slices_full_promise

The documentation for this class was generated from the following file: