CPPuddle
Classes | Public Types | Public Member Functions | Public Attributes | List of all members
cppuddle::kernel_aggregation::detail::aggregated_executor< Executor > Class Template Reference

Executor Class that aggregates function calls for specific kernels. More...

#include <aggregation_executors_and_allocators.hpp>

Classes

class  executor_slice
 Slice class - meant as a scope interface to the aggregated executor. More...
 

Public Types

using Executor_Slice = executor_slice
 
using buffer_entry_t = std::tuple< void *, const size_t, std::atomic< size_t >, bool, const size_t, size_t >
 Data entry for a buffer allocation: void* pointer, size_t for buffer-size, atomic for the slice counter, a bool flag, location_id, gpu_id. More...
 

Public Member Functions

template<typename T , typename Host_Allocator >
T * get (const size_t size, const size_t slice_alloc_counter)
 Get new buffer OR get buffer already allocated by different slice. More...
 
template<typename T , typename Host_Allocator >
void mark_unused (T *p, const size_t size)
 Notify buffer list that one slice is done with the buffer. More...
 
bool sync_aggregation_slices (const size_t slice_launch_counter)
 Only meant to be accessed by the slice executors. More...
 
template<typename F , typename... Ts>
void post (const size_t slice_launch_counter, F &&f, Ts &&...ts)
 Only meant to be accessed by the slice executors. More...
 
template<typename F , typename... Ts>
hpx::lcos::future< void > async (const size_t slice_launch_counter, F &&f, Ts &&...ts)
 Only meant to be accessed by the slice executors. More...
 
template<typename F , typename... Ts>
hpx::lcos::shared_future< void > wrap_async (const size_t slice_launch_counter, F &&f, Ts &&...ts)
 Only meant to be accessed by the slice executors. More...
 
bool slice_available (void)
 
std::optional< hpx::lcos::future< executor_slice > > request_executor_slice ()
 
void reduce_usage_counter (void)
 
 ~aggregated_executor (void)
 
 aggregated_executor (const size_t number_slices, aggregated_executor_modes mode, const size_t gpu_id=0)
 
 aggregated_executor (const aggregated_executor &other)=delete
 
aggregated_executor & operator= (const aggregated_executor &other)=delete
 
 aggregated_executor (aggregated_executor &&other)=delete
 
aggregated_executor & operator= (aggregated_executor &&other)=delete
 

Public Attributes

size_t gpu_id
 
hpx::lcos::local::promise< void > slices_full_promise
 
std::vector< hpx::lcos::local::promise< executor_slice > > executor_slices
 Promises with the slice executors – to be set when the starting criterion is met. More...
 
std::deque< aggregated_function_call< Executor > > function_calls
 List of aggregated function calls - function will be launched when all slices have called it. More...
 
aggregation_mutex_t mut
 For synchronizing the access to the function calls list. More...
 
std::deque< buffer_entry_t > buffer_allocations
 Keeps track of the aggregated buffer allocations done in all the slices. More...
 
std::unordered_map< void *, size_t > buffer_allocations_map
 Map pointer to deque index for fast access in the deallocations. More...
 
aggregation_mutex_t buffer_mut
 For synchronizing the access to the buffer_allocations. More...
 
std::atomic< size_t > buffer_counter = 0
 
hpx::lcos::future< void > current_continuation
 
hpx::lcos::future< void > last_stream_launch_done
 
std::atomic< size_t > overall_launch_counter = 0
 
size_t launched_slices
 

Detailed Description

template<typename Executor>
class cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >

Executor Class that aggregates function calls for specific kernels.

Executor is not meant to be used directly. Instead, it yields multiple executor_slice objects. These serve as interfaces. Slices from the same aggregated_executor are meant to execute the same function calls but on different data (i.e. different tasks).

Member Typedef Documentation

◆ buffer_entry_t

template<typename Executor >
using cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::buffer_entry_t = std::tuple<void*, const size_t, std::atomic<size_t>, bool, const size_t, size_t>

Data entry for a buffer allocation: void* pointer, size_t for buffer-size, atomic for the slice counter, a bool flag, location_id, gpu_id.

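◆ Executor_Slice

template<typename Executor >
using cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::Executor_Slice = executor_slice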

Constructor & Destructor Documentation

◆ ~aggregated_executor()

template<typename Executor >
cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::~aggregated_executor ( void  )
inline

◆ aggregated_executor() [1/3]

template<typename Executor >
cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::aggregated_executor ( const size_t  number_slices,
aggregated_executor_modes  mode,
const size_t  gpu_id = 0 
)
inline

◆ aggregated_executor() [2/3]

template<typename Executor >
cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::aggregated_executor ( const aggregated_executor< Executor > &  other)
delete

◆ aggregated_executor() [3/3]

template<typename Executor >
cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::aggregated_executor ( aggregated_executor< Executor > &&  other)
delete

Member Function Documentation

◆ async()

template<typename Executor >
template<typename F , typename... Ts>
hpx::lcos::future<void> cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::async ( const size_t  slice_launch_counter,
F &&  f,
Ts &&...  ts 
)
inline

Only meant to be accessed by the slice executors.

◆ get()

template<typename Executor >
template<typename T , typename Host_Allocator >
T* cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::get ( const size_t  size,
const size_t  slice_alloc_counter 
)
inline

Get new buffer OR get buffer already allocated by different slice.

◆ mark_unused()

template<typename Executor >
template<typename T , typename Host_Allocator >
void cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::mark_unused ( T *  p,
const size_t  size 
)
inline

Notify buffer list that one slice is done with the buffer.

◆ operator=() [1/2]

template<typename Executor >
aggregated_executor& cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::operator= ( aggregated_executor< Executor > &&  other)
delete

◆ operator=() [2/2]

template<typename Executor >
aggregated_executor& cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::operator= ( const aggregated_executor< Executor > &  other)
delete

◆ post()

template<typename Executor >
template<typename F , typename... Ts>
void cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::post ( const size_t  slice_launch_counter,
F &&  f,
Ts &&...  ts 
)
inline

Only meant to be accessed by the slice executors.

◆ reduce_usage_counter()

template<typename Executor >
void cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::reduce_usage_counter ( void  )
inline

◆ request_executor_slice()

template<typename Executor >
std::optional<hpx::lcos::future<executor_slice> > cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::request_executor_slice ( void  )
inline

◆ slice_available()

template<typename Executor >
bool cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::slice_available ( void  )
inline

◆ sync_aggregation_slices()

template<typename Executor >
bool cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::sync_aggregation_slices ( const size_t  slice_launch_counter)
inline

Only meant to be accessed by the slice executors.

◆ wrap_async()

template<typename Executor >
template<typename F , typename... Ts>
hpx::lcos::shared_future<void> cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::wrap_async ( const size_t  slice_launch_counter,
F &&  f,
Ts &&...  ts 
)
inline

Only meant to be accessed by the slice executors.

Member Data Documentation

◆ buffer_allocations

template<typename Executor >
std::deque<buffer_entry_t> cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::buffer_allocations

Keeps track of the aggregated buffer allocations done in all the slices.

◆ buffer_allocations_map

template<typename Executor >
std::unordered_map<void*,size_t> cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::buffer_allocations_map

Map pointer to deque index for fast access in the deallocations.

◆ buffer_counter

template<typename Executor >
std::atomic<size_t> cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::buffer_counter = 0

◆ buffer_mut

template<typename Executor >
aggregation_mutex_t cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::buffer_mut

For synchronizing the access to the buffer_allocations.

◆ current_continuation

template<typename Executor >
hpx::lcos::future<void> cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::current_continuation

◆ executor_slices

template<typename Executor >
std::vector<hpx::lcos::local::promise<executor_slice> > cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::executor_slices

Promises with the slice executors – to be set when the starting criterion is met.

◆ function_calls

template<typename Executor >
std::deque<aggregated_function_call<Executor> > cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::function_calls

List of aggregated function calls - function will be launched when all slices have called it.

◆ gpu_id

template<typename Executor >
size_t cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::gpu_id

◆ last_stream_launch_done

template<typename Executor >
hpx::lcos::future<void> cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::last_stream_launch_done

◆ launched_slices

template<typename Executor >
size_t cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::launched_slices

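◆ mut

template<typename Executor >
aggregation_mutex_t cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::mut

For synchronizing the access to the function calls list.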

◆ overall_launch_counter

template<typename Executor >
std::atomic<size_t> cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::overall_launch_counter = 0

◆ slices_full_promise

template<typename Executor >
hpx::lcos::local::promise<void> cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >::slices_full_promise

The documentation for this class was generated from the following file: