CPPuddle
Namespaces | Typedefs | Functions
cppuddle::kernel_aggregation Namespace Reference

CPPuddle namespace containing the kernel aggregation functionality. More...

Namespaces

 detail
 

Typedefs

using aggregated_executor_modes = cppuddle::kernel_aggregation::detail::aggregated_executor_modes
 
template<typename T , typename Host_Allocator , typename Executor >
using allocator_slice = cppuddle::kernel_aggregation::detail::allocator_slice< T, Host_Allocator, Executor >
 
template<typename Executor >
using aggregated_executor = cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >
 
template<const char * kernelname, class Interface , class Pool >
using aggregation_pool = cppuddle::kernel_aggregation::detail::aggregation_pool< kernelname, Interface, Pool >
 Pool to get an aggregation executor for the desired code region (kernelname) More...
 

Functions

template<const char * region_name, typename executor_t , typename return_type >
hpx::future< return_type > aggregation_region (const size_t team_size, std::function< return_type(size_t, size_t, typename cppuddle::kernel_aggregation::detail::aggregated_executor< executor_t >::executor_slice &)> &&aggregation_area)
 Start an aggregation region (passed via lambda) More...
 
template<typename Agg_view_t >
CPPUDDLE_HOST_DEVICE_METHOD Agg_view_t::view_type get_slice_subview (const size_t slice_id, const size_t max_slices, const Agg_view_t &agg_view)
 Get subview for the current slice. More...
 
template<typename Integer , std::enable_if_t< std::is_integral< Integer >::value, bool > = true, typename Agg_view_t , typename... Args>
CPPUDDLE_HOST_DEVICE_METHOD auto map_views_to_slice (const Integer slice_id, const Integer max_slices, const Agg_view_t &current_arg, const Args &...rest)
 
template<typename Agg_executor_t , typename Agg_view_t , std::enable_if_t< Kokkos::is_view< typename Agg_view_t::view_type >::value, bool > = true, typename... Args>
CPPUDDLE_HOST_DEVICE_METHOD auto map_views_to_slice (const Agg_executor_t &agg_exec, const Agg_view_t &current_arg, const Args &...rest)
 
template<typename Agg_executor_t , typename TargetView_t , typename SourceView_t >
void aggregated_deep_copy (Agg_executor_t &agg_exec, TargetView_t &target, SourceView_t &source)
 Convenience function to perform an aggregated deep copy. More...
 
template<typename Agg_executor_t , typename TargetView_t , typename SourceView_t >
void aggregated_deep_copy (Agg_executor_t &agg_exec, TargetView_t &target, SourceView_t &source, int elements_per_slice)
 Convenience function to perform an aggregated deep copy. More...
 
template<typename executor_t , typename TargetView_t , typename SourceView_t >
hpx::shared_future< void > aggregrated_deep_copy_async (typename Aggregated_Executor< executor_t >::Executor_Slice &agg_exec, TargetView_t &target, SourceView_t &source)
 Convenience function to launch an aggregated kernel and get a future back. More...
 
template<typename executor_t , typename TargetView_t , typename SourceView_t >
hpx::shared_future< void > aggregrated_deep_copy_async (typename Aggregated_Executor< executor_t >::Executor_Slice &agg_exec, TargetView_t &target, SourceView_t &source, int elements_per_slice)
 Convenience function to launch an aggregated kernel and get a future back. More...
 

Detailed Description

CPPuddle namespace containing the kernel aggregation functionality.

Typedef Documentation

◆ aggregated_executor

Executor facilitating the kernel aggregation. Contains the executor_slice subclass, which is intended to be used by the individual tasks.

◆ aggregated_executor_modes

Possible launch modes: EAGER = launch either when enough kernels are aggregated or when the executor becomes idle; STRICT = launch only when enough kernels are aggregated (be aware of deadlocks when not enough kernels are available!); ENDLESS = launch only when the executor becomes idle.

◆ aggregation_pool

template<const char * kernelname, class Interface , class Pool >
using cppuddle::kernel_aggregation::aggregation_pool = typedef cppuddle::kernel_aggregation::detail::aggregation_pool<kernelname, Interface, Pool>

Pool to get an aggregation executor for the desired code region (kernelname)

◆ allocator_slice

template<typename T , typename Host_Allocator , typename Executor >
using cppuddle::kernel_aggregation::allocator_slice = typedef cppuddle::kernel_aggregation::detail::allocator_slice<T, Host_Allocator, Executor>

Allocator to get a buffer slice of a buffer shared with other tasks in the same aggregation region

Function Documentation

◆ aggregated_deep_copy() [1/2]

template<typename Agg_executor_t , typename TargetView_t , typename SourceView_t >
void cppuddle::kernel_aggregation::aggregated_deep_copy ( Agg_executor_t &  agg_exec,
TargetView_t &  target,
SourceView_t &  source 
)

Convenience function to perform an aggregated deep copy.

◆ aggregated_deep_copy() [2/2]

template<typename Agg_executor_t , typename TargetView_t , typename SourceView_t >
void cppuddle::kernel_aggregation::aggregated_deep_copy ( Agg_executor_t &  agg_exec,
TargetView_t &  target,
SourceView_t &  source,
int  elements_per_slice 
)

Convenience function to perform an aggregated deep copy.

◆ aggregation_region()

template<const char * region_name, typename executor_t , typename return_type >
hpx::future<return_type> cppuddle::kernel_aggregation::aggregation_region ( const size_t  team_size,
std::function< return_type(size_t, size_t, typename cppuddle::kernel_aggregation::detail::aggregated_executor< executor_t >::executor_slice &)> &&  aggregation_area 
)

Start an aggregation region (passed via lambda)

◆ aggregrated_deep_copy_async() [1/2]

template<typename executor_t , typename TargetView_t , typename SourceView_t >
hpx::shared_future<void> cppuddle::kernel_aggregation::aggregrated_deep_copy_async ( typename Aggregated_Executor< executor_t >::Executor_Slice &  agg_exec,
TargetView_t &  target,
SourceView_t &  source 
)

Convenience function to launch an aggregated kernel and get a future back.

◆ aggregrated_deep_copy_async() [2/2]

template<typename executor_t , typename TargetView_t , typename SourceView_t >
hpx::shared_future<void> cppuddle::kernel_aggregation::aggregrated_deep_copy_async ( typename Aggregated_Executor< executor_t >::Executor_Slice &  agg_exec,
TargetView_t &  target,
SourceView_t &  source,
int  elements_per_slice 
)

Convenience function to launch an aggregated kernel and get a future back.

◆ get_slice_subview()

template<typename Agg_view_t >
CPPUDDLE_HOST_DEVICE_METHOD Agg_view_t::view_type cppuddle::kernel_aggregation::get_slice_subview ( const size_t  slice_id,
const size_t  max_slices,
const Agg_view_t &  agg_view 
)

Get subview for the current slice.

◆ map_views_to_slice() [1/2]

template<typename Agg_executor_t , typename Agg_view_t , std::enable_if_t< Kokkos::is_view< typename Agg_view_t::view_type >::value, bool > = true, typename... Args>
CPPUDDLE_HOST_DEVICE_METHOD auto cppuddle::kernel_aggregation::map_views_to_slice ( const Agg_executor_t &  agg_exec,
const Agg_view_t &  current_arg,
const Args &...  rest 
)

Convenience function mapping aggregated Kokkos views to the current execution slice by using subviews

◆ map_views_to_slice() [2/2]

template<typename Integer , std::enable_if_t< std::is_integral< Integer >::value, bool > = true, typename Agg_view_t , typename... Args>
CPPUDDLE_HOST_DEVICE_METHOD auto cppuddle::kernel_aggregation::map_views_to_slice ( const Integer  slice_id,
const Integer  max_slices,
const Agg_view_t &  current_arg,
const Args &...  rest 
)

Convenience function mapping aggregated Kokkos views to the current execution slice by using subviews