CPPuddle namespace containing the kernel aggregation functionality. More...

Namespaces
namespace	detail

Typedefs
using	aggregated_executor_modes = cppuddle::kernel_aggregation::detail::aggregated_executor_modes

template<typename T , typename Host_Allocator , typename Executor >
using	allocator_slice = cppuddle::kernel_aggregation::detail::allocator_slice< T, Host_Allocator, Executor >

template<typename Executor >
using	aggregated_executor = cppuddle::kernel_aggregation::detail::aggregated_executor< Executor >

template<const char * kernelname, class Interface , class Pool >
using	aggregation_pool = cppuddle::kernel_aggregation::detail::aggregation_pool< kernelname, Interface, Pool >
	Pool to get an aggregation executor for the desired code region (kernelname)

Functions
template<const char * region_name, typename executor_t , typename return_type >
hpx::future< return_type >	aggregation_region (const size_t team_size, std::function< return_type(size_t, size_t, typename cppuddle::kernel_aggregation::detail::aggregated_executor< executor_t >::executor_slice &)> &&aggregation_area)
	Start an aggregation region (passed via lambda)

template<typename Agg_view_t >
CPPUDDLE_HOST_DEVICE_METHOD Agg_view_t::view_type	get_slice_subview (const size_t slice_id, const size_t max_slices, const Agg_view_t &agg_view)
	Get subview for the current slice.

template<typename Integer , std::enable_if_t< std::is_integral< Integer >::value, bool > = true, typename Agg_view_t , typename... Args>
CPPUDDLE_HOST_DEVICE_METHOD auto	map_views_to_slice (const Integer slice_id, const Integer max_slices, const Agg_view_t &current_arg, const Args &...rest)

template<typename Agg_executor_t , typename Agg_view_t , std::enable_if_t< Kokkos::is_view< typename Agg_view_t::view_type >::value, bool > = true, typename... Args>
CPPUDDLE_HOST_DEVICE_METHOD auto	map_views_to_slice (const Agg_executor_t &agg_exec, const Agg_view_t &current_arg, const Args &...rest)

template<typename Agg_executor_t , typename TargetView_t , typename SourceView_t >
void	aggregated_deep_copy (Agg_executor_t &agg_exec, TargetView_t &target, SourceView_t &source)
	Convenience function to perform an aggregated deep copy.

template<typename Agg_executor_t , typename TargetView_t , typename SourceView_t >
void	aggregated_deep_copy (Agg_executor_t &agg_exec, TargetView_t &target, SourceView_t &source, int elements_per_slice)
	Convenience function to perform an aggregated deep copy.

template<typename executor_t , typename TargetView_t , typename SourceView_t >
hpx::shared_future< void >	aggregrated_deep_copy_async (typename Aggregated_Executor< executor_t >::Executor_Slice &agg_exec, TargetView_t &target, SourceView_t &source)
	Convenience function to launch an aggregated deep copy asynchronously and get a future back.

template<typename executor_t , typename TargetView_t , typename SourceView_t >
hpx::shared_future< void >	aggregrated_deep_copy_async (typename Aggregated_Executor< executor_t >::Executor_Slice &agg_exec, TargetView_t &target, SourceView_t &source, int elements_per_slice)
	Convenience function to launch an aggregated deep copy asynchronously and get a future back.

Detailed Description

CPPuddle namespace containing the kernel aggregation functionality.

Typedef Documentation

◆ aggregated_executor

template<typename Executor >

using cppuddle::kernel_aggregation::aggregated_executor = typedef cppuddle::kernel_aggregation::detail::aggregated_executor<Executor>

Executor facilitating the kernel aggregation. Contains the executor_slice subclass, which is intended to be used by the individual tasks.

◆ aggregated_executor_modes

using cppuddle::kernel_aggregation::aggregated_executor_modes = typedef cppuddle::kernel_aggregation::detail::aggregated_executor_modes

Possible launch modes: EAGER = launch either when enough kernels are aggregated or when the executor becomes idle; STRICT = launch only when enough kernels are aggregated (be aware of deadlocks when not enough kernels are available!); ENDLESS = launch only when the executor becomes idle.

◆ aggregation_pool

template<const char * kernelname, class Interface , class Pool >

using cppuddle::kernel_aggregation::aggregation_pool = typedef cppuddle::kernel_aggregation::detail::aggregation_pool<kernelname, Interface, Pool>

Pool to get an aggregation executor for the desired code region (kernelname)

◆ allocator_slice

template<typename T , typename Host_Allocator , typename Executor >

using cppuddle::kernel_aggregation::allocator_slice = typedef cppuddle::kernel_aggregation::detail::allocator_slice<T, Host_Allocator, Executor>

Allocator to get a buffer slice of a buffer shared with other tasks in the same aggregation region

Function Documentation

◆ aggregated_deep_copy() [1/2]

template<typename Agg_executor_t , typename TargetView_t , typename SourceView_t >

void cppuddle::kernel_aggregation::aggregated_deep_copy	(	Agg_executor_t &	agg_exec,
		TargetView_t &	target,
		SourceView_t &	source
	)

Convenience function to perform an aggregated deep copy.

◆ aggregated_deep_copy() [2/2]

template<typename Agg_executor_t , typename TargetView_t , typename SourceView_t >

void cppuddle::kernel_aggregation::aggregated_deep_copy	(	Agg_executor_t &	agg_exec,
		TargetView_t &	target,
		SourceView_t &	source,
		int	elements_per_slice
	)

Convenience function to perform an aggregated deep copy.

◆ aggregation_region()

template<const char * region_name, typename executor_t , typename return_type >

hpx::future< return_type > cppuddle::kernel_aggregation::aggregation_region	(	const size_t	team_size,
		std::function< return_type(size_t, size_t, typename cppuddle::kernel_aggregation::detail::aggregated_executor< executor_t >::executor_slice &)> &&	aggregation_area
	)

Start an aggregation region (passed via lambda)

◆ aggregrated_deep_copy_async() [1/2]

template<typename executor_t , typename TargetView_t , typename SourceView_t >

hpx::shared_future< void > cppuddle::kernel_aggregation::aggregrated_deep_copy_async	(	typename Aggregated_Executor< executor_t >::Executor_Slice &	agg_exec,
		TargetView_t &	target,
		SourceView_t &	source
	)

Convenience function to launch an aggregated deep copy asynchronously and get a future back.

◆ aggregrated_deep_copy_async() [2/2]

template<typename executor_t , typename TargetView_t , typename SourceView_t >

hpx::shared_future< void > cppuddle::kernel_aggregation::aggregrated_deep_copy_async	(	typename Aggregated_Executor< executor_t >::Executor_Slice &	agg_exec,
		TargetView_t &	target,
		SourceView_t &	source,
		int	elements_per_slice
	)

Convenience function to launch an aggregated deep copy asynchronously and get a future back.

◆ get_slice_subview()

template<typename Agg_view_t >

CPPUDDLE_HOST_DEVICE_METHOD Agg_view_t::view_type cppuddle::kernel_aggregation::get_slice_subview	(	const size_t	slice_id,
		const size_t	max_slices,
		const Agg_view_t &	agg_view
	)

Get subview for the current slice.

◆ map_views_to_slice() [1/2]

template<typename Agg_executor_t , typename Agg_view_t , std::enable_if_t< Kokkos::is_view< typename Agg_view_t::view_type >::value, bool > = true, typename... Args>

CPPUDDLE_HOST_DEVICE_METHOD auto cppuddle::kernel_aggregation::map_views_to_slice	(	const Agg_executor_t &	agg_exec,
		const Agg_view_t &	current_arg,
		const Args &...	rest
	)

Convenience function mapping aggregated Kokkos views to the current execution slice by using subviews.

◆ map_views_to_slice() [2/2]

template<typename Integer , std::enable_if_t< std::is_integral< Integer >::value, bool > = true, typename Agg_view_t , typename... Args>

CPPUDDLE_HOST_DEVICE_METHOD auto cppuddle::kernel_aggregation::map_views_to_slice	(	const Integer	slice_id,
		const Integer	max_slices,
		const Agg_view_t &	current_arg,
		const Args &...	rest
	)