CPPuddle
|
#include <chrono>
#include <hpx/futures/future.hpp>
#include <hpx/async_cuda/cuda_executor.hpp>
#include <boost/program_options.hpp>
#include "cppuddle/memory_recycling/cuda_recycling_allocators.hpp"
#include "cppuddle/memory_recycling/util/cuda_recycling_device_buffer.hpp"
#include "cppuddle/kernel_aggregation/kernel_aggregation_interface.hpp"
Functions | |
template<typename float_t > | |
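__global__ void | __launch_bounds__ (1024, 2) triad_kernel(float_t *A, const float_t *B, const float_t *C, const float_t scalar, const size_t start_id, const size_t kernel_size, const size_t problem_size) |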
int | hpx_main (int argc, char *argv[]) |
int | main (int argc, char *argv[]) |
Variables | |
const float_t * | B |
const float_t * | C |
const float_t | scalar |
const size_t | start_id |
const size_t | kernel_size |
const size_t | problem_size |
A [i] = B[i] + scalar * C[i] | |
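__global__ void __launch_bounds__ (1024, 2) triad_kernel | ( | float_t * | A, |
const float_t * | B, |
const float_t * | C, |
const float_t | scalar, |
const size_t | start_id, |
const size_t | kernel_size, |
const size_t | problem_size | ||
) |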
int hpx_main | ( | int | argc, |
char * | argv[] | ||
) |
int main | ( | int | argc, |
char * | argv[] | ||
) |
const float_t* B |
const float_t* C |
const size_t kernel_size |
const size_t problem_size |
const float_t scalar |
const size_t start_id |