12 #ifndef PLSSVM_BACKENDS_SYCL_SVM_KERNEL_ND_RANGE_HPP_ 
   13 #define PLSSVM_BACKENDS_SYCL_SVM_KERNEL_ND_RANGE_HPP_ 
   19 #include "sycl/sycl.hpp"                             
   50     nd_range_device_kernel_linear(::sycl::handler &cgh, 
const real_type *q, 
real_type *ret, 
const real_type *d, 
const real_type *data_d, 
const real_type QA_cost, 
const real_type cost, 
const kernel_index_type num_rows, 
const kernel_index_type feature_range, 
const real_type add, 
const kernel_index_type id) :
 
   51         data_intern_i_{ ::
sycl::range<2>{ 
THREAD_BLOCK_SIZE, 
INTERNAL_BLOCK_SIZE }, cgh }, data_intern_j_{ ::sycl::range<2>{ 
THREAD_BLOCK_SIZE, 
INTERNAL_BLOCK_SIZE }, cgh }, q_{ q }, ret_{ ret }, d_{ d }, data_d_{ data_d }, QA_cost_{ QA_cost }, cost_{ cost }, num_rows_{ num_rows }, feature_range_{ feature_range }, add_{ add }, device_{ 
id } {}
 
   65         if (nd_idx.get_local_range(0) < 
THREAD_BLOCK_SIZE && nd_idx.get_local_range(1) == 0) {
 
   66             #pragma unroll INTERNAL_BLOCK_SIZE 
   68                 data_intern_i_[nd_idx.get_local_range(0)][block_id] = 0.0;
 
   69                 data_intern_j_[nd_idx.get_local_range(0)][block_id] = 0.0;
 
   72         ::sycl::group_barrier(nd_idx.get_group());
 
   79             for (
kernel_index_type vec_index = 0; vec_index < feature_range_ * num_rows_; vec_index += num_rows_) {
 
   80                 ::sycl::group_barrier(nd_idx.get_group());
 
   81                 #pragma unroll INTERNAL_BLOCK_SIZE 
   84                     if (nd_idx.get_local_id(1) == idx) {
 
   85                         data_intern_i_[nd_idx.get_local_id(0)][block_id] = data_d_[block_id + vec_index + i];
 
   88                     if (nd_idx.get_local_id(0) == idx_2) {
 
   89                         data_intern_j_[nd_idx.get_local_id(1)][block_id] = data_d_[block_id + vec_index + j];
 
   92                 ::sycl::group_barrier(nd_idx.get_group());
 
   94                 #pragma unroll INTERNAL_BLOCK_SIZE 
   96                     data_j[data_index] = data_intern_j_[nd_idx.get_local_id(1)][data_index];
 
   99                 #pragma unroll INTERNAL_BLOCK_SIZE 
  101                     const real_type data_i = data_intern_i_[nd_idx.get_local_id(0)][l];
 
  102                     #pragma unroll INTERNAL_BLOCK_SIZE 
  104                         matr[k][l] += data_i * data_j[k];
 
  109             #pragma unroll INTERNAL_BLOCK_SIZE 
  112                 #pragma unroll INTERNAL_BLOCK_SIZE 
  116                         temp = (matr[x][y] + QA_cost_ - q_[i + y] - q_[j + x]) * add_;
 
  118                         temp = matr[x][y] * add_;
 
  123                         ret_jx += temp * d_[i + y];
 
  124                     } 
else if (i + x == j + y) {
 
  127                             ret_jx += (temp + cost_ * add_) * d_[i + y];
 
  129                             ret_jx += temp * d_[i + y];
 
  163 template <
typename T>
 
  185     nd_range_device_kernel_polynomial(::sycl::handler &cgh, 
const real_type *q, 
real_type *ret, 
const real_type *d, 
const real_type *data_d, 
const real_type QA_cost, 
const real_type cost, 
const kernel_index_type num_rows, 
const kernel_index_type num_cols, 
const real_type add, 
const int degree, 
const real_type gamma, 
const real_type coef0) :
 
  186         data_intern_i_{ ::
sycl::range<2>{ 
THREAD_BLOCK_SIZE, 
INTERNAL_BLOCK_SIZE }, cgh }, data_intern_j_{ ::sycl::range<2>{ 
THREAD_BLOCK_SIZE, 
INTERNAL_BLOCK_SIZE }, cgh }, q_{ q }, ret_{ ret }, d_{ d }, data_d_{ data_d }, QA_cost_{ QA_cost }, cost_{ cost }, num_rows_{ num_rows }, num_cols_{ num_cols }, add_{ add }, degree_{ degree }, gamma_{ gamma }, coef0_{ coef0 } {}
 
  200         if (nd_idx.get_local_range(0) < 
THREAD_BLOCK_SIZE && nd_idx.get_local_range(1) == 0) {
 
  201             #pragma unroll INTERNAL_BLOCK_SIZE 
  203                 data_intern_i_[nd_idx.get_local_range(0)][block_id] = 0.0;
 
  204                 data_intern_j_[nd_idx.get_local_range(0)][block_id] = 0.0;
 
  207         ::sycl::group_barrier(nd_idx.get_group());
 
  214             for (
kernel_index_type vec_index = 0; vec_index < num_cols_ * num_rows_; vec_index += num_rows_) {
 
  215                 ::sycl::group_barrier(nd_idx.get_group());
 
  216                 #pragma unroll INTERNAL_BLOCK_SIZE 
  219                     if (nd_idx.get_local_id(1) == idx) {
 
  220                         data_intern_i_[nd_idx.get_local_id(0)][block_id] = data_d_[block_id + vec_index + i];
 
  223                     if (nd_idx.get_local_id(0) == idx_2) {
 
  224                         data_intern_j_[nd_idx.get_local_id(1)][block_id] = data_d_[block_id + vec_index + j];
 
  227                 ::sycl::group_barrier(nd_idx.get_group());
 
  229                 #pragma unroll INTERNAL_BLOCK_SIZE 
  231                     data_j[data_index] = data_intern_j_[nd_idx.get_local_id(1)][data_index];
 
  234                 #pragma unroll INTERNAL_BLOCK_SIZE 
  236                     const real_type data_i = data_intern_i_[nd_idx.get_local_id(0)][l];
 
  237                     #pragma unroll INTERNAL_BLOCK_SIZE 
  239                         matr[k][l] += data_i * data_j[k];
 
  244             #pragma unroll INTERNAL_BLOCK_SIZE 
  247                 #pragma unroll INTERNAL_BLOCK_SIZE 
  249                     const real_type temp = (::sycl::pow(gamma_ * matr[x][y] + coef0_, 
static_cast<real_type>(degree_)) + QA_cost_ - q_[i + y] - q_[j + x]) * add_;
 
  253                         ret_jx += temp * d_[i + y];
 
  254                     } 
else if (i + x == j + y) {
 
  256                         ret_jx += (temp + cost_ * add_) * d_[i + y];
 
  291 template <
typename T>
 
  311     nd_range_device_kernel_rbf(::sycl::handler &cgh, 
const real_type *q, 
real_type *ret, 
const real_type *d, 
const real_type *data_d, 
const real_type QA_cost, 
const real_type cost, 
const kernel_index_type num_rows, 
const kernel_index_type num_cols, 
const real_type add, 
const real_type gamma) :
 
  312         data_intern_i_{ ::
sycl::range<2>{ 
THREAD_BLOCK_SIZE, 
INTERNAL_BLOCK_SIZE }, cgh }, data_intern_j_{ ::sycl::range<2>{ 
THREAD_BLOCK_SIZE, 
INTERNAL_BLOCK_SIZE }, cgh }, q_{ q }, ret_{ ret }, d_{ d }, data_d_{ data_d }, QA_cost_{ QA_cost }, cost_{ cost }, num_rows_{ num_rows }, num_cols_{ num_cols }, add_{ add }, gamma_{ gamma } {}
 
  326         if (nd_idx.get_local_range(0) < 
THREAD_BLOCK_SIZE && nd_idx.get_local_range(1) == 0) {
 
  327             #pragma unroll INTERNAL_BLOCK_SIZE 
  329                 data_intern_i_[nd_idx.get_local_range(0)][block_id] = 0.0;
 
  330                 data_intern_j_[nd_idx.get_local_range(0)][block_id] = 0.0;
 
  333         ::sycl::group_barrier(nd_idx.get_group());
 
  340             for (
kernel_index_type vec_index = 0; vec_index < num_cols_ * num_rows_; vec_index += num_rows_) {
 
  341                 ::sycl::group_barrier(nd_idx.get_group());
 
  342                 #pragma unroll INTERNAL_BLOCK_SIZE 
  345                     if (nd_idx.get_local_id(1) == idx) {
 
  346                         data_intern_i_[nd_idx.get_local_id(0)][block_id] = data_d_[block_id + vec_index + i];
 
  349                     if (nd_idx.get_local_id(0) == idx_2) {
 
  350                         data_intern_j_[nd_idx.get_local_id(1)][block_id] = data_d_[block_id + vec_index + j];
 
  353                 ::sycl::group_barrier(nd_idx.get_group());
 
  355                 #pragma unroll INTERNAL_BLOCK_SIZE 
  357                     data_j[data_index] = data_intern_j_[nd_idx.get_local_id(1)][data_index];
 
  360                 #pragma unroll INTERNAL_BLOCK_SIZE 
  362                     const real_type data_i = data_intern_i_[nd_idx.get_local_id(0)][l];
 
  363                     #pragma unroll INTERNAL_BLOCK_SIZE 
  365                         matr[k][l] += (data_i - data_j[k]) * (data_i - data_j[k]);
 
  370             #pragma unroll INTERNAL_BLOCK_SIZE 
  373                 #pragma unroll INTERNAL_BLOCK_SIZE 
  375                     const real_type temp = (::sycl::exp(-gamma_ * matr[x][y]) + QA_cost_ - q_[i + y] - q_[j + x]) * add_;
 
  379                         ret_jx += temp * d_[i + y];
 
  380                     } 
else if (i + x == j + y) {
 
  382                         ret_jx += (temp + cost_ * add_) * d_[i + y];
 
Defines an atomic_ref wrapper for the SYCL backend.
 
Calculates the C-SVM kernel using the nd_range formulation and the linear kernel function.
Definition: svm_kernel_nd_range.hpp:31
 
void operator()(::sycl::nd_item< 2 > nd_idx) const
Function call operator overload performing the actual calculation.
Definition: svm_kernel_nd_range.hpp:58
 
T real_type
The type of the data.
Definition: svm_kernel_nd_range.hpp:34
 
::sycl::local_accessor< real_type, 2 > data_intern_j_
Local memory used for internal memory access optimizations.
Definition: svm_kernel_nd_range.hpp:142
 
nd_range_device_kernel_linear(::sycl::handler &cgh, const real_type *q, real_type *ret, const real_type *d, const real_type *data_d, const real_type QA_cost, const real_type cost, const kernel_index_type num_rows, const kernel_index_type feature_range, const real_type add, const kernel_index_type id)
Construct a new device kernel calculating the C-SVM kernel using the linear C-SVM kernel.
Definition: svm_kernel_nd_range.hpp:50
 
::sycl::local_accessor< real_type, 2 > data_intern_i_
Local memory used for internal memory access optimizations.
Definition: svm_kernel_nd_range.hpp:140
 
Calculates the C-SVM kernel using the nd_range formulation and the polynomial kernel function.
Definition: svm_kernel_nd_range.hpp:164
 
::sycl::local_accessor< real_type, 2 > data_intern_j_
Local memory used for internal memory access optimizations.
Definition: svm_kernel_nd_range.hpp:268
 
void operator()(::sycl::nd_item< 2 > nd_idx) const
Function call operator overload performing the actual calculation.
Definition: svm_kernel_nd_range.hpp:193
 
T real_type
The type of the data.
Definition: svm_kernel_nd_range.hpp:167
 
nd_range_device_kernel_polynomial(::sycl::handler &cgh, const real_type *q, real_type *ret, const real_type *d, const real_type *data_d, const real_type QA_cost, const real_type cost, const kernel_index_type num_rows, const kernel_index_type num_cols, const real_type add, const int degree, const real_type gamma, const real_type coef0)
Construct a new device kernel calculating the C-SVM kernel using the polynomial C-SVM kernel.
Definition: svm_kernel_nd_range.hpp:185
 
::sycl::local_accessor< real_type, 2 > data_intern_i_
Local memory used for internal memory access optimizations.
Definition: svm_kernel_nd_range.hpp:266
 
Calculates the C-SVM kernel using the nd_range formulation and the radial basis functions kernel func...
Definition: svm_kernel_nd_range.hpp:292
 
void operator()(::sycl::nd_item< 2 > nd_idx) const
Function call operator overload performing the actual calculation.
Definition: svm_kernel_nd_range.hpp:319
 
nd_range_device_kernel_rbf(::sycl::handler &cgh, const real_type *q, real_type *ret, const real_type *d, const real_type *data_d, const real_type QA_cost, const real_type cost, const kernel_index_type num_rows, const kernel_index_type num_cols, const real_type add, const real_type gamma)
Construct a new device kernel calculating the C-SVM kernel using the radial basis functions C-SVM ker...
Definition: svm_kernel_nd_range.hpp:311
 
T real_type
The type of the data.
Definition: svm_kernel_nd_range.hpp:295
 
::sycl::local_accessor< real_type, 2 > data_intern_j_
Local memory used for internal memory access optimizations.
Definition: svm_kernel_nd_range.hpp:394
 
::sycl::local_accessor< real_type, 2 > data_intern_i_
Local memory used for internal memory access optimizations.
Definition: svm_kernel_nd_range.hpp:392
 
Global type definitions and compile-time constants.
 
Namespace containing the C-SVM using the SYCL backend with the preferred SYCL implementation....
Definition: atomics.hpp:18
 
::sycl::atomic_ref< T, ::sycl::memory_order::relaxed, ::sycl::memory_scope::device, ::sycl::access::address_space::global_space > atomic_op
Shortcut alias for a sycl::atomic_ref targeting global memory.
Definition: atomics.hpp:25
 
constexpr kernel_index_type THREAD_BLOCK_SIZE
Global compile-time constant used for internal caching. May be changed during the CMake configuration...
Definition: constants.hpp:25
 
int kernel_index_type
Integer type used inside kernels.
Definition: constants.hpp:19
 
constexpr kernel_index_type INTERNAL_BLOCK_SIZE
Global compile-time constant used for internal caching. May be changed during the CMake configuration...
Definition: constants.hpp:32