12 #ifndef PLSSVM_BACKENDS_SYCL_SVM_KERNEL_ND_RANGE_HPP_
13 #define PLSSVM_BACKENDS_SYCL_SVM_KERNEL_ND_RANGE_HPP_
19 #include "sycl/sycl.hpp"
// Constructor of the nd_range linear C-SVM device kernel functor.
// It allocates two THREAD_BLOCK_SIZE x INTERNAL_BLOCK_SIZE blocks of SYCL local
// memory (data_intern_i_, data_intern_j_) on the given command-group handler and
// stores every remaining argument unchanged in the corresponding member
// (pointers are kept as-is, scalars are copied).
// NOTE(review): the stray "50"/"51" tokens are original line numbers fused in by
// the documentation extraction, not part of the real source.
50 nd_range_device_kernel_linear(::sycl::handler &cgh,
const real_type *q,
real_type *ret,
const real_type *d,
const real_type *data_d,
const real_type QA_cost,
const real_type cost,
const kernel_index_type num_rows,
const kernel_index_type feature_range,
const real_type add,
const kernel_index_type id) :
51 data_intern_i_{ ::
sycl::range<2>{
THREAD_BLOCK_SIZE,
INTERNAL_BLOCK_SIZE }, cgh }, data_intern_j_{ ::sycl::range<2>{
THREAD_BLOCK_SIZE,
INTERNAL_BLOCK_SIZE }, cgh }, q_{ q }, ret_{ ret }, d_{ d }, data_d_{ data_d }, QA_cost_{ QA_cost }, cost_{ cost }, num_rows_{ num_rows }, feature_range_{ feature_range }, add_{ add }, device_{
id } {}
65 if (nd_idx.get_local_range(0) <
THREAD_BLOCK_SIZE && nd_idx.get_local_range(1) == 0) {
66 #pragma unroll INTERNAL_BLOCK_SIZE
68 data_intern_i_[nd_idx.get_local_range(0)][block_id] = 0.0;
69 data_intern_j_[nd_idx.get_local_range(0)][block_id] = 0.0;
72 ::sycl::group_barrier(nd_idx.get_group());
79 for (
kernel_index_type vec_index = 0; vec_index < feature_range_ * num_rows_; vec_index += num_rows_) {
80 ::sycl::group_barrier(nd_idx.get_group());
81 #pragma unroll INTERNAL_BLOCK_SIZE
84 if (nd_idx.get_local_id(1) == idx) {
85 data_intern_i_[nd_idx.get_local_id(0)][block_id] = data_d_[block_id + vec_index + i];
88 if (nd_idx.get_local_id(0) == idx_2) {
89 data_intern_j_[nd_idx.get_local_id(1)][block_id] = data_d_[block_id + vec_index + j];
92 ::sycl::group_barrier(nd_idx.get_group());
94 #pragma unroll INTERNAL_BLOCK_SIZE
96 data_j[data_index] = data_intern_j_[nd_idx.get_local_id(1)][data_index];
99 #pragma unroll INTERNAL_BLOCK_SIZE
101 const real_type data_i = data_intern_i_[nd_idx.get_local_id(0)][l];
102 #pragma unroll INTERNAL_BLOCK_SIZE
104 matr[k][l] += data_i * data_j[k];
109 #pragma unroll INTERNAL_BLOCK_SIZE
112 #pragma unroll INTERNAL_BLOCK_SIZE
116 temp = (matr[x][y] + QA_cost_ - q_[i + y] - q_[j + x]) * add_;
118 temp = matr[x][y] * add_;
123 ret_jx += temp * d_[i + y];
124 }
else if (i + x == j + y) {
127 ret_jx += (temp + cost_ * add_) * d_[i + y];
129 ret_jx += temp * d_[i + y];
163 template <
typename T>
// Constructor of the nd_range polynomial C-SVM device kernel functor.
// It allocates two THREAD_BLOCK_SIZE x INTERNAL_BLOCK_SIZE blocks of SYCL local
// memory (data_intern_i_, data_intern_j_) on the given command-group handler and
// stores every remaining argument unchanged in the corresponding member.
// degree, gamma, and coef0 are the parameters of the polynomial kernel
// (::sycl::pow(gamma * <x,y> + coef0, degree) is used in operator()).
// NOTE(review): the stray "185"/"186" tokens are original line numbers fused in
// by the documentation extraction, not part of the real source.
185 nd_range_device_kernel_polynomial(::sycl::handler &cgh,
const real_type *q,
real_type *ret,
const real_type *d,
const real_type *data_d,
const real_type QA_cost,
const real_type cost,
const kernel_index_type num_rows,
const kernel_index_type num_cols,
const real_type add,
const int degree,
const real_type gamma,
const real_type coef0) :
186 data_intern_i_{ ::
sycl::range<2>{
THREAD_BLOCK_SIZE,
INTERNAL_BLOCK_SIZE }, cgh }, data_intern_j_{ ::sycl::range<2>{
THREAD_BLOCK_SIZE,
INTERNAL_BLOCK_SIZE }, cgh }, q_{ q }, ret_{ ret }, d_{ d }, data_d_{ data_d }, QA_cost_{ QA_cost }, cost_{ cost }, num_rows_{ num_rows }, num_cols_{ num_cols }, add_{ add }, degree_{ degree }, gamma_{ gamma }, coef0_{ coef0 } {}
200 if (nd_idx.get_local_range(0) <
THREAD_BLOCK_SIZE && nd_idx.get_local_range(1) == 0) {
201 #pragma unroll INTERNAL_BLOCK_SIZE
203 data_intern_i_[nd_idx.get_local_range(0)][block_id] = 0.0;
204 data_intern_j_[nd_idx.get_local_range(0)][block_id] = 0.0;
207 ::sycl::group_barrier(nd_idx.get_group());
214 for (
kernel_index_type vec_index = 0; vec_index < num_cols_ * num_rows_; vec_index += num_rows_) {
215 ::sycl::group_barrier(nd_idx.get_group());
216 #pragma unroll INTERNAL_BLOCK_SIZE
219 if (nd_idx.get_local_id(1) == idx) {
220 data_intern_i_[nd_idx.get_local_id(0)][block_id] = data_d_[block_id + vec_index + i];
223 if (nd_idx.get_local_id(0) == idx_2) {
224 data_intern_j_[nd_idx.get_local_id(1)][block_id] = data_d_[block_id + vec_index + j];
227 ::sycl::group_barrier(nd_idx.get_group());
229 #pragma unroll INTERNAL_BLOCK_SIZE
231 data_j[data_index] = data_intern_j_[nd_idx.get_local_id(1)][data_index];
234 #pragma unroll INTERNAL_BLOCK_SIZE
236 const real_type data_i = data_intern_i_[nd_idx.get_local_id(0)][l];
237 #pragma unroll INTERNAL_BLOCK_SIZE
239 matr[k][l] += data_i * data_j[k];
244 #pragma unroll INTERNAL_BLOCK_SIZE
247 #pragma unroll INTERNAL_BLOCK_SIZE
249 const real_type temp = (::sycl::pow(gamma_ * matr[x][y] + coef0_,
static_cast<real_type>(degree_)) + QA_cost_ - q_[i + y] - q_[j + x]) * add_;
253 ret_jx += temp * d_[i + y];
254 }
else if (i + x == j + y) {
256 ret_jx += (temp + cost_ * add_) * d_[i + y];
291 template <
typename T>
// Constructor of the nd_range radial basis function C-SVM device kernel functor.
// It allocates two THREAD_BLOCK_SIZE x INTERNAL_BLOCK_SIZE blocks of SYCL local
// memory (data_intern_i_, data_intern_j_) on the given command-group handler and
// stores every remaining argument unchanged in the corresponding member.
// gamma is the rbf kernel parameter (::sycl::exp(-gamma * ||x - y||^2) is used
// in operator()).
// NOTE(review): the stray "311"/"312" tokens are original line numbers fused in
// by the documentation extraction, not part of the real source.
311 nd_range_device_kernel_rbf(::sycl::handler &cgh,
const real_type *q,
real_type *ret,
const real_type *d,
const real_type *data_d,
const real_type QA_cost,
const real_type cost,
const kernel_index_type num_rows,
const kernel_index_type num_cols,
const real_type add,
const real_type gamma) :
312 data_intern_i_{ ::
sycl::range<2>{
THREAD_BLOCK_SIZE,
INTERNAL_BLOCK_SIZE }, cgh }, data_intern_j_{ ::sycl::range<2>{
THREAD_BLOCK_SIZE,
INTERNAL_BLOCK_SIZE }, cgh }, q_{ q }, ret_{ ret }, d_{ d }, data_d_{ data_d }, QA_cost_{ QA_cost }, cost_{ cost }, num_rows_{ num_rows }, num_cols_{ num_cols }, add_{ add }, gamma_{ gamma } {}
326 if (nd_idx.get_local_range(0) <
THREAD_BLOCK_SIZE && nd_idx.get_local_range(1) == 0) {
327 #pragma unroll INTERNAL_BLOCK_SIZE
329 data_intern_i_[nd_idx.get_local_range(0)][block_id] = 0.0;
330 data_intern_j_[nd_idx.get_local_range(0)][block_id] = 0.0;
333 ::sycl::group_barrier(nd_idx.get_group());
340 for (
kernel_index_type vec_index = 0; vec_index < num_cols_ * num_rows_; vec_index += num_rows_) {
341 ::sycl::group_barrier(nd_idx.get_group());
342 #pragma unroll INTERNAL_BLOCK_SIZE
345 if (nd_idx.get_local_id(1) == idx) {
346 data_intern_i_[nd_idx.get_local_id(0)][block_id] = data_d_[block_id + vec_index + i];
349 if (nd_idx.get_local_id(0) == idx_2) {
350 data_intern_j_[nd_idx.get_local_id(1)][block_id] = data_d_[block_id + vec_index + j];
353 ::sycl::group_barrier(nd_idx.get_group());
355 #pragma unroll INTERNAL_BLOCK_SIZE
357 data_j[data_index] = data_intern_j_[nd_idx.get_local_id(1)][data_index];
360 #pragma unroll INTERNAL_BLOCK_SIZE
362 const real_type data_i = data_intern_i_[nd_idx.get_local_id(0)][l];
363 #pragma unroll INTERNAL_BLOCK_SIZE
365 matr[k][l] += (data_i - data_j[k]) * (data_i - data_j[k]);
370 #pragma unroll INTERNAL_BLOCK_SIZE
373 #pragma unroll INTERNAL_BLOCK_SIZE
375 const real_type temp = (::sycl::exp(-gamma_ * matr[x][y]) + QA_cost_ - q_[i + y] - q_[j + x]) * add_;
379 ret_jx += temp * d_[i + y];
380 }
else if (i + x == j + y) {
382 ret_jx += (temp + cost_ * add_) * d_[i + y];
Defines an atomic_ref wrapper for the SYCL backend.
Calculates the C-SVM kernel using the nd_range formulation and the linear kernel function.
Definition: svm_kernel_nd_range.hpp:31
void operator()(::sycl::nd_item< 2 > nd_idx) const
Function call operator overload performing the actual calculation.
Definition: svm_kernel_nd_range.hpp:58
T real_type
The type of the data.
Definition: svm_kernel_nd_range.hpp:34
::sycl::local_accessor< real_type, 2 > data_intern_j_
Local memory used for internal memory access optimizations.
Definition: svm_kernel_nd_range.hpp:142
nd_range_device_kernel_linear(::sycl::handler &cgh, const real_type *q, real_type *ret, const real_type *d, const real_type *data_d, const real_type QA_cost, const real_type cost, const kernel_index_type num_rows, const kernel_index_type feature_range, const real_type add, const kernel_index_type id)
Construct a new device kernel calculating the C-SVM kernel using the linear C-SVM kernel.
Definition: svm_kernel_nd_range.hpp:50
::sycl::local_accessor< real_type, 2 > data_intern_i_
Local memory used for internal memory access optimizations.
Definition: svm_kernel_nd_range.hpp:140
Calculates the C-SVM kernel using the nd_range formulation and the polynomial kernel function.
Definition: svm_kernel_nd_range.hpp:164
::sycl::local_accessor< real_type, 2 > data_intern_j_
Local memory used for internal memory access optimizations.
Definition: svm_kernel_nd_range.hpp:268
void operator()(::sycl::nd_item< 2 > nd_idx) const
Function call operator overload performing the actual calculation.
Definition: svm_kernel_nd_range.hpp:193
T real_type
The type of the data.
Definition: svm_kernel_nd_range.hpp:167
nd_range_device_kernel_polynomial(::sycl::handler &cgh, const real_type *q, real_type *ret, const real_type *d, const real_type *data_d, const real_type QA_cost, const real_type cost, const kernel_index_type num_rows, const kernel_index_type num_cols, const real_type add, const int degree, const real_type gamma, const real_type coef0)
Construct a new device kernel calculating the C-SVM kernel using the polynomial C-SVM kernel.
Definition: svm_kernel_nd_range.hpp:185
::sycl::local_accessor< real_type, 2 > data_intern_i_
Local memory used for internal memory access optimizations.
Definition: svm_kernel_nd_range.hpp:266
Calculates the C-SVM kernel using the nd_range formulation and the radial basis functions kernel function.
Definition: svm_kernel_nd_range.hpp:292
void operator()(::sycl::nd_item< 2 > nd_idx) const
Function call operator overload performing the actual calculation.
Definition: svm_kernel_nd_range.hpp:319
nd_range_device_kernel_rbf(::sycl::handler &cgh, const real_type *q, real_type *ret, const real_type *d, const real_type *data_d, const real_type QA_cost, const real_type cost, const kernel_index_type num_rows, const kernel_index_type num_cols, const real_type add, const real_type gamma)
Construct a new device kernel calculating the C-SVM kernel using the radial basis functions C-SVM kernel.
Definition: svm_kernel_nd_range.hpp:311
T real_type
The type of the data.
Definition: svm_kernel_nd_range.hpp:295
::sycl::local_accessor< real_type, 2 > data_intern_j_
Local memory used for internal memory access optimizations.
Definition: svm_kernel_nd_range.hpp:394
::sycl::local_accessor< real_type, 2 > data_intern_i_
Local memory used for internal memory access optimizations.
Definition: svm_kernel_nd_range.hpp:392
Global type definitions and compile-time constants.
Namespace containing the C-SVM using the SYCL backend with the preferred SYCL implementation.
Definition: atomics.hpp:18
::sycl::atomic_ref< T, ::sycl::memory_order::relaxed, ::sycl::memory_scope::device, ::sycl::access::address_space::global_space > atomic_op
Shortcut alias for a sycl::atomic_ref targeting global memory.
Definition: atomics.hpp:25
constexpr kernel_index_type THREAD_BLOCK_SIZE
Global compile-time constant used for internal caching. May be changed during the CMake configuration step.
Definition: constants.hpp:25
int kernel_index_type
Integer type used inside kernels.
Definition: constants.hpp:19
constexpr kernel_index_type INTERNAL_BLOCK_SIZE
Global compile-time constant used for internal caching. May be changed during the CMake configuration step.
Definition: constants.hpp:32