12 #ifndef PLSSVM_BACKENDS_SYCL_SVM_KERNEL_HIERARCHICAL_HPP_
13 #define PLSSVM_BACKENDS_SYCL_SVM_KERNEL_HIERARCHICAL_HPP_
19 #include "sycl/sycl.hpp"
50 q_{ q }, ret_{ ret }, d_{ d }, data_d_{ data_d }, QA_cost_{ QA_cost }, cost_{ cost }, num_rows_{ num_rows }, feature_range_{ feature_range }, add_{ add }, device_{ id } {}
64 ::sycl::private_memory<real_type[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE], 2> private_matr{ group };
65 ::sycl::private_memory<real_type[INTERNAL_BLOCK_SIZE], 2> private_data_j{ group };
66 ::sycl::private_memory<kernel_index_type, 2> private_i{ group };
67 ::sycl::private_memory<kernel_index_type, 2> private_j{ group };
68 ::sycl::private_memory<bool, 2> private_cond{ group };
71 group.parallel_for_work_item([&](::sycl::h_item<2> idx) {
75 private_cond(idx) = private_i(idx) >= private_j(idx);
76 if (private_cond(idx)) {
82 #pragma unroll INTERNAL_BLOCK_SIZE
84 #pragma unroll INTERNAL_BLOCK_SIZE
86 private_matr(idx)[i][j] =
real_type{ 0.0 };
94 for (
kernel_index_type vec_index = 0; vec_index < feature_range_ * num_rows_; vec_index += num_rows_) {
95 group.parallel_for_work_item([&](::sycl::h_item<2> idx) {
96 if (private_cond(idx)) {
97 #pragma unroll INTERNAL_BLOCK_SIZE
100 if (idx.get_local_id(1) == idx_1) {
101 data_intern_i[idx.get_local_id(0)][block_id] = data_d_[block_id + vec_index + private_i(idx)];
104 if (idx.get_local_id(0) == idx_2) {
105 data_intern_j[idx.get_local_id(1)][block_id] = data_d_[block_id + vec_index + private_j(idx)];
114 group.parallel_for_work_item([&](::sycl::h_item<2> idx) {
115 if (private_cond(idx)) {
116 #pragma unroll INTERNAL_BLOCK_SIZE
118 private_data_j(idx)[data_index] = data_intern_j[idx.get_local_id(1)][data_index];
121 #pragma unroll INTERNAL_BLOCK_SIZE
123 const real_type data_i = data_intern_i[idx.get_local_id(0)][l];
124 #pragma unroll INTERNAL_BLOCK_SIZE
126 private_matr(idx)[k][l] += data_i * private_data_j(idx)[k];
136 group.parallel_for_work_item([&](::sycl::h_item<2> idx) {
137 if (private_cond(idx)) {
138 #pragma unroll INTERNAL_BLOCK_SIZE
141 #pragma unroll INTERNAL_BLOCK_SIZE
145 temp = (private_matr(idx)[x][y] + QA_cost_ - q_[private_i(idx) + y] - q_[private_j(idx) + x]) * add_;
147 temp = private_matr(idx)[x][y] * add_;
149 if (private_i(idx) + x > private_j(idx) + y) {
152 ret_jx += temp * d_[private_i(idx) + y];
153 }
else if (private_i(idx) + x == private_j(idx) + y) {
156 ret_jx += (temp + cost_ * add_) * d_[private_i(idx) + y];
158 ret_jx += temp * d_[private_i(idx) + y];
188 template <
typename T>
209 hierarchical_device_kernel_polynomial(
const real_type *q,
real_type *ret,
const real_type *d,
const real_type *data_d,
const real_type QA_cost,
const real_type cost,
const kernel_index_type num_rows,
const kernel_index_type num_cols,
const real_type add,
const int degree,
const real_type gamma,
const real_type coef0) :
210 q_{ q }, ret_{ ret }, d_{ d }, data_d_{ data_d }, QA_cost_{ QA_cost }, cost_{ cost }, num_rows_{ num_rows }, num_cols_{ num_cols }, add_{ add }, degree_{ degree }, gamma_{ gamma }, coef0_{ coef0 } {}
224 ::sycl::private_memory<real_type[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE], 2> private_matr{ group };
225 ::sycl::private_memory<real_type[INTERNAL_BLOCK_SIZE], 2> private_data_j{ group };
226 ::sycl::private_memory<kernel_index_type, 2> private_i{ group };
227 ::sycl::private_memory<kernel_index_type, 2> private_j{ group };
228 ::sycl::private_memory<bool, 2> private_cond{ group };
231 group.parallel_for_work_item([&](::sycl::h_item<2> idx) {
235 private_cond(idx) = private_i(idx) >= private_j(idx);
236 if (private_cond(idx)) {
242 #pragma unroll INTERNAL_BLOCK_SIZE
244 #pragma unroll INTERNAL_BLOCK_SIZE
246 private_matr(idx)[i][j] =
real_type{ 0.0 };
254 for (
kernel_index_type vec_index = 0; vec_index < num_cols_ * num_rows_; vec_index += num_rows_) {
255 group.parallel_for_work_item([&](::sycl::h_item<2> idx) {
256 if (private_cond(idx)) {
257 #pragma unroll INTERNAL_BLOCK_SIZE
260 if (idx.get_local_id(1) == idx_1) {
261 data_intern_i[idx.get_local_id(0)][block_id] = data_d_[block_id + vec_index + private_i(idx)];
264 if (idx.get_local_id(0) == idx_2) {
265 data_intern_j[idx.get_local_id(1)][block_id] = data_d_[block_id + vec_index + private_j(idx)];
274 group.parallel_for_work_item([&](::sycl::h_item<2> idx) {
275 if (private_cond(idx)) {
276 #pragma unroll INTERNAL_BLOCK_SIZE
278 private_data_j(idx)[data_index] = data_intern_j[idx.get_local_id(1)][data_index];
281 #pragma unroll INTERNAL_BLOCK_SIZE
283 const real_type data_i = data_intern_i[idx.get_local_id(0)][l];
284 #pragma unroll INTERNAL_BLOCK_SIZE
286 private_matr(idx)[k][l] += data_i * private_data_j(idx)[k];
296 group.parallel_for_work_item([&](::sycl::h_item<2> idx) {
297 if (private_cond(idx)) {
298 #pragma unroll INTERNAL_BLOCK_SIZE
301 #pragma unroll INTERNAL_BLOCK_SIZE
303 const real_type temp = (::sycl::pow(gamma_ * private_matr(idx)[x][y] + coef0_,
static_cast<real_type>(degree_)) + QA_cost_ - q_[private_i(idx) + y] - q_[private_j(idx) + x]) * add_;
304 if (private_i(idx) + x > private_j(idx) + y) {
307 ret_jx += temp * d_[private_i(idx) + y];
308 }
else if (private_i(idx) + x == private_j(idx) + y) {
310 ret_jx += (temp + cost_ * add_) * d_[private_i(idx) + y];
341 template <
typename T>
361 q_{ q }, ret_{ ret }, d_{ d }, data_d_{ data_d }, QA_cost_{ QA_cost }, cost_{ cost }, num_rows_{ num_rows }, num_cols_{ num_cols }, add_{ add }, gamma_{ gamma } {}
375 ::sycl::private_memory<real_type[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE], 2> private_matr{ group };
376 ::sycl::private_memory<real_type[INTERNAL_BLOCK_SIZE], 2> private_data_j{ group };
377 ::sycl::private_memory<kernel_index_type, 2> private_i{ group };
378 ::sycl::private_memory<kernel_index_type, 2> private_j{ group };
379 ::sycl::private_memory<bool, 2> private_cond{ group };
382 group.parallel_for_work_item([&](::sycl::h_item<2> idx) {
386 private_cond(idx) = private_i(idx) >= private_j(idx);
387 if (private_cond(idx)) {
393 #pragma unroll INTERNAL_BLOCK_SIZE
395 #pragma unroll INTERNAL_BLOCK_SIZE
397 private_matr(idx)[i][j] =
real_type{ 0.0 };
405 for (
kernel_index_type vec_index = 0; vec_index < num_cols_ * num_rows_; vec_index += num_rows_) {
406 group.parallel_for_work_item([&](::sycl::h_item<2> idx) {
407 if (private_cond(idx)) {
408 #pragma unroll INTERNAL_BLOCK_SIZE
411 if (idx.get_local_id(1) == idx_1) {
412 data_intern_i[idx.get_local_id(0)][block_id] = data_d_[block_id + vec_index + private_i(idx)];
415 if (idx.get_local_id(0) == idx_2) {
416 data_intern_j[idx.get_local_id(1)][block_id] = data_d_[block_id + vec_index + private_j(idx)];
425 group.parallel_for_work_item([&](::sycl::h_item<2> idx) {
426 if (private_cond(idx)) {
427 #pragma unroll INTERNAL_BLOCK_SIZE
429 private_data_j(idx)[data_index] = data_intern_j[idx.get_local_id(1)][data_index];
432 #pragma unroll INTERNAL_BLOCK_SIZE
434 const real_type data_i = data_intern_i[idx.get_local_id(0)][l];
435 #pragma unroll INTERNAL_BLOCK_SIZE
437 private_matr(idx)[k][l] += (data_i - private_data_j(idx)[k]) * (data_i - private_data_j(idx)[k]);
447 group.parallel_for_work_item([&](::sycl::h_item<2> idx) {
448 if (private_cond(idx)) {
449 #pragma unroll INTERNAL_BLOCK_SIZE
452 #pragma unroll INTERNAL_BLOCK_SIZE
454 const real_type temp = (::sycl::exp(-gamma_ * private_matr(idx)[x][y]) + QA_cost_ - q_[private_i(idx) + y] - q_[private_j(idx) + x]) * add_;
455 if (private_i(idx) + x > private_j(idx) + y) {
458 ret_jx += temp * d_[private_i(idx) + y];
459 }
else if (private_i(idx) + x == private_j(idx) + y) {
461 ret_jx += (temp + cost_ * add_) * d_[private_i(idx) + y];
Defines an atomic_ref wrapper for the SYCL backend.
Calculates the C-SVM kernel using the hierarchical formulation and the linear kernel function.
Definition: svm_kernel_hierarchical.hpp:31
T real_type
The type of the data.
Definition: svm_kernel_hierarchical.hpp:34
hierarchical_device_kernel_linear(const real_type *q, real_type *ret, const real_type *d, const real_type *data_d, const real_type QA_cost, const real_type cost, const kernel_index_type num_rows, const kernel_index_type feature_range, const real_type add, const kernel_index_type id)
Construct a new device kernel calculating the C-SVM kernel using the linear C-SVM kernel.
Definition: svm_kernel_hierarchical.hpp:49
void operator()(::sycl::group< 2 > group) const
Function call operator overload performing the actual calculation.
Definition: svm_kernel_hierarchical.hpp:57
Calculates the C-SVM kernel using the hierarchical formulation and the polynomial kernel function.
Definition: svm_kernel_hierarchical.hpp:189
hierarchical_device_kernel_polynomial(const real_type *q, real_type *ret, const real_type *d, const real_type *data_d, const real_type QA_cost, const real_type cost, const kernel_index_type num_rows, const kernel_index_type num_cols, const real_type add, const int degree, const real_type gamma, const real_type coef0)
Construct a new device kernel calculating the C-SVM kernel using the polynomial C-SVM kernel.
Definition: svm_kernel_hierarchical.hpp:209
T real_type
The type of the data.
Definition: svm_kernel_hierarchical.hpp:192
void operator()(::sycl::group< 2 > group) const
Function call operator overload performing the actual calculation.
Definition: svm_kernel_hierarchical.hpp:217
Calculates the C-SVM kernel using the hierarchical formulation and the radial basis functions kernel function.
Definition: svm_kernel_hierarchical.hpp:342
void operator()(::sycl::group< 2 > group) const
Function call operator overload performing the actual calculation.
Definition: svm_kernel_hierarchical.hpp:368
T real_type
The type of the data.
Definition: svm_kernel_hierarchical.hpp:345
hierarchical_device_kernel_rbf(const real_type *q, real_type *ret, const real_type *d, const real_type *data_d, const real_type QA_cost, const real_type cost, const kernel_index_type num_rows, const kernel_index_type num_cols, const real_type add, const real_type gamma)
Construct a new device kernel calculating the C-SVM kernel using the radial basis functions kernel function.
Definition: svm_kernel_hierarchical.hpp:360
Global type definitions and compile-time constants.
Namespace containing the C-SVM using the SYCL backend with the preferred SYCL implementation....
Definition: atomics.hpp:18
::sycl::atomic_ref< T, ::sycl::memory_order::relaxed, ::sycl::memory_scope::device, ::sycl::access::address_space::global_space > atomic_op
Shortcut alias for a sycl::atomic_ref targeting global memory.
Definition: atomics.hpp:25
constexpr kernel_index_type THREAD_BLOCK_SIZE
Global compile-time constant used for internal caching. May be changed during the CMake configuration.
Definition: constants.hpp:25
int kernel_index_type
Integer type used inside kernels.
Definition: constants.hpp:19
constexpr kernel_index_type INTERNAL_BLOCK_SIZE
Global compile-time constant used for internal caching. May be changed during the CMake configuration.
Definition: constants.hpp:32