12 #ifndef PLSSVM_BACKENDS_HIP_SVM_KERNEL_HPP_
13 #define PLSSVM_BACKENDS_HIP_SVM_KERNEL_HPP_
16 #include "hip/hip_runtime.h"
17 #include "hip/hip_runtime_api.h"
// Excerpt of the linear-kernel device function (device_kernel_linear).
// NOTE(review): this listing elides many original lines (signature, declarations,
// branch conditions, closing braces); the comments below describe only what is visible.
38 template <
typename real_type>
// Advance vec_index in steps of num_rows until feature_range * num_rows is reached.
53 for (
kernel_index_type vec_index = 0; vec_index < feature_range * num_rows; vec_index += num_rows) {
55 #pragma unroll INTERNAL_BLOCK_SIZE
// Only threads whose threadIdx.y equals idx copy the i-side values from data_d.
58 if (threadIdx.y == idx) {
59 data_intern_i[threadIdx.x][block_id] = data_d[block_id + vec_index + i];
// Only threads whose threadIdx.y equals idx_2 copy the j-side values from data_d.
62 if (threadIdx.y == idx_2) {
63 data_intern_j[threadIdx.x][block_id] = data_d[block_id + vec_index + ji];
68 #pragma unroll INTERNAL_BLOCK_SIZE
// Copy the row of data_intern_j selected by threadIdx.y into data_j.
70 data_j[data_index] = data_intern_j[threadIdx.y][data_index];
73 #pragma unroll INTERNAL_BLOCK_SIZE
75 const real_type data_i = data_intern_i[threadIdx.x][l];
76 #pragma unroll INTERNAL_BLOCK_SIZE
// Accumulate the product of the i-side and j-side values into matr[k][l].
78 matr[k][l] += data_i * data_j[k];
83 #pragma unroll INTERNAL_BLOCK_SIZE
// Per-x accumulator for the contribution built from d[i + y].
85 real_type ret_jx = 0.0;
86 #pragma unroll INTERNAL_BLOCK_SIZE
// Two alternative forms of temp follow; the condition selecting between them is not
// visible in this excerpt. The first includes the QA_cost and q correction terms,
// the second uses only the accumulated matr entry. Both are scaled by add.
90 temp = (matr[x][y] + QA_cost - q[i + y] - q[j + x]) * add;
92 temp = matr[x][y] * add;
97 ret_jx += temp * d[i + y];
98 }
// When i + x equals j + y, cost * add is added to temp before multiplying with d[i + y]
// (presumably the diagonal entry -- TODO confirm against the full source).
else if (i + x == j + y) {
101 ret_jx += (temp + cost * add) * d[i + y];
103 ret_jx += temp * d[i + y];
// Device kernel for the C-SVM computation using the polynomial kernel function.
// NOTE(review): this listing elides parts of the body (declarations, branch conditions,
// closing braces); the comments below describe only what is visible.
//
// Parameters as used in the visible code:
//   q       - read at q[i + y] and q[j + x] and subtracted inside temp
//   ret     - output pointer (its use is not visible in this excerpt)
//   d       - read at d[i + y] and multiplied with temp
//   data_d  - input data, read at block_id + vec_index + i (resp. + ji)
//   QA_cost - added inside temp
//   cost    - added (scaled by add) when i + x == j + y
//   num_rows, num_cols - bound and step of the vec_index loop
//   add     - scaling factor applied to temp and to cost
//   degree, gamma, coef0 - polynomial kernel parameters: pow(gamma * v + coef0, degree)
129 template <
typename real_type>
130 __global__
void device_kernel_polynomial(
const real_type *q, real_type *ret,
const real_type *d,
const real_type *data_d,
const real_type QA_cost,
const real_type cost,
const kernel_index_type num_rows,
const kernel_index_type num_cols,
const real_type add,
const int degree,
const real_type gamma,
const real_type coef0) {
// Advance vec_index in steps of num_rows until num_cols * num_rows is reached.
143 for (
kernel_index_type vec_index = 0; vec_index < num_cols * num_rows; vec_index += num_rows) {
145 #pragma unroll INTERNAL_BLOCK_SIZE
// Only threads whose threadIdx.y equals idx copy the i-side values from data_d.
148 if (threadIdx.y == idx) {
149 data_intern_i[threadIdx.x][block_id] = data_d[block_id + vec_index + i];
// Only threads whose threadIdx.y equals idx_2 copy the j-side values from data_d.
152 if (threadIdx.y == idx_2) {
153 data_intern_j[threadIdx.x][block_id] = data_d[block_id + vec_index + ji];
158 #pragma unroll INTERNAL_BLOCK_SIZE
// Copy the row of data_intern_j selected by threadIdx.y into data_j.
160 data_j[data_index] = data_intern_j[threadIdx.y][data_index];
163 #pragma unroll INTERNAL_BLOCK_SIZE
165 const real_type data_i = data_intern_i[threadIdx.x][l];
166 #pragma unroll INTERNAL_BLOCK_SIZE
// Accumulate the product of the i-side and j-side values into matr[k][l].
168 matr[k][l] += data_i * data_j[k];
173 #pragma unroll INTERNAL_BLOCK_SIZE
// Per-x accumulator for the contribution built from d[i + y].
175 real_type ret_jx = 0.0;
176 #pragma unroll INTERNAL_BLOCK_SIZE
// Apply the polynomial kernel to the accumulated value, then add QA_cost, subtract the
// two q entries and scale by add.
178 const real_type temp = (pow(gamma * matr[x][y] + coef0, degree) + QA_cost - q[i + y] - q[j + x]) * add;
182 ret_jx += temp * d[i + y];
183 }
// When i + x equals j + y, cost * add is added to temp before multiplying with d[i + y]
// (presumably the diagonal entry -- TODO confirm against the full source).
else if (i + x == j + y) {
185 ret_jx += (temp + cost * add) * d[i + y];
// Device kernel for the C-SVM computation using the radial basis function kernel function.
// NOTE(review): this listing elides parts of the body (declarations, branch conditions,
// closing braces); the comments below describe only what is visible.
//
// Parameters as used in the visible code:
//   q       - read at q[i + y] and q[j + x] and subtracted inside temp
//   ret     - output pointer (its use is not visible in this excerpt)
//   d       - read at d[i + y] and multiplied with temp
//   data_d  - input data, read at block_id + vec_index + i (resp. + ji)
//   QA_cost - added inside temp
//   cost    - added (scaled by add) when i + x == j + y
//   num_rows, num_cols - bound and step of the vec_index loop
//   add     - scaling factor applied to temp and to cost
//   gamma   - factor in the exponent: exp(-gamma * v)
208 template <
typename real_type>
209 __global__
void device_kernel_rbf(
const real_type *q, real_type *ret,
const real_type *d,
const real_type *data_d,
const real_type QA_cost,
const real_type cost,
const kernel_index_type num_rows,
const kernel_index_type num_cols,
const real_type add,
const real_type gamma) {
// Advance vec_index in steps of num_rows until num_cols * num_rows is reached.
222 for (
kernel_index_type vec_index = 0; vec_index < num_cols * num_rows; vec_index += num_rows) {
224 #pragma unroll INTERNAL_BLOCK_SIZE
// Only threads whose threadIdx.y equals idx copy the i-side values from data_d.
227 if (threadIdx.y == idx) {
228 data_intern_i[threadIdx.x][block_id] = data_d[block_id + vec_index + i];
// NOTE(review): this kernel names the second index idx2, while the linear and polynomial
// kernels use idx_2 -- declaration not visible here; confirm the naming is intentional.
231 if (threadIdx.y == idx2) {
232 data_intern_j[threadIdx.x][block_id] = data_d[block_id + vec_index + ji];
237 #pragma unroll INTERNAL_BLOCK_SIZE
// Copy the row of data_intern_j selected by threadIdx.y into data_j.
239 data_j[data_index] = data_intern_j[threadIdx.y][data_index];
242 #pragma unroll INTERNAL_BLOCK_SIZE
244 const real_type data_i = data_intern_i[threadIdx.x][l];
245 #pragma unroll INTERNAL_BLOCK_SIZE
// Accumulate the squared difference of the i-side and j-side values into matr[k][l].
247 matr[k][l] += (data_i - data_j[k]) * (data_i - data_j[k]);
252 #pragma unroll INTERNAL_BLOCK_SIZE
// Per-x accumulator for the contribution built from d[i + y].
254 real_type ret_jx = 0.0;
255 #pragma unroll INTERNAL_BLOCK_SIZE
// Apply exp(-gamma * accumulated squared difference), then add QA_cost, subtract the
// two q entries and scale by add.
257 const real_type temp = (exp(-gamma * matr[x][y]) + QA_cost - q[i + y] - q[j + x]) * add;
261 ret_jx += temp * d[i + y];
262 }
// When i + x equals j + y, cost * add is added to temp before multiplying with d[i + y]
// (presumably the diagonal entry -- TODO confirm against the full source).
else if (i + x == j + y) {
264 ret_jx += (temp + cost * add) * d[i + y];
__device__ __forceinline__ double atomicAdd(double *addr, const double val)
Atomically add the double precision val to the value denoted by addr.
Definition: atomics.cuh:24
Global type definitions and compile-time constants.
Namespace containing the C-SVM using the HIP backend.
Definition: csvm.hpp:34
__global__ void device_kernel_linear(const real_type *q, real_type *ret, const real_type *d, const real_type *data_d, const real_type QA_cost, const real_type cost, const kernel_index_type num_rows, const kernel_index_type feature_range, const real_type add, const kernel_index_type id)
Calculates the C-SVM kernel using the linear kernel function.
Definition: svm_kernel.hip.hpp:39
__global__ void device_kernel_polynomial(const real_type *q, real_type *ret, const real_type *d, const real_type *data_d, const real_type QA_cost, const real_type cost, const kernel_index_type num_rows, const kernel_index_type num_cols, const real_type add, const int degree, const real_type gamma, const real_type coef0)
Calculates the C-SVM kernel using the polynomial kernel function.
Definition: svm_kernel.hip.hpp:130
__global__ void device_kernel_rbf(const real_type *q, real_type *ret, const real_type *d, const real_type *data_d, const real_type QA_cost, const real_type cost, const kernel_index_type num_rows, const kernel_index_type num_cols, const real_type add, const real_type gamma)
Calculates the C-SVM kernel using the radial basis function kernel function.
Definition: svm_kernel.hip.hpp:209
constexpr kernel_index_type THREAD_BLOCK_SIZE
Global compile-time constant used for internal caching. May be changed during the CMake configuration...
Definition: constants.hpp:25
int kernel_index_type
Integer type used inside kernels.
Definition: constants.hpp:19
constexpr kernel_index_type INTERNAL_BLOCK_SIZE
Global compile-time constant used for internal caching. May be changed during the CMake configuration...
Definition: constants.hpp:32