PLSSVM - Parallel Least Squares Support Vector Machine  2.0.0
A Least Squares Support Vector Machine implementation using different backends.
predict_kernel.hip.hpp
Go to the documentation of this file.
1 
12 #ifndef PLSSVM_BACKENDS_HIP_PREDICT_KERNEL_HPP_
13 #define PLSSVM_BACKENDS_HIP_PREDICT_KERNEL_HPP_
14 #pragma once
15 
16 #include "hip/hip_runtime.h"
17 #include "hip/hip_runtime_api.h"
18 
19 #include "plssvm/constants.hpp" // plssvm::kernel_index_type, plssvm::THREAD_BLOCK_SIZE, plssvm::INTERNAL_BLOCK_SIZE
20 
21 namespace plssvm::hip {
22 
34 template <typename real_type>
35 __global__ void device_kernel_w_linear(real_type *w_d, const real_type *data_d, const real_type *data_last_d, const real_type *alpha_d, const kernel_index_type num_data_points, const kernel_index_type num_features) {
36  const kernel_index_type index = blockIdx.x * blockDim.x + threadIdx.x;
37  real_type temp{ 0.0 };
38  if (index < num_features) {
39  for (kernel_index_type dat = 0; dat < num_data_points - 1; ++dat) {
40  temp += alpha_d[dat] * data_d[dat + (num_data_points - 1 + THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * index];
41  }
42  temp += alpha_d[num_data_points - 1] * data_last_d[index];
43  w_d[index] = temp;
44  }
45 }
46 
63 template <typename real_type>
64 __global__ void device_kernel_predict_polynomial(real_type *out_d, const real_type *data_d, const real_type *data_last_d, const real_type *alpha_d, const kernel_index_type num_data_points, const real_type *points, const kernel_index_type num_predict_points, const kernel_index_type num_features, const int degree, const real_type gamma, const real_type coef0) {
65  const kernel_index_type data_point_index = blockIdx.x * blockDim.x + threadIdx.x;
66  const kernel_index_type predict_point_index = blockIdx.y * blockDim.y + threadIdx.y;
67 
68  real_type temp{ 0.0 };
69  if (predict_point_index < num_predict_points) {
70  for (kernel_index_type feature_index = 0; feature_index < num_features; ++feature_index) {
71  if (data_point_index == num_data_points - 1) {
72  temp += data_last_d[feature_index] * points[predict_point_index + (num_predict_points + THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * feature_index];
73  } else {
74  temp += data_d[data_point_index + (num_data_points - 1 + THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * feature_index] * points[predict_point_index + (num_predict_points + THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * feature_index];
75  }
76  }
77 
78  temp = alpha_d[data_point_index] * pow(gamma * temp + coef0, degree);
79 
80  atomicAdd(&out_d[predict_point_index], temp);
81  }
82 }
83 
98 template <typename real_type>
99 __global__ void device_kernel_predict_rbf(real_type *out_d, const real_type *data_d, const real_type *data_last_d, const real_type *alpha_d, const kernel_index_type num_data_points, const real_type *points, const kernel_index_type num_predict_points, const kernel_index_type num_features, const real_type gamma) {
100  const kernel_index_type data_point_index = blockIdx.x * blockDim.x + threadIdx.x;
101  const kernel_index_type predict_point_index = blockIdx.y * blockDim.y + threadIdx.y;
102 
103  real_type temp{ 0.0 };
104  if (predict_point_index < num_predict_points) {
105  for (kernel_index_type feature_index = 0; feature_index < num_features; ++feature_index) {
106  if (data_point_index == num_data_points - 1) {
107  temp += (data_last_d[feature_index] - points[predict_point_index + (num_predict_points + THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * feature_index]) * (data_last_d[feature_index] - points[predict_point_index + (num_predict_points + THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * feature_index]);
108  } else {
109  temp += (data_d[data_point_index + (num_data_points - 1 + THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * feature_index] - points[predict_point_index + (num_predict_points + THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * feature_index]) * (data_d[data_point_index + (num_data_points - 1 + THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * feature_index] - points[predict_point_index + (num_predict_points + THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * feature_index]);
110  }
111  }
112 
113  temp = alpha_d[data_point_index] * exp(-gamma * temp);
114 
115  atomicAdd(&out_d[predict_point_index], temp);
116  }
117 }
118 
119 } // namespace plssvm::hip
120 
121 #endif // PLSSVM_BACKENDS_HIP_PREDICT_KERNEL_HPP_
__device__ __forceinline__ double atomicAdd(double *addr, const double val)
Atomically add the double precision val to the value denoted by addr.
Definition: atomics.cuh:24
Global type definitions and compile-time constants.
Namespace containing the C-SVM using the HIP backend.
Definition: csvm.hpp:34
__global__ void device_kernel_w_linear(real_type *w_d, const real_type *data_d, const real_type *data_last_d, const real_type *alpha_d, const kernel_index_type num_data_points, const kernel_index_type num_features)
Calculate the w vector to speed up the prediction of the labels for data points using the linear kern...
Definition: predict_kernel.hip.hpp:35
__global__ void device_kernel_predict_polynomial(real_type *out_d, const real_type *data_d, const real_type *data_last_d, const real_type *alpha_d, const kernel_index_type num_data_points, const real_type *points, const kernel_index_type num_predict_points, const kernel_index_type num_features, const int degree, const real_type gamma, const real_type coef0)
Predicts the labels for data points using the polynomial kernel function.
Definition: predict_kernel.hip.hpp:64
__global__ void device_kernel_predict_rbf(real_type *out_d, const real_type *data_d, const real_type *data_last_d, const real_type *alpha_d, const kernel_index_type num_data_points, const real_type *points, const kernel_index_type num_predict_points, const kernel_index_type num_features, const real_type gamma)
Predicts the labels for data points using the radial basis functions kernel function.
Definition: predict_kernel.hip.hpp:99
constexpr kernel_index_type THREAD_BLOCK_SIZE
Global compile-time constant used for internal caching. May be changed during the CMake configuration...
Definition: constants.hpp:25
int kernel_index_type
Integer type used inside kernels.
Definition: constants.hpp:19
constexpr kernel_index_type INTERNAL_BLOCK_SIZE
Global compile-time constant used for internal caching. May be changed during the CMake configuration...
Definition: constants.hpp:32