PLSSVM/predict__kernel_8hpp_source.html

 #ifndef PLSSVM_BACKENDS_SYCL_PREDICT_KERNEL_HPP_

 #define PLSSVM_BACKENDS_SYCL_PREDICT_KERNEL_HPP_

 #pragma once


 #include "plssvm/backends/SYCL/detail/atomics.hpp"  // plssvm::sycl::detail::atomic_op

 #include "plssvm/constants.hpp"                     // plssvm::kernel_index_type, plssvm::THREAD_BLOCK_SIZE, plssvm::INTERNAL_BLOCK_SIZE


 #include "sycl/sycl.hpp"                            // sycl::nd_item, sycl::range, sycl::pow, sycl::exp


 namespace plssvm::sycl::detail {


 template <typename T>

 class device_kernel_w_linear {

   public:

     using real_type = T;


     device_kernel_w_linear(real_type *w_d, const real_type *data_d, const real_type *data_last_d, const real_type *alpha_d, const kernel_index_type num_data_points, const kernel_index_type num_features) :

         w_d_{ w_d }, data_d_{ data_d }, data_last_d_{ data_last_d }, alpha_d_{ alpha_d }, num_data_points_{ num_data_points }, num_features_{ num_features } {}


     void operator()(::sycl::id<1> index) const {

         real_type temp{ 0.0 };

         if (index < num_features_) {

             for (kernel_index_type dat = 0; dat < num_data_points_ - 1; ++dat) {

                 temp += alpha_d_[dat] * data_d_[dat + (num_data_points_ - 1 + THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * index];

             }

             temp += alpha_d_[num_data_points_ - 1] * data_last_d_[index];

             w_d_[index] = temp;

         }

     }


   private:

     real_type *w_d_;

     const real_type *data_d_;

     const real_type *data_last_d_;

     const real_type *alpha_d_;

     const kernel_index_type num_data_points_;

     const kernel_index_type num_features_;

 };


 template <typename T>

 class device_kernel_predict_polynomial {

   public:

     using real_type = T;


     device_kernel_predict_polynomial(real_type *out_d, const real_type *data_d, const real_type *data_last_d, const real_type *alpha_d, const kernel_index_type num_data_points, const real_type *points, const kernel_index_type num_predict_points, const kernel_index_type num_features, const int degree, const real_type gamma, const real_type coef0) :

         out_d_{ out_d }, data_d_{ data_d }, data_last_d_{ data_last_d }, alpha_d_{ alpha_d }, num_data_points_{ num_data_points }, points_{ points }, num_predict_points_{ num_predict_points }, num_features_{ num_features }, degree_{ degree }, gamma_{ gamma }, coef0_{ coef0 } {}


     void operator()(::sycl::nd_item<2> idx) const {

         const kernel_index_type data_point_index = idx.get_global_id(0);

         const kernel_index_type predict_point_index = idx.get_global_id(1);


         real_type temp = 0;

         if (predict_point_index < num_predict_points_) {

             for (kernel_index_type feature_index = 0; feature_index < num_features_; ++feature_index) {

                 if (data_point_index == num_data_points_ - 1) {

                     temp += data_last_d_[feature_index] * points_[predict_point_index + (num_predict_points_ + THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * feature_index];

                 } else {

                     temp += data_d_[data_point_index + (num_data_points_ - 1 + THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * feature_index] * points_[predict_point_index + (num_predict_points_ + THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * feature_index];

                 }

             }


             temp = alpha_d_[data_point_index] * ::sycl::pow(gamma_ * temp + coef0_, static_cast<real_type>(degree_));


             detail::atomic_op<real_type>{ out_d_[predict_point_index] } += temp;

         }

     }


   private:

     real_type *out_d_;

     const real_type *data_d_;

     const real_type *data_last_d_;

     const real_type *alpha_d_;

     const kernel_index_type num_data_points_;

     const real_type *points_;

     const kernel_index_type num_predict_points_;

     const kernel_index_type num_features_;

     const int degree_;

     const real_type gamma_;

     const real_type coef0_;

 };


 template <typename T>

 class device_kernel_predict_rbf {

   public:

     using real_type = T;


     device_kernel_predict_rbf(real_type *out_d, const real_type *data_d, const real_type *data_last_d, const real_type *alpha_d, const kernel_index_type num_data_points, const real_type *points, const kernel_index_type num_predict_points, const kernel_index_type num_features, const real_type gamma) :

         out_d_{ out_d }, data_d_{ data_d }, data_last_d_{ data_last_d }, alpha_d_{ alpha_d }, num_data_points_{ num_data_points }, points_{ points }, num_predict_points_{ num_predict_points }, num_features_{ num_features }, gamma_{ gamma } {}


     void operator()(::sycl::nd_item<2> idx) const {

         const kernel_index_type data_point_index = idx.get_global_id(0);

         const kernel_index_type predict_point_index = idx.get_global_id(1);


         real_type temp = 0;

         if (predict_point_index < num_predict_points_) {

             for (kernel_index_type feature_index = 0; feature_index < num_features_; ++feature_index) {

                 if (data_point_index == num_data_points_ - 1) {

                     temp += (data_last_d_[feature_index] - points_[predict_point_index + (num_predict_points_ + THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * feature_index]) * (data_last_d_[feature_index] - points_[predict_point_index + (num_predict_points_ + THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * feature_index]);

                 } else {

                     temp += (data_d_[data_point_index + (num_data_points_ - 1 + THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * feature_index] - points_[predict_point_index + (num_predict_points_ + THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * feature_index]) * (data_d_[data_point_index + (num_data_points_ - 1 + THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * feature_index] - points_[predict_point_index + (num_predict_points_ + THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * feature_index]);

                 }

             }


             temp = alpha_d_[data_point_index] * ::sycl::exp(-gamma_ * temp);


             detail::atomic_op<real_type>{ out_d_[predict_point_index] } += temp;

         }

     }


   private:

     real_type *out_d_;

     const real_type *data_d_;

     const real_type *data_last_d_;

     const real_type *alpha_d_;

     const kernel_index_type num_data_points_;

     const real_type *points_;

     const kernel_index_type num_predict_points_;

     const kernel_index_type num_features_;

     const real_type gamma_;

 };


 }  // namespace plssvm::sycl::detail


 #endif  // PLSSVM_BACKENDS_SYCL_PREDICT_KERNEL_HPP_

atomics.hpp
Defines an atomic_ref wrapper for the SYCL backend.

plssvm::sycl::detail::device_kernel_predict_polynomial
Predicts the labels for data points using the polynomial kernel function.
Definition: predict_kernel.hpp:80

plssvm::sycl::detail::device_kernel_predict_polynomial::device_kernel_predict_polynomial
device_kernel_predict_polynomial(real_type *out_d, const real_type *data_d, const real_type *data_last_d, const real_type *alpha_d, const kernel_index_type num_data_points, const real_type *points, const kernel_index_type num_predict_points, const kernel_index_type num_features, const int degree, const real_type gamma, const real_type coef0)
Construct a new device kernel to predict the labels for data points using the polynomial kernel function.
Definition: predict_kernel.hpp:100

plssvm::sycl::detail::device_kernel_predict_polynomial::operator()
void operator()(::sycl::nd_item< 2 > idx) const
Function call operator overload performing the actual calculation.
Definition: predict_kernel.hpp:107

plssvm::sycl::detail::device_kernel_predict_polynomial::real_type
T real_type
The type of the data.
Definition: predict_kernel.hpp:83

plssvm::sycl::detail::device_kernel_predict_rbf
Predicts the labels for data points using the radial basis functions kernel function.
Definition: predict_kernel.hpp:149

plssvm::sycl::detail::device_kernel_predict_rbf::operator()
void operator()(::sycl::nd_item< 2 > idx) const
Function call operator overload performing the actual calculation.
Definition: predict_kernel.hpp:174

plssvm::sycl::detail::device_kernel_predict_rbf::real_type
T real_type
The type of the data.
Definition: predict_kernel.hpp:152

plssvm::sycl::detail::device_kernel_predict_rbf::device_kernel_predict_rbf
device_kernel_predict_rbf(real_type *out_d, const real_type *data_d, const real_type *data_last_d, const real_type *alpha_d, const kernel_index_type num_data_points, const real_type *points, const kernel_index_type num_predict_points, const kernel_index_type num_features, const real_type gamma)
Construct a new device kernel to predict the labels for data points using the radial basis function kernel function.
Definition: predict_kernel.hpp:167

plssvm::sycl::detail::device_kernel_w_linear
Calculate the w vector to speed up the prediction of the labels for data points using the linear kernel function.
Definition: predict_kernel.hpp:29

plssvm::sycl::detail::device_kernel_w_linear::operator()
void operator()(::sycl::id< 1 > index) const
Function call operator overload performing the actual calculation.
Definition: predict_kernel.hpp:52

plssvm::sycl::detail::device_kernel_w_linear::device_kernel_w_linear
device_kernel_w_linear(real_type *w_d, const real_type *data_d, const real_type *data_last_d, const real_type *alpha_d, const kernel_index_type num_data_points, const kernel_index_type num_features)
Construct a new device kernel generating the w vector used to speed up the prediction when using the linear kernel function.
Definition: predict_kernel.hpp:44

plssvm::sycl::detail::device_kernel_w_linear::real_type
T real_type
The type of the data.
Definition: predict_kernel.hpp:32

constants.hpp
Global type definitions and compile-time constants.

plssvm::sycl::detail
Namespace containing the C-SVM using the SYCL backend with the preferred SYCL implementation....
Definition: atomics.hpp:18

plssvm::sycl::detail::atomic_op
::sycl::atomic_ref< T, ::sycl::memory_order::relaxed, ::sycl::memory_scope::device, ::sycl::access::address_space::global_space > atomic_op
Shortcut alias for a sycl::atomic_ref targeting global memory.
Definition: atomics.hpp:25

plssvm::THREAD_BLOCK_SIZE
constexpr kernel_index_type THREAD_BLOCK_SIZE
Global compile-time constant used for internal caching. May be changed during the CMake configuration step.
Definition: constants.hpp:25

plssvm::kernel_index_type
int kernel_index_type
Integer type used inside kernels.
Definition: constants.hpp:19

plssvm::INTERNAL_BLOCK_SIZE
constexpr kernel_index_type INTERNAL_BLOCK_SIZE
Global compile-time constant used for internal caching. May be changed during the CMake configuration step.
Definition: constants.hpp:32