PLSSVM - Parallel Least Squares Support Vector Machine 2.0.0
A Least Squares Support Vector Machine implementation using different backends.
gpu_csvm.hpp

#ifndef PLSSVM_BACKENDS_GPU_CSVM_HPP_
#define PLSSVM_BACKENDS_GPU_CSVM_HPP_
#pragma once

#include "plssvm/constants.hpp"                   // plssvm::{THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE}
#include "plssvm/csvm.hpp"                        // plssvm::csvm
#include "plssvm/detail/execution_range.hpp"      // plssvm::detail::execution_range
#include "plssvm/detail/layout.hpp"               // plssvm::detail::{transform_to_layout, layout_type}
#include "plssvm/detail/logger.hpp"               // plssvm::detail::log, plssvm::verbosity_level
#include "plssvm/detail/performance_tracker.hpp"  // plssvm::detail::tracking_entry, PLSSVM_DETAIL_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY
#include "plssvm/parameter.hpp"                   // plssvm::parameter

#include "fmt/chrono.h"  // output std::chrono times using {fmt}
#include "fmt/core.h"    // fmt::format

#include <algorithm>   // std::min, std::all_of, std::adjacent_find
#include <chrono>      // std::chrono::{milliseconds, steady_clock, duration_cast}
#include <cmath>       // std::ceil
#include <cstddef>     // std::size_t
#include <functional>  // std::greater_equal
#include <iostream>    // std::clog, std::cout, std::endl
#include <tuple>       // std::tuple, std::make_tuple
#include <utility>     // std::forward, std::pair, std::move, std::make_pair
#include <vector>      // std::vector

namespace plssvm::detail {
/// A C-SVM implementation for all GPU backends to reduce code duplication.
/// @tparam device_ptr_t the type of the device pointer (dependent on the used backend)
/// @tparam queue_t the type of the device queue (dependent on the used backend)
template <template <typename> typename device_ptr_t, typename queue_t>
class gpu_csvm : public ::plssvm::csvm {
  public:
    /// The type of the device pointer (dependent on the used backend).
    template <typename real_type>
    using device_ptr_type = device_ptr_t<real_type>;
    /// The type of the device queue (dependent on the used backend).
    using queue_type = queue_t;

    /// Construct a C-SVM using the SVM parameter params.
    explicit gpu_csvm(plssvm::parameter params = {}) :
        ::plssvm::csvm{ params } {}
    /// Construct a C-SVM forwarding all parameters args to the plssvm::parameter constructor.
    template <typename... Args>
    explicit gpu_csvm(Args &&...args) :
        ::plssvm::csvm{ std::forward<Args>(args)... } {}

    /// Delete the copy-constructor since a C-SVM is a move-only type.
    gpu_csvm(const gpu_csvm &) = delete;
    /// Default move-constructor since a virtual destructor has been declared.
    gpu_csvm(gpu_csvm &&) noexcept = default;
    /// Delete the copy-assignment operator since a C-SVM is a move-only type.
    gpu_csvm &operator=(const gpu_csvm &) = delete;
    /// Default move-assignment operator since a virtual destructor has been declared.
    gpu_csvm &operator=(gpu_csvm &&) noexcept = default;
    /// Default destructor.
    ~gpu_csvm() override = default;

    /// Return the number of available devices for the current backend.
    [[nodiscard]] std::size_t num_available_devices() const noexcept {
        return devices_.size();
    }

  protected:
    /// Solves the system of linear equations using the Conjugate Gradients (CG) algorithm (single-precision wrapper).
    [[nodiscard]] std::pair<std::vector<float>, float> solve_system_of_linear_equations(const parameter<float> &params, const std::vector<std::vector<float>> &A, std::vector<float> b, float eps, unsigned long long max_iter) const final { return this->solve_system_of_linear_equations_impl(params, A, std::move(b), eps, max_iter); }
    /// Solves the system of linear equations using the Conjugate Gradients (CG) algorithm (double-precision wrapper).
    [[nodiscard]] std::pair<std::vector<double>, double> solve_system_of_linear_equations(const parameter<double> &params, const std::vector<std::vector<double>> &A, std::vector<double> b, double eps, unsigned long long max_iter) const final { return this->solve_system_of_linear_equations_impl(params, A, std::move(b), eps, max_iter); }
    /// Solves the system of linear equations using the Conjugate Gradients (CG) algorithm.
    template <typename real_type>
    [[nodiscard]] std::pair<std::vector<real_type>, real_type> solve_system_of_linear_equations_impl(const parameter<real_type> &params, const std::vector<std::vector<real_type>> &A, std::vector<real_type> b, real_type eps, unsigned long long max_iter) const;

    /// Uses the already learned model to predict the class of multiple (new) data points (single-precision wrapper).
    [[nodiscard]] std::vector<float> predict_values(const parameter<float> &params, const std::vector<std::vector<float>> &support_vectors, const std::vector<float> &alpha, float rho, std::vector<float> &w, const std::vector<std::vector<float>> &predict_points) const final { return this->predict_values_impl(params, support_vectors, alpha, rho, w, predict_points); }
    /// Uses the already learned model to predict the class of multiple (new) data points (double-precision wrapper).
    [[nodiscard]] std::vector<double> predict_values(const parameter<double> &params, const std::vector<std::vector<double>> &support_vectors, const std::vector<double> &alpha, double rho, std::vector<double> &w, const std::vector<std::vector<double>> &predict_points) const final { return this->predict_values_impl(params, support_vectors, alpha, rho, w, predict_points); }
    /// Uses the already learned model to predict the class of multiple (new) data points.
    template <typename real_type>
    [[nodiscard]] std::vector<real_type> predict_values_impl(const parameter<real_type> &params, const std::vector<std::vector<real_type>> &support_vectors, const std::vector<real_type> &alpha, real_type rho, std::vector<real_type> &w, const std::vector<std::vector<real_type>> &predict_points) const;

    /// Returns the number of usable devices given the kernel function kernel and the number of features num_features.
    [[nodiscard]] std::size_t select_num_used_devices(kernel_function_type kernel, std::size_t num_features) const noexcept;
    /// Performs all necessary steps such that the data is available on the device with the correct layout.
    template <typename real_type>
    [[nodiscard]] std::tuple<std::vector<device_ptr_type<real_type>>, std::vector<device_ptr_type<real_type>>, std::vector<std::size_t>> setup_data_on_device(const std::vector<std::vector<real_type>> &data, std::size_t num_data_points_to_setup, std::size_t num_features_to_setup, std::size_t boundary_size, std::size_t num_used_devices) const;

    /// Calculates the q vector used in the dimensional reduction.
    template <typename real_type>
    [[nodiscard]] std::vector<real_type> generate_q(const parameter<real_type> &params, const std::vector<device_ptr_type<real_type>> &data_d, const std::vector<device_ptr_type<real_type>> &data_last_d, std::size_t num_data_points, const std::vector<std::size_t> &feature_ranges, std::size_t boundary_size) const;
    /// Precalculates the w vector to speed up the prediction when using the linear kernel function.
    template <typename real_type>
    [[nodiscard]] std::vector<real_type> calculate_w(const std::vector<device_ptr_type<real_type>> &data_d, const std::vector<device_ptr_type<real_type>> &data_last_d, const std::vector<device_ptr_type<real_type>> &alpha_d, std::size_t num_data_points, const std::vector<std::size_t> &feature_ranges) const;

    /// Selects the correct kernel based on the used kernel function and runs it on the device denoted by device.
    template <typename real_type>
    void run_device_kernel(std::size_t device, const parameter<real_type> &params, const device_ptr_type<real_type> &q_d, device_ptr_type<real_type> &r_d, const device_ptr_type<real_type> &x_d, const device_ptr_type<real_type> &data_d, const std::vector<std::size_t> &feature_ranges, real_type QA_cost, real_type add, std::size_t dept, std::size_t boundary_size) const;
    /// Combines the data in buffer_d from all devices into buffer and distributes the result back to each device.
    template <typename real_type>
    void device_reduction(std::vector<device_ptr_type<real_type>> &buffer_d, std::vector<real_type> &buffer) const;

    //*************************************************************************************************************************************//
    //                                               pure virtual, must be implemented by all subclasses                                   //
    //*************************************************************************************************************************************//
    // Note: there are two versions of each function (one for float and one for double) since virtual template functions are not allowed in C++!

    /// Synchronize the device denoted by queue.
    virtual void device_synchronize(const queue_type &queue) const = 0;
    /// Run the device kernel filling the q vector.
    virtual void run_q_kernel(std::size_t device, const detail::execution_range &range, const parameter<float> &params, device_ptr_type<float> &q_d, const device_ptr_type<float> &data_d, const device_ptr_type<float> &data_last_d, std::size_t num_data_points_padded, std::size_t num_features) const = 0;
    /// Run the device kernel filling the q vector.
    virtual void run_q_kernel(std::size_t device, const detail::execution_range &range, const parameter<double> &params, device_ptr_type<double> &q_d, const device_ptr_type<double> &data_d, const device_ptr_type<double> &data_last_d, std::size_t num_data_points_padded, std::size_t num_features) const = 0;
    /// Run the main device kernel used in the CG algorithm.
    virtual void run_svm_kernel(std::size_t device, const detail::execution_range &range, const parameter<float> &params, const device_ptr_type<float> &q_d, device_ptr_type<float> &r_d, const device_ptr_type<float> &x_d, const device_ptr_type<float> &data_d, float QA_cost, float add, std::size_t num_data_points_padded, std::size_t num_features) const = 0;
    /// Run the main device kernel used in the CG algorithm.
    virtual void run_svm_kernel(std::size_t device, const detail::execution_range &range, const parameter<double> &params, const device_ptr_type<double> &q_d, device_ptr_type<double> &r_d, const device_ptr_type<double> &x_d, const device_ptr_type<double> &data_d, double QA_cost, double add, std::size_t num_data_points_padded, std::size_t num_features) const = 0;
    /// Run the device kernel to calculate the w vector used to speed up the prediction when using the linear kernel function.
    virtual void run_w_kernel(std::size_t device, const detail::execution_range &range, device_ptr_type<float> &w_d, const device_ptr_type<float> &alpha_d, const device_ptr_type<float> &data_d, const device_ptr_type<float> &data_last_d, std::size_t num_data_points, std::size_t num_features) const = 0;
    /// Run the device kernel to calculate the w vector used to speed up the prediction when using the linear kernel function.
    virtual void run_w_kernel(std::size_t device, const detail::execution_range &range, device_ptr_type<double> &w_d, const device_ptr_type<double> &alpha_d, const device_ptr_type<double> &data_d, const device_ptr_type<double> &data_last_d, std::size_t num_data_points, std::size_t num_features) const = 0;
    /// Run the device kernel (only on the first device) to predict the new data points point_d.
    virtual void run_predict_kernel(const detail::execution_range &range, const parameter<float> &params, device_ptr_type<float> &out_d, const device_ptr_type<float> &alpha_d, const device_ptr_type<float> &point_d, const device_ptr_type<float> &data_d, const device_ptr_type<float> &data_last_d, std::size_t num_support_vectors, std::size_t num_predict_points, std::size_t num_features) const = 0;
    /// Run the device kernel (only on the first device) to predict the new data points point_d.
    virtual void run_predict_kernel(const detail::execution_range &range, const parameter<double> &params, device_ptr_type<double> &out_d, const device_ptr_type<double> &alpha_d, const device_ptr_type<double> &point_d, const device_ptr_type<double> &data_d, const device_ptr_type<double> &data_last_d, std::size_t num_support_vectors, std::size_t num_predict_points, std::size_t num_features) const = 0;

    /// The available/used backend devices.
    std::vector<queue_type> devices_{};
};
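
// A minimal usage sketch (hypothetical backend names, not part of PLSSVM): a concrete backend derives
// from gpu_csvm with its backend-specific device pointer and queue types and implements the pure
// virtual kernel launchers; everything else (data distribution, CG solver, prediction) is inherited.
//
//   class my_csvm : public gpu_csvm<my_device_ptr, my_queue> {
//     protected:
//       void device_synchronize(const my_queue &queue) const override { /* wait until queue is idle */ }
//       void run_q_kernel(/* float overload */) const override { /* launch the backend's q kernel */ }
//       // ... the remaining run_q_kernel, run_svm_kernel, run_w_kernel, and run_predict_kernel overloads ...
//   };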

template <template <typename> typename device_ptr_t, typename queue_t>
std::size_t gpu_csvm<device_ptr_t, queue_t>::select_num_used_devices(const kernel_function_type kernel, const std::size_t num_features) const noexcept {
    PLSSVM_ASSERT(num_features > 0, "At least one feature must be given!");

    // the polynomial and rbf kernels currently only support single GPU execution
    if ((kernel == kernel_function_type::polynomial || kernel == kernel_function_type::rbf) && devices_.size() > 1) {
        std::clog << fmt::format("Warning: found {} devices, however only 1 device can be used since the polynomial and rbf kernels currently only support single GPU execution!", devices_.size()) << std::endl;
        return 1;
    }

    // the number of used devices may not exceed the number of features
    const std::size_t num_used_devices = std::min(devices_.size(), num_features);
    if (num_used_devices < devices_.size()) {
        std::clog << fmt::format("Warning: found {} devices, however only {} device(s) can be used since the data set only has {} features!", devices_.size(), num_used_devices, num_features) << std::endl;
    }
    return num_used_devices;
}

template <template <typename> typename device_ptr_t, typename queue_t>
template <typename real_type>
std::tuple<std::vector<device_ptr_t<real_type>>, std::vector<device_ptr_t<real_type>>, std::vector<std::size_t>>
gpu_csvm<device_ptr_t, queue_t>::setup_data_on_device(const std::vector<std::vector<real_type>> &data,
                                                      const std::size_t num_data_points_to_setup,
                                                      const std::size_t num_features_to_setup,
                                                      const std::size_t boundary_size,
                                                      const std::size_t num_used_devices) const {
    PLSSVM_ASSERT(!data.empty(), "The data must not be empty!");
    PLSSVM_ASSERT(!data.front().empty(), "The data points must contain at least one feature!");
    PLSSVM_ASSERT(std::all_of(data.cbegin(), data.cend(), [&data](const std::vector<real_type> &data_point) { return data_point.size() == data.front().size(); }), "All data points must have the same number of features!");
    PLSSVM_ASSERT(num_data_points_to_setup > 0, "At least one data point must be copied to the device!");
    PLSSVM_ASSERT(num_data_points_to_setup <= data.size(), "Can't copy more data points to the device than are present!: {} <= {}", num_data_points_to_setup, data.size());
    PLSSVM_ASSERT(num_features_to_setup > 0, "At least one feature must be copied to the device!");
    PLSSVM_ASSERT(num_features_to_setup <= data.front().size(), "Can't copy more features to the device than are present!: {} <= {}", num_features_to_setup, data.front().size());
    PLSSVM_ASSERT(num_used_devices <= devices_.size(), "Can't use more devices than are available!: {} <= {}", num_used_devices, devices_.size());

    // calculate the number of features per device
    std::vector<std::size_t> feature_ranges(num_used_devices + 1);
    for (typename std::vector<queue_type>::size_type device = 0; device <= num_used_devices; ++device) {
        feature_ranges[device] = device * num_features_to_setup / num_used_devices;
    }
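    // e.g., splitting 10 features across 3 devices yields feature_ranges = { 0, 3, 6, 10 },
    // i.e., device d is responsible for the features [feature_ranges[d], feature_ranges[d + 1])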

    // transform 2D to 1D SoA data
    const std::vector<real_type> transformed_data = detail::transform_to_layout(detail::layout_type::soa, data, boundary_size, num_data_points_to_setup);
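    // in the SoA layout each feature is stored contiguously, padded with boundary_size entries per feature,
    // so the data for one device starts at offset feature_ranges[device] * (num_data_points_to_setup + boundary_size)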

    std::vector<device_ptr_type<real_type>> data_last_d(num_used_devices);
    std::vector<device_ptr_type<real_type>> data_d(num_used_devices);

    #pragma omp parallel for default(none) shared(num_used_devices, devices_, feature_ranges, data_last_d, data_d, data, transformed_data) firstprivate(num_data_points_to_setup, boundary_size, num_features_to_setup)
    for (typename std::vector<queue_type>::size_type device = 0; device < num_used_devices; ++device) {
        const std::size_t num_features_in_range = feature_ranges[device + 1] - feature_ranges[device];

        // initialize data_last on the device
        data_last_d[device] = device_ptr_type<real_type>{ num_features_in_range + boundary_size, devices_[device] };
        data_last_d[device].memset(0);
        data_last_d[device].copy_to_device(data.back().data() + feature_ranges[device], 0, num_features_in_range);

        // initialize the data slice on the device
        const std::size_t device_data_size = num_features_in_range * (num_data_points_to_setup + boundary_size);
        data_d[device] = device_ptr_type<real_type>{ device_data_size, devices_[device] };
        data_d[device].copy_to_device(transformed_data.data() + feature_ranges[device] * (num_data_points_to_setup + boundary_size), 0, device_data_size);
    }

    return std::make_tuple(std::move(data_d), std::move(data_last_d), std::move(feature_ranges));
}

template <template <typename> typename device_ptr_t, typename queue_t>
template <typename real_type>
std::vector<real_type> gpu_csvm<device_ptr_t, queue_t>::generate_q(const parameter<real_type> &params,
                                                                   const std::vector<device_ptr_type<real_type>> &data_d,
                                                                   const std::vector<device_ptr_type<real_type>> &data_last_d,
                                                                   const std::size_t num_data_points,
                                                                   const std::vector<std::size_t> &feature_ranges,
                                                                   const std::size_t boundary_size) const {
    PLSSVM_ASSERT(!data_d.empty(), "The data_d array may not be empty!");
    PLSSVM_ASSERT(std::all_of(data_d.cbegin(), data_d.cend(), [](const device_ptr_type<real_type> &ptr) { return !ptr.empty(); }), "Each device_ptr in data_d must contain at least one data point!");
    PLSSVM_ASSERT(!data_last_d.empty(), "The data_last_d array may not be empty!");
    PLSSVM_ASSERT(std::all_of(data_last_d.cbegin(), data_last_d.cend(), [](const device_ptr_type<real_type> &ptr) { return !ptr.empty(); }), "Each device_ptr in data_last_d must contain at least one data point!");
    PLSSVM_ASSERT(data_d.size() == data_last_d.size(), "The number of used devices in data_d and data_last_d must be equal!: {} != {}", data_d.size(), data_last_d.size());
    PLSSVM_ASSERT(num_data_points > 0, "At least one data point must be used to calculate q!");
    PLSSVM_ASSERT(feature_ranges.size() == data_d.size() + 1, "The number of values in the feature_ranges vector must be exactly one more than the number of used devices!: {} != {} + 1", feature_ranges.size(), data_d.size());
    PLSSVM_ASSERT(std::adjacent_find(feature_ranges.cbegin(), feature_ranges.cend(), std::greater_equal<>{}) == feature_ranges.cend(), "The feature ranges must be monotonically increasing!");

    const std::size_t num_used_devices = data_d.size();
    std::vector<device_ptr_type<real_type>> q_d(num_used_devices);

    #pragma omp parallel for default(none) shared(num_used_devices, q_d, devices_, data_d, data_last_d, feature_ranges, params) firstprivate(num_data_points, boundary_size, THREAD_BLOCK_SIZE)
    for (typename std::vector<queue_type>::size_type device = 0; device < num_used_devices; ++device) {
        q_d[device] = device_ptr_type<real_type>{ num_data_points + boundary_size, devices_[device] };
        q_d[device].memset(0);

        // feature splitting on multiple devices
        const detail::execution_range range({ static_cast<std::size_t>(std::ceil(static_cast<real_type>(num_data_points) / static_cast<real_type>(THREAD_BLOCK_SIZE))) },
                                            { std::min<std::size_t>(THREAD_BLOCK_SIZE, num_data_points) });

        run_q_kernel(device, range, params, q_d[device], data_d[device], data_last_d[device], num_data_points + boundary_size, feature_ranges[device + 1] - feature_ranges[device]);
    }

    std::vector<real_type> q(num_data_points);
    device_reduction(q_d, q);
    return q;
}

template <template <typename> typename device_ptr_t, typename queue_t>
template <typename real_type>
std::vector<real_type> gpu_csvm<device_ptr_t, queue_t>::calculate_w(const std::vector<device_ptr_type<real_type>> &data_d,
                                                                    const std::vector<device_ptr_type<real_type>> &data_last_d,
                                                                    const std::vector<device_ptr_type<real_type>> &alpha_d,
                                                                    const std::size_t num_data_points,
                                                                    const std::vector<std::size_t> &feature_ranges) const {
    PLSSVM_ASSERT(!data_d.empty(), "The data_d array may not be empty!");
    PLSSVM_ASSERT(std::all_of(data_d.cbegin(), data_d.cend(), [](const device_ptr_type<real_type> &ptr) { return !ptr.empty(); }), "Each device_ptr in data_d must contain at least one data point!");
    PLSSVM_ASSERT(!data_last_d.empty(), "The data_last_d array may not be empty!");
    PLSSVM_ASSERT(std::all_of(data_last_d.cbegin(), data_last_d.cend(), [](const device_ptr_type<real_type> &ptr) { return !ptr.empty(); }), "Each device_ptr in data_last_d must contain at least one data point!");
    PLSSVM_ASSERT(data_d.size() == data_last_d.size(), "The number of used devices in data_d and data_last_d must be equal!: {} != {}", data_d.size(), data_last_d.size());
    PLSSVM_ASSERT(!alpha_d.empty(), "The alpha_d array may not be empty!");
    PLSSVM_ASSERT(std::all_of(alpha_d.cbegin(), alpha_d.cend(), [](const device_ptr_type<real_type> &ptr) { return !ptr.empty(); }), "Each device_ptr in alpha_d must contain at least one data point!");
    PLSSVM_ASSERT(data_d.size() == alpha_d.size(), "The number of used devices in data_d and alpha_d must be equal!: {} != {}", data_d.size(), alpha_d.size());
    PLSSVM_ASSERT(num_data_points > 0, "At least one data point must be used to calculate w!");
    PLSSVM_ASSERT(feature_ranges.size() == data_d.size() + 1, "The number of values in the feature_ranges vector must be exactly one more than the number of used devices!: {} != {} + 1", feature_ranges.size(), data_d.size());
    PLSSVM_ASSERT(std::adjacent_find(feature_ranges.cbegin(), feature_ranges.cend(), std::greater_equal<>{}) == feature_ranges.cend(), "The feature ranges must be monotonically increasing!");

    const std::size_t num_used_devices = data_d.size();

    // create the w vector and fill it with zeros
    std::vector<real_type> w(feature_ranges.back(), real_type{ 0.0 });

    #pragma omp parallel for default(none) shared(num_used_devices, devices_, feature_ranges, alpha_d, data_d, data_last_d, w) firstprivate(num_data_points, THREAD_BLOCK_SIZE)
    for (typename std::vector<queue_type>::size_type device = 0; device < num_used_devices; ++device) {
        // feature splitting on multiple devices
        const std::size_t num_features_in_range = feature_ranges[device + 1] - feature_ranges[device];

        // create the w vector on the device
        device_ptr_type<real_type> w_d = device_ptr_type<real_type>{ num_features_in_range, devices_[device] };

        const detail::execution_range range({ static_cast<std::size_t>(std::ceil(static_cast<real_type>(num_features_in_range) / static_cast<real_type>(THREAD_BLOCK_SIZE))) },
                                            { std::min<std::size_t>(THREAD_BLOCK_SIZE, num_features_in_range) });

        // calculate the w vector on the device
        run_w_kernel(device, range, w_d, alpha_d[device], data_d[device], data_last_d[device], num_data_points, num_features_in_range);
        device_synchronize(devices_[device]);

        // copy the result back to host memory
        w_d.copy_to_host(w.data() + feature_ranges[device], 0, num_features_in_range);
    }
    return w;
}

template <template <typename> typename device_ptr_t, typename queue_t>
template <typename real_type>
void gpu_csvm<device_ptr_t, queue_t>::run_device_kernel(const std::size_t device, const parameter<real_type> &params, const device_ptr_type<real_type> &q_d, device_ptr_type<real_type> &r_d, const device_ptr_type<real_type> &x_d, const device_ptr_type<real_type> &data_d, const std::vector<std::size_t> &feature_ranges, const real_type QA_cost, const real_type add, const std::size_t dept, const std::size_t boundary_size) const {
    PLSSVM_ASSERT(device < devices_.size(), "Requested device {}, but only {} device(s) are available!", device, devices_.size());
    PLSSVM_ASSERT(!q_d.empty(), "The q_d device_ptr may not be empty!");
    PLSSVM_ASSERT(!r_d.empty(), "The r_d device_ptr may not be empty!");
    PLSSVM_ASSERT(!x_d.empty(), "The x_d device_ptr may not be empty!");
    PLSSVM_ASSERT(!data_d.empty(), "The data_d device_ptr may not be empty!");
    PLSSVM_ASSERT(std::adjacent_find(feature_ranges.cbegin(), feature_ranges.cend(), std::greater_equal<>{}) == feature_ranges.cend(), "The feature ranges must be monotonically increasing!");
    PLSSVM_ASSERT(add == real_type{ -1.0 } || add == real_type{ 1.0 }, "add must either be -1.0 or 1.0, but is {}!", add);
    PLSSVM_ASSERT(dept > 0, "At least one data point must be used!");

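    // each thread block covers boundary_size (= THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE at all call sites)
    // data points per dimension, hence the two-dimensional grid uses ceil(dept / boundary_size) blocks per dimension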
    const auto grid = static_cast<std::size_t>(std::ceil(static_cast<real_type>(dept) / static_cast<real_type>(boundary_size)));
    const detail::execution_range range({ grid, grid }, { THREAD_BLOCK_SIZE, THREAD_BLOCK_SIZE });

    run_svm_kernel(device, range, params, q_d, r_d, x_d, data_d, QA_cost, add, dept + boundary_size, feature_ranges[device + 1] - feature_ranges[device]);
}

template <template <typename> typename device_ptr_t, typename queue_t>
template <typename real_type>
void gpu_csvm<device_ptr_t, queue_t>::device_reduction(std::vector<device_ptr_type<real_type>> &buffer_d, std::vector<real_type> &buffer) const {
    PLSSVM_ASSERT(!buffer_d.empty(), "The buffer_d array may not be empty!");
    PLSSVM_ASSERT(std::all_of(buffer_d.cbegin(), buffer_d.cend(), [](const device_ptr_type<real_type> &ptr) { return !ptr.empty(); }), "Each device_ptr in buffer_d must contain at least one data point!");
    PLSSVM_ASSERT(!buffer.empty(), "The buffer array may not be empty!");

    using namespace plssvm::operators;

    device_synchronize(devices_[0]);
    buffer_d[0].copy_to_host(buffer, 0, buffer.size());

    if (buffer_d.size() > 1) {
        // accumulate the partial results of all remaining devices on the host
        std::vector<real_type> ret(buffer.size());
        for (typename std::vector<device_ptr_type<real_type>>::size_type device = 1; device < buffer_d.size(); ++device) {
            device_synchronize(devices_[device]);
            buffer_d[device].copy_to_host(ret, 0, ret.size());

            buffer += ret;
        }

        // distribute the reduced result back to all devices
        #pragma omp parallel for default(none) shared(buffer_d, buffer)
        for (typename std::vector<device_ptr_type<real_type>>::size_type device = 0; device < buffer_d.size(); ++device) {
            buffer_d[device].copy_to_device(buffer, 0, buffer.size());
        }
    }
}

template <template <typename> typename device_ptr_t, typename queue_t>
template <typename real_type>
std::pair<std::vector<real_type>, real_type> gpu_csvm<device_ptr_t, queue_t>::solve_system_of_linear_equations_impl(const parameter<real_type> &params,
                                                                                                                     const std::vector<std::vector<real_type>> &A,
                                                                                                                     std::vector<real_type> b,
                                                                                                                     const real_type eps,
                                                                                                                     const unsigned long long max_iter) const {
    PLSSVM_ASSERT(!A.empty(), "The data must not be empty!");
    PLSSVM_ASSERT(!A.front().empty(), "The data points must contain at least one feature!");
    PLSSVM_ASSERT(std::all_of(A.cbegin(), A.cend(), [&A](const std::vector<real_type> &data_point) { return data_point.size() == A.front().size(); }), "All data points must have the same number of features!");
    PLSSVM_ASSERT(A.size() == b.size(), "The number of data points in the matrix A ({}) and the number of values in the right-hand side vector ({}) must be the same!", A.size(), b.size());
    PLSSVM_ASSERT(eps > real_type{ 0.0 }, "The stopping criterion in the CG algorithm must be greater than 0.0, but is {}!", eps);
    PLSSVM_ASSERT(max_iter > 0, "The number of CG iterations must be greater than 0!");

    using namespace plssvm::operators;

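    // dimensional reduction: the last equation of the (dept + 1) x (dept + 1) system is handled separately;
    // its contribution enters through the q vector and QA_cost, and its weight is recovered after the CG solve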
    const std::size_t dept = A.size() - 1;
    constexpr auto boundary_size = static_cast<std::size_t>(THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE);
    const std::size_t num_features = A.front().size();

    const std::size_t num_used_devices = this->select_num_used_devices(params.kernel_type, num_features);

    std::vector<device_ptr_type<real_type>> data_d;
    std::vector<device_ptr_type<real_type>> data_last_d;
    std::vector<std::size_t> feature_ranges;
    std::tie(data_d, data_last_d, feature_ranges) = this->setup_data_on_device(A, dept, num_features, boundary_size, num_used_devices);

    // create the q vector
    const std::vector<real_type> q = this->generate_q(params, data_d, data_last_d, dept, feature_ranges, boundary_size);

    // calculate QA_cost
    const real_type QA_cost = kernel_function(A.back(), A.back(), params) + real_type{ 1.0 } / params.cost;

    // update b
    const real_type b_back_value = b.back();
    b.pop_back();
    b -= b_back_value;
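    // b now holds the right-hand side of the reduced system: b_i - b_back_value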

    std::vector<real_type> x(dept, 1.0);
    std::vector<device_ptr_type<real_type>> x_d(num_used_devices);

    std::vector<real_type> r(dept, 0.0);
    std::vector<device_ptr_type<real_type>> r_d(num_used_devices);

    #pragma omp parallel for default(none) shared(num_used_devices, devices_, x, x_d, r_d) firstprivate(dept, boundary_size)
    for (typename std::vector<queue_type>::size_type device = 0; device < num_used_devices; ++device) {
        x_d[device] = device_ptr_type<real_type>{ dept + boundary_size, devices_[device] };
        x_d[device].memset(0);
        x_d[device].copy_to_device(x, 0, dept);

        r_d[device] = device_ptr_type<real_type>{ dept + boundary_size, devices_[device] };
        r_d[device].memset(0);
    }
    r_d[0].copy_to_device(b, 0, dept);

    std::vector<device_ptr_type<real_type>> q_d(num_used_devices);
    #pragma omp parallel for default(none) shared(num_used_devices, devices_, q, q_d, r_d, x_d, data_d, feature_ranges, params) firstprivate(dept, boundary_size, QA_cost, num_features)
    for (typename std::vector<queue_type>::size_type device = 0; device < num_used_devices; ++device) {
        q_d[device] = device_ptr_type<real_type>{ dept + boundary_size, devices_[device] };
        q_d[device].memset(0);
        q_d[device].copy_to_device(q, 0, dept);

        // r = b - A * x (r_d was initialized with b on the first device and with zeros on all others)
        run_device_kernel(device, params, q_d[device], r_d[device], x_d[device], data_d[device], feature_ranges, QA_cost, real_type{ -1.0 }, dept, boundary_size);
    }
    device_reduction(r_d, r);

    // delta = r^T * r
    real_type delta = transposed{ r } * r;
    const real_type delta0 = delta;
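    // CG stops once the residual is small enough relative to the initial one: delta <= eps^2 * delta0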
    std::vector<real_type> Ad(dept);

    std::vector<device_ptr_type<real_type>> Ad_d(num_used_devices);
    for (typename std::vector<queue_type>::size_type device = 0; device < num_used_devices; ++device) {
        Ad_d[device] = device_ptr_type<real_type>{ dept + boundary_size, devices_[device] };
    }

    std::vector<real_type> d(r);

    // timing for each CG iteration
    std::chrono::milliseconds average_iteration_time{};
    std::chrono::steady_clock::time_point iteration_start_time{};
    const auto output_iteration_duration = [&]() {
        const auto iteration_end_time = std::chrono::steady_clock::now();
        const auto iteration_duration = std::chrono::duration_cast<std::chrono::milliseconds>(iteration_end_time - iteration_start_time);
        detail::log(verbosity_level::full | verbosity_level::timing,
                    "Done in {}.\n", iteration_duration);
        average_iteration_time += iteration_duration;
    };

    unsigned long long iter = 0;
    for (; iter < max_iter; ++iter) {
        detail::log(verbosity_level::full | verbosity_level::timing,
                    "Start Iteration {} (max: {}) with current residuum {} (target: {}). ", iter + 1, max_iter, delta, eps * eps * delta0);
        iteration_start_time = std::chrono::steady_clock::now();

        // Ad = A * d (in CG terms: q = A * d; r_d currently holds the search direction d)
        #pragma omp parallel for default(none) shared(num_used_devices, devices_, Ad_d, r_d, q_d, data_d, feature_ranges, params) firstprivate(dept, QA_cost, boundary_size, num_features)
        for (typename std::vector<queue_type>::size_type device = 0; device < num_used_devices; ++device) {
            Ad_d[device].memset(0);
            // zero the entries past dept (the boundary padding)
            r_d[device].memset(0, dept);

            run_device_kernel(device, params, q_d[device], Ad_d[device], r_d[device], data_d[device], feature_ranges, QA_cost, real_type{ 1.0 }, dept, boundary_size);
        }
        // update Ad (q)
        device_reduction(Ad_d, Ad);

        // alpha = delta_new / (d^T * q)
        const real_type alpha_cd = delta / (transposed{ d } * Ad);

        // x = x + alpha * d
        x += alpha_cd * d;

        #pragma omp parallel for default(none) shared(num_used_devices, devices_, x, x_d) firstprivate(dept)
        for (typename std::vector<queue_type>::size_type device = 0; device < num_used_devices; ++device) {
            x_d[device].copy_to_device(x, 0, dept);
        }

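        // every 50th iteration the residual is recomputed explicitly as r = b - A * x (instead of the
        // cheap update r -= alpha_cd * Ad) to limit the accumulation of floating-point rounding errors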
        if (iter % 50 == 49) {
            #pragma omp parallel for default(none) shared(num_used_devices, devices_, r_d, b, q_d, x_d, params, data_d, feature_ranges) firstprivate(QA_cost, dept)
            for (typename std::vector<queue_type>::size_type device = 0; device < num_used_devices; ++device) {
                if (device == 0) {
                    // r = b
                    r_d[device].copy_to_device(b, 0, dept);
                } else {
                    // set r to 0
                    r_d[device].memset(0);
                }
                // r -= A * x
                run_device_kernel(device, params, q_d[device], r_d[device], x_d[device], data_d[device], feature_ranges, QA_cost, real_type{ -1.0 }, dept, boundary_size);
            }

            device_reduction(r_d, r);
        } else {
            // r -= alpha_cd * Ad (r = r - alpha * q)
            r -= alpha_cd * Ad;
        }

        // delta = r^T * r
        const real_type delta_old = delta;
        delta = transposed{ r } * r;
        // if we are exact enough, stop the CG iterations
        if (delta <= eps * eps * delta0) {
            output_iteration_duration();
            break;
        }

        // beta = delta_new / delta_old
        const real_type beta = delta / delta_old;
        // d = beta * d + r
        d = beta * d + r;

        // r_d = d
        #pragma omp parallel for default(none) shared(num_used_devices, devices_, r_d, d) firstprivate(dept)
        for (typename std::vector<queue_type>::size_type device = 0; device < num_used_devices; ++device) {
            r_d[device].copy_to_device(d, 0, dept);
        }

        output_iteration_duration();
    }
    detail::log(verbosity_level::full | verbosity_level::timing,
                "Finished after {}/{} iterations with a residuum of {} (target: {}) and an average iteration time of {}.\n",
                detail::tracking_entry{ "cg", "iterations", std::min(iter + 1, max_iter) },
                detail::tracking_entry{ "cg", "max_iterations", max_iter },
                detail::tracking_entry{ "cg", "residuum", delta },
                detail::tracking_entry{ "cg", "target_residuum", eps * eps * delta0 },
                detail::tracking_entry{ "cg", "avg_iteration_time", average_iteration_time / std::min(iter + 1, max_iter) });
    detail::log(verbosity_level::libsvm,
                "optimization finished, #iter = {}\n", std::min(iter + 1, max_iter));

    // calculate the bias
    std::vector<real_type> alpha(x.begin(), x.begin() + dept);
    const real_type bias = b_back_value + QA_cost * sum(alpha) - (transposed{ q } * alpha);
    alpha.push_back(-sum(alpha));
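    // the weight of the eliminated last equation is appended so that all weights sum to zero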

    return std::make_pair(std::move(alpha), -bias);
}

template <template <typename> typename device_ptr_t, typename queue_t>
template <typename real_type>
std::vector<real_type> gpu_csvm<device_ptr_t, queue_t>::predict_values_impl(const parameter<real_type> &params,
                                                                            const std::vector<std::vector<real_type>> &support_vectors,
                                                                            const std::vector<real_type> &alpha,
                                                                            real_type rho,
                                                                            std::vector<real_type> &w,
                                                                            const std::vector<std::vector<real_type>> &predict_points) const {
    PLSSVM_ASSERT(!support_vectors.empty(), "The support vectors must not be empty!");
    PLSSVM_ASSERT(!support_vectors.front().empty(), "The support vectors must contain at least one feature!");
    PLSSVM_ASSERT(std::all_of(support_vectors.cbegin(), support_vectors.cend(), [&support_vectors](const std::vector<real_type> &data_point) { return data_point.size() == support_vectors.front().size(); }), "All support vectors must have the same number of features!");
    PLSSVM_ASSERT(support_vectors.size() == alpha.size(), "The number of support vectors ({}) and the number of weights ({}) must be the same!", support_vectors.size(), alpha.size());
    PLSSVM_ASSERT(w.empty() || support_vectors.front().size() == w.size(), "Either w must be empty or contain exactly as many values ({}) as features are present ({})!", w.size(), support_vectors.front().size());
    PLSSVM_ASSERT(!predict_points.empty(), "The data points to predict must not be empty!");
    PLSSVM_ASSERT(!predict_points.front().empty(), "The data points to predict must contain at least one feature!");
    PLSSVM_ASSERT(std::all_of(predict_points.cbegin(), predict_points.cend(), [&predict_points](const std::vector<real_type> &data_point) { return data_point.size() == predict_points.front().size(); }), "All data points to predict must have the same number of features!");
    PLSSVM_ASSERT(support_vectors.front().size() == predict_points.front().size(), "The number of features in the support vectors ({}) must be the same as in the data points to predict ({})!", support_vectors.front().size(), predict_points.front().size());

    using namespace plssvm::operators;

    const std::size_t num_support_vectors = support_vectors.size();
    const std::size_t num_predict_points = predict_points.size();
    const std::size_t num_features = predict_points.front().size();
    constexpr auto boundary_size = static_cast<std::size_t>(THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE);

    const std::size_t num_used_devices = this->select_num_used_devices(params.kernel_type, num_features);

    auto [data_d, data_last_d, feature_ranges] = this->setup_data_on_device(support_vectors, num_support_vectors - 1, num_features, boundary_size, num_used_devices);

    std::vector<device_ptr_type<real_type>> alpha_d(num_used_devices);
    #pragma omp parallel for default(none) shared(num_used_devices, devices_, alpha_d, alpha) firstprivate(num_support_vectors)
    for (typename std::vector<queue_type>::size_type device = 0; device < num_used_devices; ++device) {
        alpha_d[device] = device_ptr_type<real_type>{ num_support_vectors + THREAD_BLOCK_SIZE, devices_[device] };
        alpha_d[device].memset(0);
        alpha_d[device].copy_to_device(alpha, 0, num_support_vectors);
    }

    std::vector<real_type> out(predict_points.size());

    // use the faster method in case of the linear kernel function and a not yet calculated w vector
    if (params.kernel_type == kernel_function_type::linear && w.empty()) {
        w = calculate_w(data_d, data_last_d, alpha_d, support_vectors.size(), feature_ranges);
    }
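    // w is returned through the output parameter, so subsequent predict calls can reuse it without recalculating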

    if (params.kernel_type == kernel_function_type::linear) {
        // use the faster method in case of the linear kernel function
        #pragma omp parallel for default(none) shared(out, predict_points, w) firstprivate(num_predict_points, rho)
        for (typename std::vector<std::vector<real_type>>::size_type i = 0; i < num_predict_points; ++i) {
            out[i] = transposed<real_type>{ w } * predict_points[i] - rho;
        }
    } else {
        // create the result vector on the device
        device_ptr_type<real_type> out_d{ num_predict_points + boundary_size, devices_[0] };
        out_d.memset(0);

        // transform the prediction data
        const std::vector<real_type> transformed_data = detail::transform_to_layout(detail::layout_type::soa, predict_points, boundary_size, predict_points.size());
        device_ptr_type<real_type> point_d{ num_features * (num_predict_points + boundary_size), devices_[0] };
        point_d.memset(0);
        point_d.copy_to_device(transformed_data, 0, transformed_data.size());

        const detail::execution_range range({ static_cast<std::size_t>(std::ceil(static_cast<real_type>(num_support_vectors) / static_cast<real_type>(THREAD_BLOCK_SIZE))),
                                              static_cast<std::size_t>(std::ceil(static_cast<real_type>(num_predict_points) / static_cast<real_type>(THREAD_BLOCK_SIZE))) },
                                            { std::min<std::size_t>(THREAD_BLOCK_SIZE, num_support_vectors), std::min<std::size_t>(THREAD_BLOCK_SIZE, num_predict_points) });

        // perform the prediction on the first device
        run_predict_kernel(range, params, out_d, alpha_d[0], point_d, data_d[0], data_last_d[0], num_support_vectors, num_predict_points, num_features);

        out_d.copy_to_host(out, 0, num_predict_points);

        // subtract the bias rho from all predictions
        out += -rho;
    }
    return out;
}

}  // namespace plssvm::detail

#endif  // PLSSVM_BACKENDS_GPU_CSVM_HPP_