PLSSVM - Parallel Least Squares Support Vector Machine  2.0.0
A Least Squares Support Vector Machine implementation using different backends.
scaling_factors_parsing.hpp
Go to the documentation of this file.
1 
12 #ifndef PLSSVM_DETAIL_IO_SCALING_FACTORS_PARSING_HPP_
13 #define PLSSVM_DETAIL_IO_SCALING_FACTORS_PARSING_HPP_
14 #pragma once
15 
#include "plssvm/detail/assert.hpp"             // PLSSVM_ASSERT
#include "plssvm/detail/utility.hpp"            // plssvm::detail::current_date_time
#include "plssvm/detail/io/file_reader.hpp"     // plssvm::detail::io::file_reader
#include "plssvm/detail/string_conversion.hpp"  // plssvm::detail::split_as
#include "plssvm/detail/string_utility.hpp"     // plssvm::detail::trim
#include "plssvm/exceptions/exceptions.hpp"     // plssvm::invalid_file_format_exception

#include "fmt/core.h"  // fmt::format
#include "fmt/os.h"    // fmt::ostream, fmt::output_file

#include <exception>    // std::exception_ptr, std::exception, std::current_exception, std::rethrow_exception
#include <limits>       // std::numeric_limits
#include <string>       // std::string
#include <string_view>  // std::string_view
#include <tuple>        // std::tuple, std::make_tuple
#include <utility>      // std::pair, std::make_pair, std::move
#include <vector>       // std::vector
31 
32 namespace plssvm::detail::io {
33 
56 template <typename real_type, typename factors_type>
57 [[nodiscard]] inline std::tuple<std::pair<real_type, real_type>, std::vector<factors_type>> parse_scaling_factors(const file_reader &reader) {
58  PLSSVM_ASSERT(reader.is_open(), "The file_reader is currently not associated with a file!");
59 
60  // at least two lines ("x" + scale interval)
61  if (reader.num_lines() < 2) {
62  throw invalid_file_format_exception{ fmt::format("At least two lines must be present, but only {} were given!", reader.num_lines()) };
63  }
64 
65  // first line must always contain the single character 'x'
66  if (detail::trim(reader.line(0)) != "x") {
67  throw invalid_file_format_exception{ fmt::format("The first line must only contain an 'x', but is \"{}\"!", reader.line(0)) };
68  }
69  // second line contains the scaling range
70  std::vector<real_type> scale_to_interval = detail::split_as<real_type>(reader.line(1));
71  if (scale_to_interval.size() != 2) {
72  throw invalid_file_format_exception{ fmt::format("The interval to which the data points should be scaled must exactly have two values, but {} were given!", scale_to_interval.size()) };
73  }
74  if (scale_to_interval[0] >= scale_to_interval[1]) {
75  throw invalid_file_format_exception{ fmt::format("Inconsistent scaling interval specification: lower ({}) must be less than upper ({})!", scale_to_interval[0], scale_to_interval[1]) };
76  }
77 
78  // parse scaling factors
79  std::exception_ptr parallel_exception;
80  std::vector<factors_type> scaling_factors(reader.num_lines() - 2);
81  #pragma omp parallel default(none) shared(parallel_exception, scaling_factors, reader)
82  {
83  #pragma omp for
84  for (typename std::vector<factors_type>::size_type i = 0; i < scaling_factors.size(); ++i) {
85  try {
86  // parse the current line
87  const std::string_view line = reader.line(i + 2);
88  const std::vector<real_type> values = detail::split_as<real_type>(line);
89  // check if the line contains the correct number of values
90  if (values.size() != 3) {
91  throw invalid_file_format_exception{ fmt::format("Each line must contain exactly three values, but {} were given!", values.size()) };
92  }
93  // set the scaling factor based on the parsed values
94  const auto feature = static_cast<decltype(scaling_factors[i].feature)>(values[0]);
95  // check if we are one-based, i.e., no 0 must be read as feature value
96  if (feature == 0) {
97  throw invalid_file_format_exception{ "The scaling factors must be provided one-based, but are zero-based!" };
98  }
99  scaling_factors[i].feature = feature - 1;
100  scaling_factors[i].lower = values[1];
101  scaling_factors[i].upper = values[2];
102  } catch (const std::exception &) {
103  // catch first exception and store it
104  #pragma omp critical
105  {
106  if (!parallel_exception) {
107  parallel_exception = std::current_exception();
108  }
109  }
110  }
111  }
112  }
113  // rethrow if an exception occurred inside the parallel region
114  if (parallel_exception) {
115  std::rethrow_exception(parallel_exception);
116  }
117 
118  return std::make_tuple(std::make_pair(scale_to_interval[0], scale_to_interval[1]), std::move(scaling_factors));
119 }
120 
138 template <typename real_type, typename factors_type>
139 inline void write_scaling_factors(const std::string &filename, const std::pair<real_type, real_type> &scaling_interval, const std::vector<factors_type> &scaling_factors) {
140  PLSSVM_ASSERT(scaling_interval.first < scaling_interval.second, "Illegal interval specification: lower ({}) < upper ({}).", scaling_interval.first, scaling_interval.second);
141 
142  // create output file
143  fmt::ostream out = fmt::output_file(filename);
144  // write timestamp as current date time
145  out.print("# These scaling factors have been created at {}\n", detail::current_date_time());
146 
147  // x must always be outputted
148  out.print("x\n");
149  // write the requested scaling interval
150  out.print("{} {}\n", scaling_interval.first, scaling_interval.second);
151  // write the scaling factors for each feature, note the one based indexing scheme!
152  for (const factors_type &f : scaling_factors) {
153  out.print("{} {} {}\n", f.feature + 1, f.lower, f.upper);
154  }
155 }
156 
157 } // namespace plssvm::detail::io
158 
159 #endif // PLSSVM_DETAIL_IO_SCALING_FACTORS_PARSING_HPP_
Implements a custom assert macro PLSSVM_ASSERT.
#define PLSSVM_ASSERT(cond, msg,...)
Defines the PLSSVM_ASSERT macro if PLSSVM_ASSERT_ENABLED is defined.
Definition: assert.hpp:74
The plssvm::detail::io::file_reader class is responsible for reading a file and splitting it into its lines.
Definition: file_reader.hpp:42
std::string_view line(typename std::vector< std::string_view >::size_type pos) const
Return the line at index pos of the parsed file.
bool is_open() const noexcept
Checks whether this file_reader is currently associated with a file.
std::vector< std::string_view >::size_type num_lines() const noexcept
Return the number of parsed lines (where all empty lines or lines starting with a comment are ignored).
Exception type thrown if the provided file has an invalid format for the selected parser (e....
Definition: exceptions.hpp:114
Defines universal utility functions.
Implements custom exception classes derived from std::runtime_error including source location information.
Implements a file reader class responsible for reading the input file and parsing it into lines.
Namespace containing implementation details for the IO related functions. Should not directly be used...
Definition: core.hpp:44
std::tuple< std::pair< real_type, real_type >, std::vector< factors_type > > parse_scaling_factors(const file_reader &reader)
Read the scaling interval and factors stored using LIBSVM's file format from the file associated with reader.
Definition: scaling_factors_parsing.hpp:57
void write_scaling_factors(const std::string &filename, const std::pair< real_type, real_type > &scaling_interval, const std::vector< factors_type > &scaling_factors)
Write the scaling_interval and scaling_factors to a file for later usage in scaling another data set ...
Definition: scaling_factors_parsing.hpp:139
std::string_view trim(std::string_view str) noexcept
Returns a new std::string_view equal to str where all leading and trailing whitespaces are removed.
std::string current_date_time()
Return the current date time in the format "YYYY-MM-DD hh:mm:ss".
Implements a conversion function from a string to an arithmetic type.
Implements utility functions for string manipulation and querying.