HiPipe  0.7.0
C++17 data pipeline with Python bindings.
column_t.hpp
1 /****************************************************************************
2  * hipipe library
3  * Copyright (c) 2018, Iterait a.s.
4  * Author(s) Filip Matzner
5  *
6  * This file is distributed under the MIT License.
7  * See the accompanying file LICENSE.txt for the complete license agreement.
8  ****************************************************************************/
9 
10 #pragma once
11 
12 #include <hipipe/build_config.hpp>
13 
14 #ifdef HIPIPE_BUILD_PYTHON
15 #include <hipipe/core/python/utility/ndim_vector_converter.hpp>
16 #endif
17 
18 #include <initializer_list>
19 #include <memory>
20 #include <stdexcept>
21 #include <string>
22 #include <vector>
23 
24 namespace hipipe::stream {
25 
/// \brief Abstract base class of all columns.
///
/// Provides a type-erased interface (name, size, batching, optional Python
/// conversion) plus a typed accessor, extract(), that recovers the concrete
/// column's data via a checked downcast.
class abstract_column {
private:

    /// \brief Build the exception reported when extract() is asked for the wrong column.
    ///
    /// \tparam Column The column type that was requested. It is default-constructed
    ///                here solely to query its name().
    /// \returns A std::runtime_error naming both the requested and the actual column.
    template<typename Column>
    std::runtime_error extraction_error() const
    {
        return std::runtime_error{
          std::string{"Trying to extract column `"} + Column{}.name()
          + "` from a column of type `" + this->name() + "`."};
    }

public:
    // typed data extractor //

    /// \brief Get a mutable reference to the data stored in this column.
    ///
    /// \tparam Column The concrete column type this object is expected to be.
    ///                It has to provide `data_type` and `data()`.
    /// \returns The result of `Column::data()` called on this object.
    /// \throws std::runtime_error If this object is not of type `Column`.
    template<typename Column>
    typename Column::data_type& extract()
    {
        // The pointer form of dynamic_cast reports a mismatch with nullptr, so no
        // std::bad_cast (and no <typeinfo>) is involved in the failure path.
        if (auto* typed_this = dynamic_cast<Column*>(this)) {
            return typed_this->data();
        }
        throw extraction_error<Column>();
    }

    /// \brief Get a constant reference to the data stored in this column.
    ///
    /// Const counterpart of the mutable extract() overload.
    ///
    /// \tparam Column The concrete column type this object is expected to be.
    /// \returns The result of `Column::data() const` called on this object.
    /// \throws std::runtime_error If this object is not of type `Column`.
    template<typename Column>
    const typename Column::data_type& extract() const
    {
        if (const auto* typed_this = dynamic_cast<const Column*>(this)) {
            return typed_this->data();
        }
        throw extraction_error<Column>();
    }

    // name accessor //

    /// \brief Retrieve the name of the column.
    ///
    /// The name is used when building error messages.
    virtual std::string name() const = 0;

    // batch utilities //

    /// \brief Retrieve the number of examples stored in the column.
    virtual std::size_t size() const = 0;

    /// \brief Append the examples of another column to this column.
    ///
    /// \param rhs The column whose examples are appended; ownership is taken.
    virtual void push_back(std::unique_ptr<abstract_column> rhs) = 0;

    /// \brief Remove the given number of examples from this column and return
    ///        them as a new column.
    ///
    /// \param n The number of examples to take.
    virtual std::unique_ptr<abstract_column> take(std::size_t n) = 0;

    // python conversion //

    #ifdef HIPIPE_BUILD_PYTHON
    /// \brief Convert the column data to a Python object.
    ///
    /// Only available when the library is built with HIPIPE_BUILD_PYTHON.
    virtual boost::python::object to_python() = 0;
    #endif

    // virtual destructor //

    virtual ~abstract_column() = default;
};
116 
117 
120 template <typename ColumnName, typename ExampleType>
121 class column_base : public abstract_column {
122 public:
123 
125  using example_type = ExampleType;
127  using data_type = std::vector<example_type>;
128 
129 private:
130 
132  data_type data_;
133 
134 public:
135 
136  // constructors //
137 
138  column_base() = default;
139  column_base(column_base&&) = default;
140 
145  template <typename... Args>
146  column_base(Args&&... args)
147  : data_{std::forward<Args>(args)...}
148  { }
149 
150  // batching utilities //
151 
153  std::size_t size() const override
154  {
155  return data_.size();
156  }
157 
176  std::unique_ptr<abstract_column> take(std::size_t n) override
177  {
178  if (n > data_.size()) {
179  throw std::runtime_error{"hipipe: Attempting to take "
180  + std::to_string(n) + " examples out of column `" + name()
181  + "` with " + std::to_string(size()) + " examples."};
182  }
183  data_type taken_examples(n);
184  std::move(data_.begin(), data_.begin() + n, taken_examples.begin());
185  data_.erase(data_.begin(), data_.begin() + n);
186  return std::make_unique<ColumnName>(std::move(taken_examples));
187  }
188 
205  void push_back(std::unique_ptr<abstract_column> rhs) override
206  {
207  try {
208  ColumnName& typed_rhs = dynamic_cast<ColumnName&>(*rhs);
209  data_.reserve(data_.size() + typed_rhs.data_.size());
210  for (example_type& example : typed_rhs.data_) {
211  data_.push_back(std::move(example));
212  }
213  } catch (const std::bad_cast&) {
214  throw std::runtime_error{"hipipe: Attempting to push back "
215  "column `" + rhs->name() + "` to column `" + name() + "."};
216  }
217  }
218 
219  // data accessors //
220 
222  data_type& data() { return data_; }
223 
225  const data_type& data() const { return data_; }
226 
227  // python converter //
228 
236  #ifdef HIPIPE_BUILD_PYTHON
237  boost::python::object to_python() override
238  {
239  return hipipe::python::utility::to_python(std::move(data_));
240  }
241  #endif
242 };
243 
244 } // namespace hipipe::stream
245 
246 
/// \brief Define a new column type.
///
/// Expands to a struct named `column_name_` that derives from
/// hipipe::stream::column_base<column_name_, example_type_>, inherits its
/// constructors, and returns the stringified `column_name_` from name().
/// The expansion already ends with a semicolon.
///
/// \param column_name_ The name of the struct to be defined.
/// \param example_type_ The type of a single example stored in the column.
#define HIPIPE_DEFINE_COLUMN(column_name_, example_type_)                            \
struct column_name_                                                                  \
  : public hipipe::stream::column_base<column_name_, example_type_> {                \
    using hipipe::stream::column_base<column_name_, example_type_>::column_base;     \
    std::string name() const override                                                \
    {                                                                                \
        return #column_name_;                                                        \
    }                                                                                \
};
hipipe::stream::column_base::push_back
void push_back(std::unique_ptr< abstract_column > rhs) override
Concatenate the examples from two columns of the same type.
Definition: column_t.hpp:210
hipipe::utility::to_string
std::string to_string(const T &value)
Convert the given type to std::string.
Definition: string.hpp:90
hipipe::stream::column_base::example_type
ExampleType example_type
The type of a single example.
Definition: column_t.hpp:130
hipipe::stream::abstract_column::push_back
virtual void push_back(std::unique_ptr< abstract_column > rhs)=0
hipipe::stream::abstract_column::extract
Column::data_type & extract()
Definition: column_t.hpp:69
hipipe::stream::column_base::data_type
std::vector< example_type > data_type
The type of multiple examples. This is what the column actually stores.
Definition: column_t.hpp:132
hipipe::stream::column_base
Implementation stub of a column defined by HIPIPE_DEFINE_COLUMN macro.
Definition: column_t.hpp:126
hipipe::stream::column_base::size
std::size_t size() const override
Get the number of examples in this column.
Definition: column_t.hpp:158
hipipe::stream::abstract_column::size
virtual std::size_t size() const =0
Retrieve the number of examples stored in the column.
hipipe::stream::column_base::data
data_type & data()
Get a reference to the stored vector of examples.
Definition: column_t.hpp:227
hipipe::stream::abstract_column::take
virtual std::unique_ptr< abstract_column > take(std::size_t n)=0
hipipe::stream::abstract_column::name
virtual std::string name() const =0
hipipe::stream::column_base::take
std::unique_ptr< abstract_column > take(std::size_t n) override
Steal the given number of examples from this column and create a new column out of those.
Definition: column_t.hpp:181