HiPipe  0.7.0
C++17 data pipeline with Python bindings.
filter.hpp
1 /****************************************************************************
2  * hipipe library
3  * Copyright (c) 2017, Cognexa Solutions s.r.o.
4  * Copyright (c) 2018, Iterait a.s.
5  * Author(s) Filip Matzner
6  *
7  * This file is distributed under the MIT License.
8  * See the accompanying file LICENSE.txt for the complete license agreement.
9  ****************************************************************************/
10 
11 #pragma once
12 
13 #include <hipipe/core/stream/template_arguments.hpp>
14 #include <hipipe/core/stream/transform.hpp>
15 #include <hipipe/core/utility/tuple.hpp>
16 
17 #include <range/v3/view/filter.hpp>
18 #include <range/v3/view/move.hpp>
19 #include <range/v3/view/zip.hpp>
20 
21 #include <functional>
22 #include <utility>
23 
24 namespace hipipe::stream {
25 
26 namespace rgv = ranges::views;
27 
28 namespace detail {
29 
30  // Filter the stream using the given function.
31  // This function wrapper is to be applied in one lower
32  // dimension than the wrapped function itself.
33  // This function wrapper is to be called in dimensions higher than 0.
34  template<typename Fun, typename From, typename ByIdxs>
35  struct wrap_filter_fun_for_transform;
36  template<typename Fun, typename... FromTypes, std::size_t... ByIdxs>
37  struct wrap_filter_fun_for_transform<Fun, from_t<FromTypes...>, std::index_sequence<ByIdxs...>>
38  {
39  Fun fun;
40 
41  // Properly zips/unzips the data and applies the filter function.
42  utility::maybe_tuple<FromTypes...> operator()(FromTypes&... cols)
43  {
44  // the following is much nicer when written as a pipeline, but this
45  // is more compilation time friendly
46  auto range_of_tuples =
48  rgv::zip(cols...),
49  [this](const auto& tuple) -> bool {
50  return std::invoke(this->fun, std::get<ByIdxs>(tuple)...);
51  }
52  );
53  // If std::vector::assign() gets a pair of forward iterators, it first iterates
54  // through the range and calculates the distance to allocate the memory
55  // and afterwards iterates through the range once over and assigns
56  // the values. To avoid this double iteration, we convert the
57  // filter_view to a vector manually and let it exponentially
58  // reallocate.
59  std::vector<ranges::range_value_t<decltype(range_of_tuples)>> ts;
60  for (auto&& t : rgv::move(range_of_tuples)) ts.push_back(std::move(t));
61  return utility::maybe_untuple(utility::unzip(rgv::move(ts)));
62  }
63  };
64 
65  // Helper function wrapper for dimension 0.
66  // This wrapper takes a single tuple of columns as argument and
67  // applies the stored function to a subset of columns selected by types.
68  // The columns are projected to their value().
69  template<typename Fun, typename... ByColumns>
70  struct apply_filter_fun_to_columns
71  {
72  Fun fun;
73 
74  bool operator()(const batch_t& source)
75  {
76  std::tuple<const typename ByColumns::data_type&...> slice_view{
77  source.extract<ByColumns>()...
78  };
79  static_assert(std::is_invocable_r_v<
80  bool, Fun&, const typename ByColumns::data_type&...>,
81  "hipipe::stream::filter: "
82  "The function has to accept the selected `by<>` columns (specifically "
83  "const ByColumns::data_type&) and return a bool.");
84  return std::apply(fun, std::move(slice_view));
85  }
86  };
87 
88  // Entry point for stream::filter.
89  // For dimensions higher than 0, use stream::transform to Dim-1 and
90  // wrap_filter_fun_for_transform wrapper.
91  template<int Dim>
92  struct filter_impl
93  {
94  template<typename... FromColumns, typename... ByColumns, typename Fun>
95  static auto impl(from_t<FromColumns...> f, by_t<ByColumns...> b, Fun fun)
96  {
97  static_assert(sizeof...(ByColumns) <= sizeof...(FromColumns),
98  "Cannot have more ByColumns than FromColumns.");
99  static_assert(
100  ((utility::ndims<typename FromColumns::data_type>::value >= Dim) && ...) &&
101  ((utility::ndims<typename ByColumns::data_type>::value >= Dim) && ...),
102  "hipipe::stream::filter: The dimension in which to apply the operation needs"
103  " to be at most the lowest dimension of all the from<> and by<> columns.");
104 
105  detail::wrap_filter_fun_for_transform<
106  Fun, from_t<utility::ndim_type_t<typename FromColumns::data_type, Dim-1>...>,
107  std::index_sequence<utility::variadic_find<ByColumns, FromColumns...>::value...>>
108  fun_wrapper{std::move(fun)};
109 
110  return stream::transform(f, to<FromColumns...>, std::move(fun_wrapper), dim<Dim-1>);
111  }
112  };
113 
114  // Special case for batch filtering (Dim == 0).
115  template<>
116  struct filter_impl<0>
117  {
118  template<typename From, typename... ByColumns, typename Fun>
119  static auto impl(From, by_t<ByColumns...>, Fun fun)
120  {
121  apply_filter_fun_to_columns<Fun, ByColumns...> fun_wrapper{std::move(fun)};
122  return rgv::filter(std::move(fun_wrapper));
123  }
124  };
125 
126 } // namespace detail
127 
147 template<typename... FromColumns, typename... ByColumns, typename Fun, int Dim = 1>
148 auto filter(from_t<FromColumns...> f,
149  by_t<ByColumns...> b,
150  Fun fun,
151  dim_t<Dim> d = dim_t<1>{})
152 {
153  static_assert(
156  "hipipe::stream::filter: The dimension in which to apply the operation "
157  " needs to be at most the lowest dimension of all the from<> and by<> columns.");
158  // a bit of function type erasure to speed up compilation
159  using FunT = std::function<
160  bool(const utility::ndim_type_t<typename ByColumns::data_type, Dim>&...)>;
161  return detail::filter_impl<Dim>::impl(f, b, FunT{std::move(fun)});
162 }
163 
164 } // namespace hipipe::stream
hipipe::stream::filter
auto filter(from_t< FromColumns... > f, by_t< ByColumns... > b, Fun fun, dim_t< Dim > d=dim_t< 1 >{})
Filter stream data.
Definition: filter.hpp:154
hipipe::utility::ndims
Gets the number of dimensions of a multidimensional range.
Definition: ndim.hpp:50
hipipe::utility::maybe_untuple
decltype(auto) maybe_untuple(Tuple &&tuple)
Extract a value from a tuple if the tuple contains only a single value.
Definition: tuple.hpp:370
hipipe::stream::transform
auto transform(from_t< FromColumns... > f, to_t< ToColumns... > t, Fun fun, dim_t< Dim > d=dim_t< 1 >{})
Transform a subset of hipipe columns to a different subset of hipipe columns.
Definition: transform.hpp:218