14 #include <hipipe/core/index_mapper.hpp>
15 #include <hipipe/core/utility/string.hpp>
16 #include <hipipe/core/utility/tuple.hpp>
18 #include <range/v3/experimental/view/shared.hpp>
19 #include <range/v3/view/all.hpp>
20 #include <range/v3/view/iota.hpp>
21 #include <range/v3/view/move.hpp>
22 #include <range/v3/view/transform.hpp>
23 #include <range/v3/view/zip.hpp>
24 #include <range/v3/view/zip_with.hpp>
// Short alias for the range-v3 views namespace used by the code below.
33 namespace rgv = ranges::views;
// Table storage type: a vector of string vectors. The code below indexes the
// storage column-first (e.g. data_[i].push_back(...) with i < n_cols()).
43 using data_table_t = std::vector<std::vector<std::string>>;
// Header type: an index_mapper over column-name strings.
44 using header_t = index_mapper<std::string>;
// Validates a header supplied for the dataframe.
// Visible in this excerpt: a loop over every name in `header` and a
// std::invalid_argument throw complaining about empty column names.
// NOTE(review): the first parameter (call sites pass a column count) and the
// conditions guarding the throw are not visible here - confirm in the full file.
55 static void throw_check_new_header(
57 const std::vector<std::string>&
header)
63 for (
const std::string& h :
header) {
65 throw std::invalid_argument{
"When providing a header to a dataframe,"
66 " all the column names have to be non-empty."};
// Checks that the name given for a newly inserted column is consistent with
// the current header state. Throws std::invalid_argument when
//   - a header exists but `name` is empty, or
//   - columns already exist, there is no header, and `name` is non-empty.
71 void throw_check_insert_col_name(
const std::string& name)
const
73 if (header_.size() && !name.size()) {
74 throw std::invalid_argument{
"The dataframe has a header, please provide"
75 " a column name when inserting a new column."};
77 if (
n_cols() != 0 && !header_.size() && name.size()) {
78 throw std::invalid_argument{
"The dataframe has no header, but a column"
79 " name \"" + name +
"\" was provided when inserting a new column."};
// Validates the size of a column that is about to be inserted and reports a
// violation with std::invalid_argument.
// NOTE(review): the condition that triggers the throw and the rest of the
// message are not visible in this excerpt.
83 void throw_check_insert_col_size(std::size_t col_size)
const
86 throw std::invalid_argument{
"Cannot insert a column of size "
// Validates the size of a row that is about to be inserted and reports a
// violation with std::invalid_argument.
// NOTE(review): the condition that triggers the throw and the rest of the
// message are not visible in this excerpt.
92 void throw_check_insert_row_size(std::size_t row_size)
const
95 throw std::invalid_argument{
"Cannot insert a row of size "
// Checks that `row_idx` addresses an existing row, i.e. row_idx < n_rows().
// `row_idx` is unsigned (std::size_t), so only the upper bound is tested; a
// `< 0` comparison on it could never be true.
// NOTE(review): the statement executed when the check fails is not visible
// in this excerpt.
101 void throw_check_row_idx(std::size_t row_idx)
const
103 if (row_idx >=
n_rows()) {
// Checks that `col_idx` addresses an existing column, i.e. col_idx < n_cols();
// otherwise throws std::out_of_range with a message starting "Column index ".
// `col_idx` is unsigned (std::size_t), so only the upper bound is tested; a
// `< 0` comparison on it could never be true.
109 void throw_check_col_idx(std::size_t col_idx)
const
111 if (col_idx >=
n_cols()) {
112 throw std::out_of_range{
"Column index " +
std::to_string(col_idx) +
// Checks that a column can be looked up by `col_name`.
// Throws std::out_of_range when the dataframe has no header at all, or when
// the header does not contain `col_name`.
117 void throw_check_col_name(
const std::string& col_name)
const
119 if (header_.size() == 0) {
120 throw std::out_of_range{
"Dataframe has no header, cannot index by column name."};
122 if (!header_.contains(col_name)) {
123 throw std::out_of_range{
"Column " + col_name +
" not found in the dataframe."};
// Shared implementation behind the const and non-const raw_irows() overloads
// (both call it with `this`).
// Builds a lazy view over the row indices [0, n_rows()); for every row index
// the lambda starts from raw_icols(col_indexes) on the dataframe.
// `col_indexes` is moved into the closure, so the view carries its own copy.
// `this_ptr` is captured as passed - presumably the returned view must not
// outlive the dataframe; confirm with callers.
// NOTE(review): the rest of the lambda (how row i is extracted from the
// selected columns) is not visible in this excerpt.
127 template <
typename This>
128 static auto raw_irows_impl(This this_ptr, std::vector<std::size_t> col_indexes)
130 return rgv::iota(0UL, this_ptr->n_rows())
131 |
rgv::transform([this_ptr, col_indexes=std::move(col_indexes)](std::size_t i) {
132 return this_ptr->raw_icols(col_indexes)
// Shared implementation behind the raw_rows() overloads that take no column
// selection (they call it with `this`).
// Visible here: an outer index range over rows [0, n_rows()), an inner index
// range over columns [0, n_cols()), and the element access raw_cols()[j][i]
// (column j, row i).
// NOTE(review): the adaptors connecting these pieces are not visible in this
// excerpt.
140 template<
typename This>
141 static auto raw_rows_impl(This this_ptr)
143 return rgv::iota(0UL, this_ptr->n_rows())
145 return rgv::iota(0UL, this_ptr->n_cols())
148 return this_ptr->raw_cols()[j][i];
// Shared implementation behind the const and non-const raw_icols() overloads.
// Moves `col_indexes` into a range-v3 `shared` view and maps each index `idx`
// to the column raw_cols()[idx] of the dataframe behind `this_ptr`.
// NOTE(review): the adaptor between the shared view and the element access
// is not visible in this excerpt.
153 template<
typename This>
154 static auto raw_icols_impl(This this_ptr, std::vector<std::size_t> col_indexes)
156 return std::move(col_indexes)
157 | ranges::experimental::views::shared
159 return this_ptr->raw_cols()[idx];
// Default constructor: compiler-generated, members are default-initialised.
165 dataframe() =
default;
// Constructs a dataframe from a vector of columns and an optional header.
// The header is validated against the number of columns first; then every
// column is inserted through insert_col(), named header[i] when a header was
// given and "" otherwise. Column elements and header names are moved out of
// the by-value arguments.
// NOTE(review): `T` is presumably a template parameter declared on a line
// that is not visible in this excerpt.
185 dataframe(std::vector<std::vector<T>> columns, std::vector<std::string>
header = {})
187 throw_check_new_header(columns.size(),
header);
188 for (std::size_t i = 0; i < columns.size(); ++i) {
189 std::string col_name =
header.empty() ?
"" : std::move(
header[i]);
190 insert_col(rgv::move(columns[i]), std::move(col_name));
// Constructs a dataframe from a tuple of differently-typed columns and an
// optional header.
// The header is validated against the number of tuple elements. The lambda
// receives one column together with its position and inserts it through
// insert_col(), named header[index] when a header was given and "" otherwise.
// NOTE(review): the call that applies the lambda to every tuple element is
// not visible in this excerpt.
215 template<
typename... Ts>
216 dataframe(std::tuple<std::vector<Ts>...> columns, std::vector<std::string>
header = {})
218 throw_check_new_header(
sizeof...(Ts),
header);
220 [
this, &
header](
auto& column,
auto index) {
221 std::string col_name =
header.empty() ?
"" : std::move(
header[index]);
222 this->
insert_col(rgv::move(column), std::move(col_name));
// Inserts a new column built from the range `rng`, optionally named
// `col_name`; `cvt` is a per-element converter to std::string.
// Visible here: the name is checked against the header state, the range size
// is checked, and a non-empty name is registered in header_.
// NOTE(review): the default value of `cvt`, the code that stores the data,
// and the returned value are not visible in this excerpt.
238 template<
typename Rng,
typename ValueT = ranges::range_value_t<Rng>>
239 std::size_t
insert_col(Rng&& rng, std::string col_name = {},
240 std::function<std::string(
const ValueT&)> cvt =
243 throw_check_insert_col_name(col_name);
244 throw_check_insert_col_size(ranges::size(rng));
245 if (col_name.size()) header_.insert(col_name);
// Inserts a row given as a tuple of typed fields, with one string converter
// per field in `cvts`.
// The tuple arity is checked with throw_check_insert_row_size. The lambda
// converts one field with the converter at the same position and appends the
// result to column `index` of data_. `index` is used as a template argument
// to std::get, so it must be usable in a constant expression.
// NOTE(review): the default of `cvts`, the call that applies the lambda to
// every field, and the returned value are not visible in this excerpt.
259 template<
typename... Ts>
260 std::size_t
insert_row(std::tuple<Ts...> row_tuple,
261 std::tuple<std::function<std::string(
const Ts&)>...> cvts =
265 throw_check_insert_row_size(
sizeof...(Ts));
267 [
this, &cvts](
auto& field,
auto index) {
268 this->data_.at(index).push_back(std::get<index>(cvts)(std::move(field)));
// Inserts a row given directly as strings.
// The row length is checked with throw_check_insert_row_size, then row[i] is
// moved to the end of column i for every column.
// NOTE(review): the returned value is not visible in this excerpt.
282 std::size_t
insert_row(std::vector<std::string> row)
284 throw_check_insert_row_size(row.size());
285 for (std::size_t i = 0; i <
n_cols(); ++i) {
286 data_[i].push_back(std::move(row[i]));
// Body of a column-removal member taking `col_index`. Its signature is not
// visible in this excerpt; presumably this is drop_icol(), which drop_col()
// calls.
// Validates the index; if a header exists, rebuilds it without the entry at
// `col_index`; then erases the column from data_.
298 throw_check_col_idx(col_index);
300 if (header_.size()) {
301 std::vector<std::string> new_header = header_.values();
302 new_header.erase(new_header.begin() + col_index);
303 header_ = new_header;
306 data_.erase(data_.begin() + col_index);
// Removes the column called `col_name`.
// The name is validated with throw_check_col_name (throws std::out_of_range
// for a missing header or unknown name), translated to an index through
// header_, and handed to drop_icol().
312 void drop_col(
const std::string& col_name)
314 throw_check_col_name(col_name);
315 return drop_icol(header_.index_for(col_name));
// Removes the row at `row_idx`: after validating the index, the element at
// that position is erased from every column of data_.
321 void drop_row(
const std::size_t row_idx)
323 throw_check_row_idx(row_idx);
324 for (
auto& column : data_) {
325 column.erase(column.begin() + row_idx);
// Returns a view of the raw (string) columns selected by `col_indexes`
// (non-const overload). Every index is validated with throw_check_col_idx
// before the work is forwarded to raw_icols_impl.
369 auto raw_icols(std::vector<std::size_t> col_indexes)
371 for (
auto& col_idx : col_indexes) throw_check_col_idx(col_idx);
372 return raw_icols_impl(
this, std::move(col_indexes));
// Const overload of raw_icols(): validates every index with
// throw_check_col_idx, then forwards to raw_icols_impl with a const `this`.
381 auto raw_icols(std::vector<std::size_t> col_indexes)
const
383 for (
auto& col_idx : col_indexes) throw_check_col_idx(col_idx);
384 return raw_icols_impl(
this, std::move(col_indexes));
// Returns a view of the raw (string) columns selected by name (non-const
// overload). Every name is validated with throw_check_col_name, the names are
// mapped to indexes through header_, and the result comes from raw_icols().
399 auto raw_cols(
const std::vector<std::string>& col_names)
401 for (
auto& col_name : col_names) throw_check_col_name(col_name);
402 return raw_icols(header_.index_for(col_names));
// Const overload of raw_cols(names): validates every name with
// throw_check_col_name, maps the names to indexes through header_, and
// forwards to the const raw_icols().
411 auto raw_cols(
const std::vector<std::string>& col_names)
const
413 for (
auto& col_name : col_names) throw_check_col_name(col_name);
414 return raw_icols(header_.index_for(col_names));
// Typed access to the columns selected by `col_indexes`: one target type in
// `Ts` and one converter in `cvts` per selected column (the converters
// default to utility::string_to<Ts>).
// NOTE(review): the agreement between sizeof...(Ts) and the number of indexes
// is only checked with assert, so the check is compiled out when NDEBUG is
// defined - a mismatch is then not diagnosed here.
// NOTE(review): the rest of the body is not visible in this excerpt.
428 template<
typename... Ts>
429 auto icols(std::vector<std::size_t> col_indexes,
430 std::tuple<std::function<Ts(
const std::string&)>...> cvts =
431 std::make_tuple(utility::string_to<Ts>...))
const
433 assert(
sizeof...(Ts) == ranges::size(col_indexes));
// Typed access to the columns selected by name.
// Every name is validated with throw_check_col_name (std::out_of_range on a
// missing header or unknown name), the names are mapped to indexes through
// header_, and the work is forwarded to icols<Ts...>() with the converters.
450 template<
typename... Ts>
451 auto cols(
const std::vector<std::string>& col_names,
452 std::tuple<std::function<Ts(
const std::string&)>...> cvts =
453 std::make_tuple(utility::string_to<Ts>...))
const
455 for (
auto& col_name : col_names) throw_check_col_name(col_name);
456 return icols<Ts...>(header_.index_for(col_names), std::move(cvts));
// Returns a view (rgv::all) over the raw column at `col_index` (non-const
// overload); the index is validated with throw_check_col_idx first.
472 auto raw_icol(std::size_t col_index)
474 throw_check_col_idx(col_index);
475 return rgv::all(
raw_cols()[col_index]);
// Const overload of raw_icol(): validates the index with throw_check_col_idx
// and returns a view (rgv::all) over the raw column at `col_index`.
482 auto raw_icol(std::size_t col_index)
const
484 throw_check_col_idx(col_index);
485 return rgv::all(
raw_cols()[col_index]);
// Returns a view over the raw column called `col_name` (non-const overload):
// validates the name, maps it to an index through header_, and forwards to
// raw_icol().
499 auto raw_col(
const std::string& col_name)
501 throw_check_col_name(col_name);
502 return raw_icol(header_.index_for(col_name));
// Const overload of raw_col(): validates the name, maps it to an index
// through header_, and forwards to the const raw_icol().
511 auto raw_col(
const std::string& col_name)
const
513 throw_check_col_name(col_name);
514 return raw_icol(header_.index_for(col_name));
// Typed access to the column at `col_index`; `cvt` converts each stored
// string to T and defaults to utility::string_to<T>.
// NOTE(review): the template header declaring `T` and the whole body are not
// visible in this excerpt.
532 auto icol(std::size_t col_index,
533 std::function<T(
const std::string&)> cvt = utility::string_to<T>)
const
// Typed access to the column called `col_name`: validates the name, maps it
// to an index through header_, and forwards to icol<T>() with the converter.
// NOTE(review): the template header declaring `T` is not visible in this
// excerpt.
551 auto col(
const std::string& col_name,
552 std::function<T(
const std::string&)> cvt = utility::string_to<T>)
const
554 throw_check_col_name(col_name);
555 return icol<T>(header_.index_for(col_name), std::move(cvt));
// Body of a member whose signature is not visible in this excerpt
// (presumably the non-const raw_rows()); forwards to raw_rows_impl with `this`.
570 return raw_rows_impl(
this);
// Body of a member whose signature is not visible in this excerpt
// (presumably the const raw_rows()); forwards to raw_rows_impl with `this`.
580 return raw_rows_impl(
this);
// Returns a row-wise view restricted to the columns in `col_indexes`
// (non-const overload). Every index is validated with throw_check_col_idx
// before the work is forwarded to raw_irows_impl.
593 auto raw_irows(std::vector<std::size_t> col_indexes)
595 for (
auto& col_idx : col_indexes) throw_check_col_idx(col_idx);
596 return raw_irows_impl(
this, std::move(col_indexes));
// Const overload of raw_irows(): validates every index with
// throw_check_col_idx, then forwards to raw_irows_impl with a const `this`.
605 auto raw_irows(std::vector<std::size_t> col_indexes)
const
607 for (
auto& col_idx : col_indexes) throw_check_col_idx(col_idx);
608 return raw_irows_impl(
this, std::move(col_indexes));
// Returns a row-wise view restricted to the columns named in `col_names`
// (non-const overload): validates every name, maps the names to indexes
// through header_, and forwards to raw_irows().
621 auto raw_rows(
const std::vector<std::string>& col_names)
623 for (
auto& col_name : col_names) throw_check_col_name(col_name);
624 return raw_irows(header_.index_for(col_names));
// Const overload of raw_rows(names): validates every name, maps the names to
// indexes through header_, and forwards to the const raw_irows().
633 auto raw_rows(
const std::vector<std::string>& col_names)
const
635 for (
auto& col_name : col_names) throw_check_col_name(col_name);
636 return raw_irows(header_.index_for(col_names));
// Typed row-wise access to the columns in `col_indexes`, one target type in
// `Ts` and one converter in `cvts` per column.
// `zip_as_std_tuple` zips any number of ranges with rgv::zip_with, combining
// the i-th elements into a std::tuple<Ts...> (the elements are taken by value
// and moved into the tuple). The typed columns come from icols<Ts...>().
// NOTE(review): the call that applies `zip_as_std_tuple` to the columns is
// not visible in this excerpt.
653 template<
typename... Ts>
654 auto irows(std::vector<std::size_t> col_indexes,
655 std::tuple<std::function<Ts(
const std::string&)>...> cvts =
656 std::make_tuple(utility::string_to<Ts>...))
const
659 auto zip_as_std_tuple = [](
auto&&... rngs) {
660 auto std_tupler = [](Ts... ts) -> std::tuple<Ts...> {
return {std::move(ts)...}; };
661 return rgv::zip_with(std_tupler, std::forward<decltype(rngs)>(rngs)...);
665 icols<Ts...>(std::move(col_indexes), std::move(cvts)));
// Typed row-wise access to the columns named in `col_names`.
// Every name is validated with throw_check_col_name, the names are mapped to
// indexes through header_, and the work is forwarded to irows<Ts...>() with
// the converters.
680 template<
typename... Ts>
681 auto rows(
const std::vector<std::string>& col_names,
682 std::tuple<std::function<Ts(
const std::string&)>...> cvts =
683 std::make_tuple(utility::string_to<Ts>...))
const
685 for (
auto& col_name : col_names) throw_check_col_name(col_name);
686 return irows<Ts...>(header_.index_for(col_names), std::move(cvts));
// Pairs the values of a key column with the values of one value column, both
// selected by index: each column is read through icol<>() with its converter
// (defaults: utility::string_to<IndexT> / utility::string_to<ColT>) and the
// two typed columns are zipped.
// NOTE(review): the line carrying the function name and the first parameter
// is not visible in this excerpt; presumably this is index_icol(), which
// index_col() calls with these four arguments.
710 template <
typename IndexT,
typename ColT>
712 std::size_t val_col_index,
713 std::function<IndexT(
const std::string&)> key_col_cvt =
714 utility::string_to<IndexT>,
715 std::function<ColT(
const std::string&)> val_col_cvt =
716 utility::string_to<ColT>)
const
718 auto key_col = icol<IndexT>(key_col_index, std::move(key_col_cvt));
719 auto val_col = icol<ColT>(val_col_index, std::move(val_col_cvt));
720 return rgv::zip(key_col, val_col);
// Name-based variant of index_icol(): validates both column names with
// throw_check_col_name (std::out_of_range on a missing header or unknown
// name), maps them to indexes through header_, and forwards to index_icol()
// together with the two converters.
733 template<
typename IndexT,
typename ColT>
734 auto index_col(
const std::string& key_col_name,
735 const std::string& val_col_name,
736 std::function<IndexT(
const std::string&)> key_col_cvt =
737 utility::string_to<IndexT>,
738 std::function<ColT(
const std::string&)> val_col_cvt =
739 utility::string_to<ColT>)
const
741 throw_check_col_name(key_col_name);
742 throw_check_col_name(val_col_name);
743 return index_icol(header_.index_for(key_col_name),
744 header_.index_for(val_col_name),
745 std::move(key_col_cvt),
746 std::move(val_col_cvt));
// Pairs the values of a key column with typed rows built from several value
// columns, all selected by index: the key column is read through
// icol<IndexT>(), the value rows through irows<Ts...>(), and the two are
// zipped.
// NOTE(review): the line carrying the function name and the first parameter
// is not visible in this excerpt; presumably this is index_icols(), which
// index_cols() calls with these four arguments.
763 template<
typename IndexT,
typename... Ts>
765 std::vector<std::size_t> val_col_indexes,
766 std::function<IndexT(
const std::string&)> key_col_cvt =
767 utility::string_to<IndexT>,
768 std::tuple<std::function<Ts(
const std::string&)>...> val_col_cvts =
769 std::make_tuple(utility::string_to<Ts>...))
const
771 auto key_col = icol<IndexT>(key_col_index, std::move(key_col_cvt));
772 auto val_cols =
irows<Ts...>(std::move(val_col_indexes), std::move(val_col_cvts));
773 return rgv::zip(key_col, val_cols);
// Name-based variant of index_icols(): pairs each value of the key column
// with the matching typed row of the value columns.
// Every name is validated with throw_check_col_name, the names are mapped to
// indexes through header_, and the work is forwarded to index_icols()
// together with the converters.
// No separate header assertion is needed: throw_check_col_name(key_col_name)
// already throws std::out_of_range when the dataframe has no header, so
// execution cannot get past it with an empty header.
788 template<
typename IndexT,
typename... Ts>
789 auto index_cols(
const std::string& key_col_name,
790 const std::vector<std::string>& val_col_names,
791 std::function<IndexT(
const std::string&)> key_col_cvt =
792 utility::string_to<IndexT>,
793 std::tuple<std::function<Ts(
const std::string&)>...> val_col_cvts =
794 std::make_tuple(utility::string_to<Ts>...))
const
796 throw_check_col_name(key_col_name);
797 for (
auto& col_name : val_col_names) throw_check_col_name(col_name);
799 return index_icols(header_.index_for(key_col_name),
800 header_.index_for(val_col_names),
801 std::move(key_col_cvt),
802 std::move(val_col_cvts));
// Returns the number of columns.
// NOTE(review): the body is not visible in this excerpt.
808 std::size_t
n_cols()
const
// Returns the number of rows: 0 when there are no columns, otherwise the
// length of the first column in data_.
814 std::size_t
n_rows()
const
816 if (
n_cols() == 0)
return 0;
817 return data_.front().size();
// Replaces the header with `new_header` after validating it against the
// current column count via throw_check_new_header; the vector is moved into
// header_.
824 void header(std::vector<std::string> new_header)
826 throw_check_new_header(
n_cols(), new_header);
827 header_ = std::move(new_header);
// Returns the column names as a vector of strings, taken from
// header_.values() and returned by value.
831 std::vector<std::string>
header()
const
833 return header_.values();
// Read-only access to the underlying string table, returned by const
// reference.
// NOTE(review): the body is not visible in this excerpt.
843 const data_table_t&
data()
const
// Stream-insertion operator for dataframe (declaration only; the definition
// and the output format are not visible in this excerpt).
853 std::ostream&
operator<<(std::ostream& out,
const dataframe& df);