HiPipe  0.7.0
C++17 data pipeline with Python bindings.
Public Member Functions | List of all members
hipipe::dataframe Class Reference

Tabular object with convenient data access methods. More...

#include <hipipe/core/dataframe.hpp>

Public Member Functions

template<typename T >
 dataframe (std::vector< std::vector< T >> columns, std::vector< std::string > header={})
 
template<typename... Ts>
 dataframe (std::tuple< std::vector< Ts >... > columns, std::vector< std::string > header={})
 
template<typename Rng , typename ValueT = ranges::range_value_t<Rng>>
std::size_t insert_col (Rng &&rng, std::string col_name={}, std::function< std::string(const ValueT &)> cvt=static_cast< std::string(*)(const ValueT &)>(utility::to_string))
 
template<typename... Ts>
std::size_t insert_row (std::tuple< Ts... > row_tuple, std::tuple< std::function< std::string(const Ts &)>... > cvts=std::make_tuple(static_cast< std::string(*)(const Ts &)>(utility::to_string)...))
 
std::size_t insert_row (std::vector< std::string > row)
 
void drop_icol (std::size_t col_index)
 
void drop_col (const std::string &col_name)
 
void drop_row (const std::size_t row_idx)
 
auto raw_cols ()
 
auto raw_cols () const
 
auto raw_icols (std::vector< std::size_t > col_indexes)
 
auto raw_icols (std::vector< std::size_t > col_indexes) const
 
auto raw_cols (const std::vector< std::string > &col_names)
 
auto raw_cols (const std::vector< std::string > &col_names) const
 
template<typename... Ts>
auto icols (std::vector< std::size_t > col_indexes, std::tuple< std::function< Ts(const std::string &)>... > cvts=std::make_tuple(utility::string_to< Ts >...)) const
 
template<typename... Ts>
auto cols (const std::vector< std::string > &col_names, std::tuple< std::function< Ts(const std::string &)>... > cvts=std::make_tuple(utility::string_to< Ts >...)) const
 
auto raw_icol (std::size_t col_index)
 
auto raw_icol (std::size_t col_index) const
 
auto raw_col (const std::string &col_name)
 
auto raw_col (const std::string &col_name) const
 
template<typename T >
auto icol (std::size_t col_index, std::function< T(const std::string &)> cvt=utility::string_to< T >) const
 
template<typename T >
auto col (const std::string &col_name, std::function< T(const std::string &)> cvt=utility::string_to< T >) const
 
auto raw_rows ()
 
auto raw_rows () const
 
auto raw_irows (std::vector< std::size_t > col_indexes)
 
auto raw_irows (std::vector< std::size_t > col_indexes) const
 
auto raw_rows (const std::vector< std::string > &col_names)
 
auto raw_rows (const std::vector< std::string > &col_names) const
 
template<typename... Ts>
auto irows (std::vector< std::size_t > col_indexes, std::tuple< std::function< Ts(const std::string &)>... > cvts=std::make_tuple(utility::string_to< Ts >...)) const
 
template<typename... Ts>
auto rows (const std::vector< std::string > &col_names, std::tuple< std::function< Ts(const std::string &)>... > cvts=std::make_tuple(utility::string_to< Ts >...)) const
 
template<typename IndexT , typename ColT >
auto index_icol (std::size_t key_col_index, std::size_t val_col_index, std::function< IndexT(const std::string &)> key_col_cvt=utility::string_to< IndexT >, std::function< ColT(const std::string &)> val_col_cvt=utility::string_to< ColT >) const
 
template<typename IndexT , typename ColT >
auto index_col (const std::string &key_col_name, const std::string &val_col_name, std::function< IndexT(const std::string &)> key_col_cvt=utility::string_to< IndexT >, std::function< ColT(const std::string &)> val_col_cvt=utility::string_to< ColT >) const
 
template<typename IndexT , typename... Ts>
auto index_icols (std::size_t key_col_index, std::vector< std::size_t > val_col_indexes, std::function< IndexT(const std::string &)> key_col_cvt=utility::string_to< IndexT >, std::tuple< std::function< Ts(const std::string &)>... > val_col_cvts=std::make_tuple(utility::string_to< Ts >...)) const
 
template<typename IndexT , typename... Ts>
auto index_cols (const std::string &key_col_name, const std::vector< std::string > &val_col_names, std::function< IndexT(const std::string &)> key_col_cvt=utility::string_to< IndexT >, std::tuple< std::function< Ts(const std::string &)>... > val_col_cvts=std::make_tuple(utility::string_to< Ts >...)) const
 
std::size_t n_cols () const
 Return the number of columns.
 
std::size_t n_rows () const
 Return the number of rows (excluding header).
 
void header (std::vector< std::string > new_header)
 
std::vector< std::string > header () const
 Return the names of columns.
 
data_table_t & data ()
 Return a reference to the raw data table.
 
const data_table_t & data () const
 Return a const reference to the raw data table.
 

Detailed Description

Tabular object with convenient data access methods.

By default, all fields are stored as std::string and they are cast to the requested type on demand.

Definition at line 39 of file dataframe.hpp.

Constructor & Destructor Documentation

◆ dataframe() [1/2]

template<typename T >
hipipe::dataframe::dataframe ( std::vector< std::vector< T >>  columns,
std::vector< std::string >  header = {} 
)
inline

Constructs the dataset from a vector of columns of the same type.

Example:

dataframe df{
// columns
std::vector<std::vector<int>>{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}},
// header
std::vector<std::string>{"A", "B", "C"}
};
Exceptions
std::invalid_argument - 1) If the header is provided, but some of the column names are empty. 2) If the column sizes mismatch. 3) If the provided header does not match the number of provided columns.

Definition at line 184 of file dataframe.hpp.

◆ dataframe() [2/2]

template<typename... Ts>
hipipe::dataframe::dataframe ( std::tuple< std::vector< Ts >... >  columns,
std::vector< std::string >  header = {} 
)
inline

Constructs the dataset from a tuple of columns of possibly different types.

Example:

dataframe df{
// columns
std::make_tuple(
std::vector<int>{1, 2, 3},
std::vector<std::string>{"a1", "a2", "a3"},
std::vector<std::string>{"1.1", "1.2", "1.3"}
),
// header
std::vector<std::string>{"Id", "A", "B"}
};
Exceptions
std::invalid_argument - 1) If the header is provided, but some of the column names are empty. 2) If the column sizes mismatch. 3) If the provided header does not match the number of provided columns.

Definition at line 215 of file dataframe.hpp.

Member Function Documentation

◆ col()

template<typename T >
auto hipipe::dataframe::col ( const std::string &  col_name,
std::function< T(const std::string &)>  cvt = utility::string_to<T> 
) const
inline

Return a typed view of a column.

By default, this function does not provide direct access to the stored data. Instead, each field is converted to the type T and a copy is returned.

Example:

std::vector<long> data = df.col<long>("long column");
Returns
A range of T.
Exceptions
std::out_of_range - If the column is not in the dataframe.

Definition at line 550 of file dataframe.hpp.

◆ cols()

template<typename... Ts>
auto hipipe::dataframe::cols ( const std::vector< std::string > &  col_names,
std::tuple< std::function< Ts(const std::string &)>... >  cvts = std::make_tuple(utility::string_to<Ts>...) 
) const
inline

Return a typed view of multiple columns.

Example:

std::tuple<std::vector<int>, std::vector<double>> data =
df.cols<int, double>({"column 1", "column 2"});
Returns
A tuple of ranges of Ts.
Exceptions
std::out_of_range - If any of the columns is not in the dataframe.

Definition at line 450 of file dataframe.hpp.

◆ drop_col()

void hipipe::dataframe::drop_col ( const std::string &  col_name)
inline

Drop a column with the given name.

Exceptions
std::out_of_range - If the column is not in the dataframe.

Definition at line 311 of file dataframe.hpp.

◆ drop_icol()

void hipipe::dataframe::drop_icol ( std::size_t  col_index)
inline

Drop a column with the given index.

Exceptions
std::out_of_range - If the column is not in the dataframe.

Definition at line 295 of file dataframe.hpp.

◆ drop_row()

void hipipe::dataframe::drop_row ( const std::size_t  row_idx)
inline

Drop a row.

Exceptions
std::out_of_range - If the row is not in the dataframe.

Definition at line 320 of file dataframe.hpp.

◆ header()

void hipipe::dataframe::header ( std::vector< std::string >  new_header)
inline

Set the column names.

Exceptions
std::invalid_argument - 1) If some of the column names are empty. 2) If the header does not match the number of columns.

Definition at line 823 of file dataframe.hpp.

◆ icol()

template<typename T >
auto hipipe::dataframe::icol ( std::size_t  col_index,
std::function< T(const std::string &)>  cvt = utility::string_to<T> 
) const
inline

Return a typed view of a column.

By default, this function does not provide direct access to the stored data. Instead, each field is converted to the type T and a copy is returned.

Example:

std::vector<long> data = df.icol<long>(3);
Returns
A range of T.
Exceptions
std::out_of_range - If the column is not in the dataframe.

Definition at line 531 of file dataframe.hpp.

◆ icols()

template<typename... Ts>
auto hipipe::dataframe::icols ( std::vector< std::size_t >  col_indexes,
std::tuple< std::function< Ts(const std::string &)>... >  cvts = std::make_tuple(utility::string_to<Ts>...) 
) const
inline

Return a typed view of multiple columns.

Example:

std::tuple<std::vector<int>, std::vector<double>> data = df.icols<int, double>({1, 2});
Returns
A tuple of ranges of Ts.
Exceptions
std::out_of_range - If any of the columns is not in the dataframe.

Definition at line 428 of file dataframe.hpp.

◆ index_col()

template<typename IndexT , typename ColT >
auto hipipe::dataframe::index_col ( const std::string &  key_col_name,
const std::string &  val_col_name,
std::function< IndexT(const std::string &)>  key_col_cvt = utility::string_to<IndexT>,
std::function< ColT(const std::string &)>  val_col_cvt = utility::string_to<ColT> 
) const
inline

Return an indexed typed view of a single column.

auto mapper =
df.index_col<int, double>("first", "second")
| ranges::to<std::unordered_map<int, double>>;

This function is the same as index_icol(), but columns are selected by name.

Exceptions
std::out_of_range - If any of the columns is not in the dataframe.

Definition at line 733 of file dataframe.hpp.

◆ index_cols()

template<typename IndexT , typename... Ts>
auto hipipe::dataframe::index_cols ( const std::string &  key_col_name,
const std::vector< std::string > &  val_col_names,
std::function< IndexT(const std::string &)>  key_col_cvt = utility::string_to<IndexT>,
std::tuple< std::function< Ts(const std::string &)>... >  val_col_cvts = std::make_tuple(utility::string_to<Ts>...) 
) const
inline

Return an indexed typed view of multiple columns.

See index_icol().

auto mapper =
df.index_cols<int, long, double>("id", {"col1", "col2"})
| ranges::to<std::unordered_map<int, std::tuple<long, double>>>;

This function is similar to index_icols(), but columns are selected by name.

Exceptions
std::out_of_range - If any of the columns is not in the dataframe.

Definition at line 788 of file dataframe.hpp.

◆ index_icol()

template<typename IndexT , typename ColT >
auto hipipe::dataframe::index_icol ( std::size_t  key_col_index,
std::size_t  val_col_index,
std::function< IndexT(const std::string &)>  key_col_cvt = utility::string_to<IndexT>,
std::function< ColT(const std::string &)>  val_col_cvt = utility::string_to<ColT> 
) const
inline

Return an indexed typed view of a single column.

This function returns a range of tuples, where the first tuple element is from the key column and the second element is from the value column. This range can be used to construct a map or a hashmap.

Example:

auto mapper =
df.index_icol<int, double>(0, 1)
| ranges::to<std::unordered_map<int, double>>;
Parameters
key_col_index - Index of the column to be used as key.
val_col_index - Index of the column to be used as value.
key_col_cvt - Function that is used to convert the keys from std::string to IndexT.
val_col_cvt - Function that is used to convert the values from std::string to ColT.
Returns
A range of tuples <key, value>.
Exceptions
std::out_of_range - If any of the columns is not in the dataframe.

Definition at line 710 of file dataframe.hpp.

◆ index_icols()

template<typename IndexT , typename... Ts>
auto hipipe::dataframe::index_icols ( std::size_t  key_col_index,
std::vector< std::size_t >  val_col_indexes,
std::function< IndexT(const std::string &)>  key_col_cvt = utility::string_to<IndexT>,
std::tuple< std::function< Ts(const std::string &)>... >  val_col_cvts = std::make_tuple(utility::string_to<Ts>...) 
) const
inline

Return an indexed typed view of multiple columns.

See index_icol().

auto mapper =
df.index_icols<int, long, double>(0, {1, 2})
| ranges::to<std::unordered_map<int, std::tuple<long, double>>>;

This function is similar to index_icol(), but value type is a tuple of Ts.

Exceptions
std::out_of_range - If any of the columns is not in the dataframe.

Definition at line 763 of file dataframe.hpp.

◆ insert_col()

template<typename Rng , typename ValueT = ranges::range_value_t<Rng>>
std::size_t hipipe::dataframe::insert_col ( Rng &&  rng,
std::string  col_name = {},
std::function< std::string(const ValueT &)>  cvt = static_cast<std::string (*)(const ValueT&)>(utility::to_string) 
)
inline

Inserts a new column to the dataframe.

Example:

df.insert_col(std::vector<int>{5, 6, 7}, "C");
Exceptions
std::invalid_argument - 1) If the dataframe has a header but no column name was provided. 2) If the column size is not equal to n_rows.

Definition at line 238 of file dataframe.hpp.

◆ insert_row() [1/2]

template<typename... Ts>
std::size_t hipipe::dataframe::insert_row ( std::tuple< Ts... >  row_tuple,
std::tuple< std::function< std::string(const Ts &)>... >  cvts = std::make_tuple(                               static_cast<std::string (*)(const Ts&)>(utility::to_string)...) 
)
inline

Inserts a new typed row to the dataframe.

Example:

df.insert_row(std::make_tuple(4, "a3", true));
Returns
The index of the new row.
Exceptions
std::invalid_argument - If the row size is not equal to n_cols.

Definition at line 259 of file dataframe.hpp.

◆ insert_row() [2/2]

std::size_t hipipe::dataframe::insert_row ( std::vector< std::string >  row)
inline

Inserts a new raw row to the dataframe.

Example:

df.insert_row({"field 1", "field 2", "field 3"});
Returns
The index of the new row.
Exceptions
std::invalid_argument - If the row size is not equal to n_cols.

Definition at line 281 of file dataframe.hpp.

◆ irows()

template<typename... Ts>
auto hipipe::dataframe::irows ( std::vector< std::size_t >  col_indexes,
std::tuple< std::function< Ts(const std::string &)>... >  cvts = std::make_tuple(utility::string_to<Ts>...) 
) const
inline

Return a typed view of multiple rows.

This function provides the same data as icols() but transposed.

Example:

std::vector<std::tuple<int, double>> data =
df.irows<int, double>({0, 2});
Returns
A range of tuples of Ts.
Exceptions
std::out_of_range - If any of the columns is not in the dataframe.

Definition at line 653 of file dataframe.hpp.

◆ raw_col() [1/2]

auto hipipe::dataframe::raw_col ( const std::string &  col_name)
inline

Return a raw view of a column.

The data can be directly changed by writing to the view.

Example:

df.raw_col("long column")[2] = "new_value";
Returns
A range of std::string&.
Exceptions
std::out_of_range - If the column is not in the dataframe.

Definition at line 498 of file dataframe.hpp.

◆ raw_col() [2/2]

auto hipipe::dataframe::raw_col ( const std::string &  col_name) const
inline

Return a raw view of a column.

This is just a const overload of the non-const raw_col().

Returns
A range of const std::string&.
Exceptions
std::out_of_range - If the column is not in the dataframe.

Definition at line 510 of file dataframe.hpp.

◆ raw_cols() [1/4]

auto hipipe::dataframe::raw_cols ( )
inline

Return a raw view of all columns.

The data can be directly changed by writing to the view.

Example:

// get the third row from the sixth column
std::string field = df.raw_cols()[5][2];
Returns
A range of ranges of std::string&.

Definition at line 341 of file dataframe.hpp.

◆ raw_cols() [2/4]

auto hipipe::dataframe::raw_cols ( ) const
inline

Return a raw view of all columns.

This is just a const overload of the non-const argument-less raw_cols().

Returns
A range of ranges of const std::string&.

Definition at line 351 of file dataframe.hpp.

◆ raw_cols() [3/4]

auto hipipe::dataframe::raw_cols ( const std::vector< std::string > &  col_names)
inline

Return a raw view of multiple columns.

The data can be directly changed by writing to the view.

Example:

// get the sixth row from the column named "column 2"
std::string field = df.raw_cols({"column 1", "column 2"})[1][5];
Returns
A range of ranges of std::string&.
Exceptions
std::out_of_range - If any of the columns is not in the dataframe.

Definition at line 398 of file dataframe.hpp.

◆ raw_cols() [4/4]

auto hipipe::dataframe::raw_cols ( const std::vector< std::string > &  col_names) const
inline

Return a raw view of multiple columns.

This is just a const overload of the non-const raw_cols().

Returns
A range of ranges of const std::string&.
Exceptions
std::out_of_range - If any of the columns is not in the dataframe.

Definition at line 410 of file dataframe.hpp.

◆ raw_icol() [1/2]

auto hipipe::dataframe::raw_icol ( std::size_t  col_index)
inline

Return a raw view of a column.

The data can be directly changed by writing to the view.

Example:

df.raw_icol(3)[2] = "new_value";
Returns
A range of std::string&.
Exceptions
std::out_of_range - If the column is not in the dataframe.

Definition at line 471 of file dataframe.hpp.

◆ raw_icol() [2/2]

auto hipipe::dataframe::raw_icol ( std::size_t  col_index) const
inline

Return a raw view of a column.

Returns
A range of const std::string&.
Exceptions
std::out_of_range - If the column is not in the dataframe.

Definition at line 481 of file dataframe.hpp.

◆ raw_icols() [1/2]

auto hipipe::dataframe::raw_icols ( std::vector< std::size_t >  col_indexes)
inline

Return a raw view of multiple columns.

The data can be directly changed by writing to the view.

Example:

// get the third row from the sixth column (with index 5)
std::string field = df.raw_icols({1, 5})[1][2];
Returns
A range of ranges of std::string&.
Exceptions
std::out_of_range - If any of the columns is not in the dataframe.

Definition at line 368 of file dataframe.hpp.

◆ raw_icols() [2/2]

auto hipipe::dataframe::raw_icols ( std::vector< std::size_t >  col_indexes) const
inline

Return a raw view of multiple columns.

This is just a const overload of the non-const raw_icols().

Returns
A range of ranges of const std::string&.
Exceptions
std::out_of_range - If any of the columns is not in the dataframe.

Definition at line 380 of file dataframe.hpp.

◆ raw_irows() [1/2]

auto hipipe::dataframe::raw_irows ( std::vector< std::size_t >  col_indexes)
inline

Return a raw view of multiple rows.

Example:

// get the third row from the sixth column (with index 5)
std::string field = df.raw_irows({3, 5})[2][1];
Returns
A range of ranges of std::string&.
Exceptions
std::out_of_range - If any of the columns is not in the dataframe.

Definition at line 592 of file dataframe.hpp.

◆ raw_irows() [2/2]

auto hipipe::dataframe::raw_irows ( std::vector< std::size_t >  col_indexes) const
inline

Return a raw view of multiple rows.

This is just a const overload of the non-const raw_irows().

Returns
A range of ranges of const std::string&.
Exceptions
std::out_of_range - If any of the columns is not in the dataframe.

Definition at line 604 of file dataframe.hpp.

◆ raw_rows() [1/4]

auto hipipe::dataframe::raw_rows ( )
inline

Return a raw view of all rows.

Example:

// get the third row from the sixth column
std::string field = df.raw_rows()[2][5];
Returns
A range of ranges of std::string&.

Definition at line 567 of file dataframe.hpp.

◆ raw_rows() [2/4]

auto hipipe::dataframe::raw_rows ( ) const
inline

Return a raw view of all rows.

This is just a const overload of the non-const argument-less raw_rows().

Returns
A range of ranges of const std::string&.

Definition at line 577 of file dataframe.hpp.

◆ raw_rows() [3/4]

auto hipipe::dataframe::raw_rows ( const std::vector< std::string > &  col_names)
inline

Return a raw view of multiple rows.

Example:

// get the third row from column named "col2"
std::string field = df.raw_rows({"col1", "col2"})[2][1];
Returns
A range of ranges of std::string&.
Exceptions
std::out_of_range - If any of the columns is not in the dataframe.

Definition at line 620 of file dataframe.hpp.

◆ raw_rows() [4/4]

auto hipipe::dataframe::raw_rows ( const std::vector< std::string > &  col_names) const
inline

Return a raw view of multiple rows.

This is just a const overload of the non-const raw_rows().

Returns
A range of ranges of const std::string&.
Exceptions
std::out_of_range - If any of the columns is not in the dataframe.

Definition at line 632 of file dataframe.hpp.

◆ rows()

template<typename... Ts>
auto hipipe::dataframe::rows ( const std::vector< std::string > &  col_names,
std::tuple< std::function< Ts(const std::string &)>... >  cvts = std::make_tuple(utility::string_to<Ts>...) 
) const
inline

Return a typed view of multiple rows.

This function provides the same data as cols() but transposed.

Example:

std::vector<std::tuple<int, double>> data =
df.rows<int, double>({"int_col", "double_col"});
Returns
A range of tuples of Ts.
Exceptions
std::out_of_range - If any of the columns is not in the dataframe.

Definition at line 680 of file dataframe.hpp.


The documentation for this class was generated from the following file:
hipipe::dataframe::data
data_table_t & data()
Return a reference to the raw data table.
Definition: dataframe.hpp:836