HiPipe  0.6.0
C++17 data pipeline with Python bindings.
Public Member Functions | List of all members
hipipe::dataframe Class Reference

Tabular object with convenient data access methods. More...

#include <hipipe/core/dataframe.hpp>

Public Member Functions

template<typename T >
 dataframe (std::vector< std::vector< T >> columns, std::vector< std::string > header={})
 
template<typename... Ts>
 dataframe (std::tuple< std::vector< Ts >... > columns, std::vector< std::string > header={})
 
template<typename Rng , typename ValueT = ranges::range_value_type_t<Rng>>
std::size_t insert_col (Rng &&rng, std::string col_name={}, std::function< std::string(const ValueT &)> cvt=static_cast< std::string(*)(const ValueT &)>(utility::to_string))
 
template<typename... Ts>
std::size_t insert_row (std::tuple< Ts... > row_tuple, std::tuple< std::function< std::string(const Ts &)>... > cvts=std::make_tuple(static_cast< std::string(*)(const Ts &)>(utility::to_string)...))
 
std::size_t insert_row (std::vector< std::string > row)
 
void drop_icol (std::size_t col_index)
 
void drop_col (const std::string &col_name)
 
void drop_row (const std::size_t row_idx)
 
auto raw_cols ()
 
auto raw_cols () const
 
auto raw_icols (std::vector< std::size_t > col_indexes)
 
auto raw_icols (std::vector< std::size_t > col_indexes) const
 
auto raw_cols (const std::vector< std::string > &col_names)
 
auto raw_cols (const std::vector< std::string > &col_names) const
 
template<typename... Ts>
auto icols (std::vector< std::size_t > col_indexes, std::tuple< std::function< Ts(const std::string &)>... > cvts=std::make_tuple(utility::string_to< Ts >...)) const
 
template<typename... Ts>
auto cols (const std::vector< std::string > &col_names, std::tuple< std::function< Ts(const std::string &)>... > cvts=std::make_tuple(utility::string_to< Ts >...)) const
 
auto raw_icol (std::size_t col_index)
 
auto raw_icol (std::size_t col_index) const
 
auto raw_col (const std::string &col_name)
 
auto raw_col (const std::string &col_name) const
 
template<typename T >
auto icol (std::size_t col_index, std::function< T(const std::string &)> cvt=utility::string_to< T >) const
 
template<typename T >
auto col (const std::string &col_name, std::function< T(const std::string &)> cvt=utility::string_to< T >) const
 
auto raw_rows ()
 
auto raw_rows () const
 
auto raw_irows (std::vector< std::size_t > col_indexes)
 
auto raw_irows (std::vector< std::size_t > col_indexes) const
 
auto raw_rows (const std::vector< std::string > &col_names)
 
auto raw_rows (const std::vector< std::string > &col_names) const
 
template<typename... Ts>
auto irows (std::vector< std::size_t > col_indexes, std::tuple< std::function< Ts(const std::string &)>... > cvts=std::make_tuple(utility::string_to< Ts >...)) const
 
template<typename... Ts>
auto rows (const std::vector< std::string > &col_names, std::tuple< std::function< Ts(const std::string &)>... > cvts=std::make_tuple(utility::string_to< Ts >...)) const
 
template<typename IndexT , typename ColT >
auto index_icol (std::size_t key_col_index, std::size_t val_col_index, std::function< IndexT(const std::string &)> key_col_cvt=utility::string_to< IndexT >, std::function< ColT(const std::string &)> val_col_cvt=utility::string_to< ColT >) const
 
template<typename IndexT , typename ColT >
auto index_col (const std::string &key_col_name, const std::string &val_col_name, std::function< IndexT(const std::string &)> key_col_cvt=utility::string_to< IndexT >, std::function< ColT(const std::string &)> val_col_cvt=utility::string_to< ColT >) const
 
template<typename IndexT , typename... Ts>
auto index_icols (std::size_t key_col_index, std::vector< std::size_t > val_col_indexes, std::function< IndexT(const std::string &)> key_col_cvt=utility::string_to< IndexT >, std::tuple< std::function< Ts(const std::string &)>... > val_col_cvts=std::make_tuple(utility::string_to< Ts >...)) const
 
template<typename IndexT , typename... Ts>
auto index_cols (const std::string &key_col_name, const std::vector< std::string > &val_col_names, std::function< IndexT(const std::string &)> key_col_cvt=utility::string_to< IndexT >, std::tuple< std::function< Ts(const std::string &)>... > val_col_cvts=std::make_tuple(utility::string_to< Ts >...)) const
 
std::size_t n_cols () const
 Return the number of columns.
 
std::size_t n_rows () const
 Return the number of rows (excluding header).
 
void header (std::vector< std::string > new_header)
 
std::vector< std::string > header () const
 Return the names of columns.
 
data_table_t & data ()
 Return a reference to the raw data table.
 
const data_table_t & data () const
 Return a const reference to the raw data table.
 

Detailed Description

Tabular object with convenient data access methods.

By default, all fields are stored as std::string and they are cast to the requested type on demand.

Definition at line 38 of file dataframe.hpp.

Constructor & Destructor Documentation

◆ dataframe() [1/2]

template<typename T >
hipipe::dataframe::dataframe ( std::vector< std::vector< T >>  columns,
std::vector< std::string >  header = {} 
)
inline

Constructs the dataset from a vector of columns of the same type.

Example:

dataframe df{
// columns
std::vector<std::vector<int>>{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}},
// header
std::vector<std::string>{"A", "B", "C"}
};
Exceptions
std::invalid_argument: 1) If the header is provided, but some of the column names are empty. 2) If the column sizes mismatch. 3) If the provided header does not match the number of provided columns.

Definition at line 185 of file dataframe.hpp.

◆ dataframe() [2/2]

template<typename... Ts>
hipipe::dataframe::dataframe ( std::tuple< std::vector< Ts >... >  columns,
std::vector< std::string >  header = {} 
)
inline

Constructs the dataset from a tuple of columns of possibly different types.

Example:

dataframe df{
// columns
std::make_tuple(
std::vector<int>{1, 2, 3},
std::vector<std::string>{"a1", "a2", "a3"},
std::vector<std::string>{"1.1", "1.2", "1.3"}
),
// header
std::vector<std::string>{"Id", "A", "B"}
};
Exceptions
std::invalid_argument: 1) If the header is provided, but some of the column names are empty. 2) If the column sizes mismatch. 3) If the provided header does not match the number of provided columns.

Definition at line 216 of file dataframe.hpp.

Member Function Documentation

◆ col()

template<typename T >
auto hipipe::dataframe::col ( const std::string &  col_name,
std::function< T(const std::string &)>  cvt = utility::string_to<T> 
) const
inline

Return a typed view of a column.

By default, this function does not provide direct access to the stored data. Instead, each field is converted to the type T and a copy is returned.

Example:

std::vector<long> data = df.col<long>("long column");
Returns
A range of T.
Exceptions
std::out_of_range: If the column is not in the dataframe.

Definition at line 551 of file dataframe.hpp.

◆ cols()

template<typename... Ts>
auto hipipe::dataframe::cols ( const std::vector< std::string > &  col_names,
std::tuple< std::function< Ts(const std::string &)>... >  cvts = std::make_tuple(utility::string_to<Ts>...) 
) const
inline

Return a typed view of multiple columns.

Example:

std::tuple<std::vector<int>, std::vector<double>> data =
df.cols<int, double>({"column 1", "column 2"});
Returns
A tuple of ranges of Ts.
Exceptions
std::out_of_range: If any of the columns is not in the dataframe.

Definition at line 451 of file dataframe.hpp.

◆ drop_col()

void hipipe::dataframe::drop_col ( const std::string &  col_name)
inline

Drop a column with the given name.

Exceptions
std::out_of_range: If the column is not in the dataframe.

Definition at line 312 of file dataframe.hpp.

◆ drop_icol()

void hipipe::dataframe::drop_icol ( std::size_t  col_index)
inline

Drop a column with the given index.

Exceptions
std::out_of_range: If the column is not in the dataframe.

Definition at line 296 of file dataframe.hpp.

◆ drop_row()

void hipipe::dataframe::drop_row ( const std::size_t  row_idx)
inline

Drop a row.

Exceptions
std::out_of_range: If the row is not in the dataframe.

Definition at line 321 of file dataframe.hpp.

◆ header()

void hipipe::dataframe::header ( std::vector< std::string >  new_header)
inline

Set the column names.

Exceptions
std::invalid_argument: 1) If some of the column names are empty. 2) If the header does not match the number of columns.

Definition at line 813 of file dataframe.hpp.

◆ icol()

template<typename T >
auto hipipe::dataframe::icol ( std::size_t  col_index,
std::function< T(const std::string &)>  cvt = utility::string_to<T> 
) const
inline

Return a typed view of a column.

By default, this function does not provide direct access to the stored data. Instead, each field is converted to the type T and a copy is returned.

Example:

std::vector<long> data = df.icol<long>(3);
Returns
A range of T.
Exceptions
std::out_of_range: If the column is not in the dataframe.

Definition at line 532 of file dataframe.hpp.

◆ icols()

template<typename... Ts>
auto hipipe::dataframe::icols ( std::vector< std::size_t >  col_indexes,
std::tuple< std::function< Ts(const std::string &)>... >  cvts = std::make_tuple(utility::string_to<Ts>...) 
) const
inline

Return a typed view of multiple columns.

Example:

std::tuple<std::vector<int>, std::vector<double>> data = df.icols<int, double>({1, 2});
Returns
A tuple of ranges of Ts.
Exceptions
std::out_of_range: If any of the columns is not in the dataframe.

Definition at line 429 of file dataframe.hpp.

◆ index_col()

template<typename IndexT , typename ColT >
auto hipipe::dataframe::index_col ( const std::string &  key_col_name,
const std::string &  val_col_name,
std::function< IndexT(const std::string &)>  key_col_cvt = utility::string_to<IndexT>,
std::function< ColT(const std::string &)>  val_col_cvt = utility::string_to<ColT> 
) const
inline

Return an indexed typed view of a single column.

std::unordered_map<int, double> mapper = df.index_col<int, double>("first", "second");

This function is the same as index_icol(), but columns are selected by name.

Exceptions
std::out_of_range: If any of the columns is not in the dataframe.

Definition at line 725 of file dataframe.hpp.

◆ index_cols()

template<typename IndexT , typename... Ts>
auto hipipe::dataframe::index_cols ( const std::string &  key_col_name,
const std::vector< std::string > &  val_col_names,
std::function< IndexT(const std::string &)>  key_col_cvt = utility::string_to<IndexT>,
std::tuple< std::function< Ts(const std::string &)>... >  val_col_cvts = std::make_tuple(utility::string_to<Ts>...) 
) const
inline

Return an indexed typed view of multiple columns.

See index_icol().

std::unordered_map<int, std::tuple<long, double>> mapper =
df.index_cols<int, long, double>("id", {"col1", "col2"});

This function is similar to index_icols(), but columns are selected by name.

Exceptions
std::out_of_range: If any of the columns is not in the dataframe.

Definition at line 778 of file dataframe.hpp.

◆ index_icol()

template<typename IndexT , typename ColT >
auto hipipe::dataframe::index_icol ( std::size_t  key_col_index,
std::size_t  val_col_index,
std::function< IndexT(const std::string &)>  key_col_cvt = utility::string_to<IndexT>,
std::function< ColT(const std::string &)>  val_col_cvt = utility::string_to<ColT> 
) const
inline

Return an indexed typed view of a single column.

This function returns a range of tuples, where the first tuple element is from the key column and the second element is from the value column. This range can be used to construct a map or a hashmap.

Example:

std::unordered_map<int, double> mapper = df.index_icol<int, double>(0, 1);
Parameters
key_col_index: Index of the column to be used as key.
val_col_index: Index of the column to be used as value.
key_col_cvt: Function that is used to convert the keys from std::string to IndexT.
val_col_cvt: Function that is used to convert the values from std::string to ColT.
Returns
A range of tuples <key, value>.
Exceptions
std::out_of_range: If any of the columns is not in the dataframe.

Definition at line 704 of file dataframe.hpp.

◆ index_icols()

template<typename IndexT , typename... Ts>
auto hipipe::dataframe::index_icols ( std::size_t  key_col_index,
std::vector< std::size_t >  val_col_indexes,
std::function< IndexT(const std::string &)>  key_col_cvt = utility::string_to<IndexT>,
std::tuple< std::function< Ts(const std::string &)>... >  val_col_cvts = std::make_tuple(utility::string_to<Ts>...) 
) const
inline

Return an indexed typed view of multiple columns.

See index_icol().

std::unordered_map<int, std::tuple<long, double>> mapper =
df.index_icols<int, long, double>(0, {1, 2});

This function is similar to index_icol(), but value type is a tuple of Ts.

Exceptions
std::out_of_range: If any of the columns is not in the dataframe.

Definition at line 754 of file dataframe.hpp.

◆ insert_col()

template<typename Rng , typename ValueT = ranges::range_value_type_t<Rng>>
std::size_t hipipe::dataframe::insert_col ( Rng &&  rng,
std::string  col_name = {},
std::function< std::string(const ValueT &)>  cvt = static_cast<std::string (*)(const ValueT&)>(utility::to_string) 
)
inline

Inserts a new column to the dataframe.

Example:

df.insert_col(std::vector<int>{5, 6, 7}, "C");
Exceptions
std::invalid_argument: 1) If the dataframe has a header but no column name was provided. 2) If the column size is not equal to n_rows.

Definition at line 239 of file dataframe.hpp.

◆ insert_row() [1/2]

template<typename... Ts>
std::size_t hipipe::dataframe::insert_row ( std::tuple< Ts... >  row_tuple,
std::tuple< std::function< std::string(const Ts &)>... >  cvts = std::make_tuple(                               static_cast<std::string (*)(const Ts&)>(utility::to_string)...) 
)
inline

Inserts a new typed row to the dataframe.

Example:

df.insert_row(std::make_tuple(4, "a3", true));
Returns
The index of the new row.
Exceptions
std::invalid_argument: If the row size is not equal to n_cols.

Definition at line 260 of file dataframe.hpp.

◆ insert_row() [2/2]

std::size_t hipipe::dataframe::insert_row ( std::vector< std::string >  row)
inline

Inserts a new raw row to the dataframe.

Example:

df.insert_row({"field 1", "field 2", "field 3"});
Returns
The index of the new row.
Exceptions
std::invalid_argument: If the row size is not equal to n_cols.

Definition at line 282 of file dataframe.hpp.

◆ irows()

template<typename... Ts>
auto hipipe::dataframe::irows ( std::vector< std::size_t >  col_indexes,
std::tuple< std::function< Ts(const std::string &)>... >  cvts = std::make_tuple(utility::string_to<Ts>...) 
) const
inline

Return a typed view of multiple rows.

This function provides the same data as icols() but transposed.

Example:

std::vector<std::tuple<int, double>> data =
df.irows<int, double>({0, 2});
Returns
A range of tuples of Ts.
Exceptions
std::out_of_rangeIf any of the columns is not in the dataframe.

Definition at line 654 of file dataframe.hpp.

◆ raw_col() [1/2]

auto hipipe::dataframe::raw_col ( const std::string &  col_name)
inline

Return a raw view of a column.

The data can be directly changed by writing to the view.

Example:

df.raw_col("long column")[2] = "new_value";
Returns
A range of std::string&.
Exceptions
std::out_of_range: If the column is not in the dataframe.

Definition at line 499 of file dataframe.hpp.

◆ raw_col() [2/2]

auto hipipe::dataframe::raw_col ( const std::string &  col_name) const
inline

Return a raw view of a column.

This is just a const overload of the non-const raw_col().

Returns
A range of const std::string&.
Exceptions
std::out_of_range: If the column is not in the dataframe.

Definition at line 511 of file dataframe.hpp.

◆ raw_cols() [1/4]

auto hipipe::dataframe::raw_cols ( )
inline

Return a raw view of all columns.

The data can be directly changed by writing to the view.

Example:

// get the third row from the sixth column
std::string field = df.raw_cols()[5][2];
Returns
A range of ranges of std::string&.

Definition at line 342 of file dataframe.hpp.

◆ raw_cols() [2/4]

auto hipipe::dataframe::raw_cols ( ) const
inline

Return a raw view of all columns.

This is just a const overload of the non-const argument-less raw_cols().

Returns
A range of ranges of const std::string&.

Definition at line 352 of file dataframe.hpp.

◆ raw_cols() [3/4]

auto hipipe::dataframe::raw_cols ( const std::vector< std::string > &  col_names)
inline

Return a raw view of multiple columns.

The data can be directly changed by writing to the view.

Example:

// get the sixth row from the column named "column 2"
std::string field = df.raw_cols({"column 1", "column 2"})[1][5];
Returns
A range of ranges of std::string&.
Exceptions
std::out_of_range: If any of the columns is not in the dataframe.

Definition at line 399 of file dataframe.hpp.

◆ raw_cols() [4/4]

auto hipipe::dataframe::raw_cols ( const std::vector< std::string > &  col_names) const
inline

Return a raw view of multiple columns.

This is just a const overload of the non-const raw_cols().

Returns
A range of ranges of const std::string&.
Exceptions
std::out_of_range: If any of the columns is not in the dataframe.

Definition at line 411 of file dataframe.hpp.

◆ raw_icol() [1/2]

auto hipipe::dataframe::raw_icol ( std::size_t  col_index)
inline

Return a raw view of a column.

The data can be directly changed by writing to the view.

Example:

df.raw_icol(3)[2] = "new_value";
Returns
A range of std::string&.
Exceptions
std::out_of_range: If the column is not in the dataframe.

Definition at line 472 of file dataframe.hpp.

◆ raw_icol() [2/2]

auto hipipe::dataframe::raw_icol ( std::size_t  col_index) const
inline

Return a raw view of a column.

Returns
A range of const std::string&.
Exceptions
std::out_of_range: If the column is not in the dataframe.

Definition at line 482 of file dataframe.hpp.

◆ raw_icols() [1/2]

auto hipipe::dataframe::raw_icols ( std::vector< std::size_t >  col_indexes)
inline

Return a raw view of multiple columns.

The data can be directly changed by writing to the view.

Example:

// get the third row from the sixth column (with index 5)
std::string field = df.raw_icols({1, 5})[1][2];
Returns
A range of ranges of std::string&.
Exceptions
std::out_of_range: If any of the columns is not in the dataframe.

Definition at line 369 of file dataframe.hpp.

◆ raw_icols() [2/2]

auto hipipe::dataframe::raw_icols ( std::vector< std::size_t >  col_indexes) const
inline

Return a raw view of multiple columns.

This is just a const overload of the non-const raw_icols().

Returns
A range of ranges of const std::string&.
Exceptions
std::out_of_range: If any of the columns is not in the dataframe.

Definition at line 381 of file dataframe.hpp.

◆ raw_irows() [1/2]

auto hipipe::dataframe::raw_irows ( std::vector< std::size_t >  col_indexes)
inline

Return a raw view of multiple rows.

Example:

// get the third row from the sixth column (with index 5)
std::string field = df.raw_irows({3, 5})[2][1];
Returns
A range of ranges of std::string&.
Exceptions
std::out_of_range: If any of the columns is not in the dataframe.

Definition at line 593 of file dataframe.hpp.

◆ raw_irows() [2/2]

auto hipipe::dataframe::raw_irows ( std::vector< std::size_t >  col_indexes) const
inline

Return a raw view of multiple rows.

This is just a const overload of the non-const raw_irows().

Returns
A range of ranges of const std::string&.
Exceptions
std::out_of_range: If any of the columns is not in the dataframe.

Definition at line 605 of file dataframe.hpp.

◆ raw_rows() [1/4]

auto hipipe::dataframe::raw_rows ( )
inline

Return a raw view of all rows.

Example:

// get the third row from the sixth column
std::string field = df.raw_rows()[2][5];
Returns
A range of ranges of std::string&.

Definition at line 568 of file dataframe.hpp.

◆ raw_rows() [2/4]

auto hipipe::dataframe::raw_rows ( ) const
inline

Return a raw view of all rows.

This is just a const overload of the non-const argument-less raw_rows().

Returns
A range of ranges of const std::string&.

Definition at line 578 of file dataframe.hpp.

◆ raw_rows() [3/4]

auto hipipe::dataframe::raw_rows ( const std::vector< std::string > &  col_names)
inline

Return a raw view of multiple rows.

Example:

// get the third row from column named "col2"
std::string field = df.raw_rows({"col1", "col2"})[2][1];
Returns
A range of ranges of std::string&.
Exceptions
std::out_of_range: If any of the columns is not in the dataframe.

Definition at line 621 of file dataframe.hpp.

◆ raw_rows() [4/4]

auto hipipe::dataframe::raw_rows ( const std::vector< std::string > &  col_names) const
inline

Return a raw view of multiple rows.

This is just a const overload of the non-const raw_rows().

Returns
A range of ranges of const std::string&.
Exceptions
std::out_of_range: If any of the columns is not in the dataframe.

Definition at line 633 of file dataframe.hpp.

◆ rows()

template<typename... Ts>
auto hipipe::dataframe::rows ( const std::vector< std::string > &  col_names,
std::tuple< std::function< Ts(const std::string &)>... >  cvts = std::make_tuple(utility::string_to<Ts>...) 
) const
inline

Return a typed view of multiple rows.

This function provides the same data as cols() but transposed.

Example:

std::vector<std::tuple<int, double>> data =
df.rows<int, double>({"int_col", "double_col"});
Returns
A range of tuples of Ts.
Exceptions
std::out_of_range: If any of the columns is not in the dataframe.

Definition at line 676 of file dataframe.hpp.


The documentation for this class was generated from the following file: