damast.core.polars_dataframe#

Attributes#

Classes#

Module Contents#

damast.core.polars_dataframe.logger#
damast.core.polars_dataframe.VAEX_HDF5_ROOT: str = '/table'#
damast.core.polars_dataframe.VAEX_HDF5_COLUMNS: str#
class damast.core.polars_dataframe.Meta#

Bases: type

_base_impl: ClassVar[str] = 'polars'#
__getattr__(attr_name)#
class damast.core.polars_dataframe.PolarsDataFrame(df: polars.LazyFrame | polars.DataFrame)#
_dataframe: polars.LazyFrame#
property dataframe: PolarsDataFrame#

Allows direct access to the underlying dataframe.

Note

PolarsDataFrame behaves like a polars.LazyFrame, so typically you will not need to access the dataframe through this property.

Returns:

The underlying dataframe

__getitem__(column_name: str)#

Make the dataframe subscriptable so that it behaves more like a pandas.DataFrame.

Parameters:

column_name – Name of the column (key) to look up when using the [] operator

Returns:

Item/column from the underlying polars dataframe

property column_names: list[str]#

Get all column names (without collecting the full dataframe)

dtype(column_name: str) polars.datatypes.DataType#

Get column dtype (without collecting the full dataframe)

minmax(column_name: str) Tuple[any, any]#

Tuple of min and max values of the given column

set_dtype(column_name, representation_type) PolarsDataFrame#

Set the dtype for a column to the given representation type, using the polars cast functionality.

Returns:

The updated object

__getattr__(attr_name)#

Ensure that this object behaves like a polars.LazyFrame.

Parameters:

attr_name – Attribute / Name of column

Returns:

The column data

__setitem__(key, values)#

Set a column of the annotated dataframe, allowing it to behave like a polars.DataFrame.

Parameters:
  • key – Column name

  • values – Values to set the column to

__len__() int#

Get the length of the (underlying) dataframe.

Returns:

Length of the dataframe

equals(other: PolarsDataFrame) bool#
open(sep=',') DataFrame#
classmethod from_vaex_hdf5(path: str | pathlib.Path) Tuple[DataFrame, damast.core.metadata.MetaData]#

Load hdf5 file and (damast) metadata if found in the file.

classmethod import_hdf5(filename: str | pathlib.Path) Tuple[DataFrame, MetaData]#

Import a dataframe stored as HDF5.

This method tries to load using pandas first, then falls back to reading a vaex-based format using pytables.

classmethod export_hdf5(df: DataFrame, path: str | pathlib.Path) pathlib.Path#

Export the dataframe as hdf5. Please use this only if really needed; otherwise, stick with the default format (parquet).