Usage#

We start by exploring the data-processing pipeline part of DAMAST. We consider a manufactured dataset of Automatic Identification System (AIS) messages. The data is generated for 1000 boats, where the minimal length of a trajectory is 25 messages and the maximal length is 300.

!pip install damast

import polars
import damast.domains.maritime.ais.data_generator as generator

data = generator.AISTestData(number_of_trajectories=1000, min_length=25, max_length=300)
Requirement already satisfied: damast in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (0.1.12)
Requirement already satisfied: astropy in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from damast) (6.1.7)
Requirement already satisfied: cloudpickle in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from damast) (3.1.1)
Requirement already satisfied: keras>=3.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from damast) (3.11.3)
Requirement already satisfied: matplotlib in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from damast) (3.10.5)
Requirement already satisfied: numba in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from damast) (0.61.2)
Requirement already satisfied: numpy>=2 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from damast) (2.2.6)
Requirement already satisfied: polars>=1.20 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from damast) (1.32.3)
Requirement already satisfied: psutil in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from damast) (7.0.0)
Requirement already satisfied: pyais in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from damast) (2.13.1)
Requirement already satisfied: pyarrow in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from damast) (21.0.0)
Requirement already satisfied: pydantic>=2.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from damast) (2.11.7)
Requirement already satisfied: ratarmount>=1.1 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from damast) (1.2.0)
Requirement already satisfied: scikit-learn in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from damast) (1.7.1)
Requirement already satisfied: tables in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from damast) (3.10.1)
Requirement already satisfied: torch in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from damast) (2.8.0)
Requirement already satisfied: tqdm in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from damast) (4.67.1)
Requirement already satisfied: absl-py in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from keras>=3.0->damast) (2.3.1)
Requirement already satisfied: rich in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from keras>=3.0->damast) (14.1.0)
Requirement already satisfied: namex in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from keras>=3.0->damast) (0.1.0)
Requirement already satisfied: h5py in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from keras>=3.0->damast) (3.14.0)
Requirement already satisfied: optree in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from keras>=3.0->damast) (0.17.0)
Requirement already satisfied: ml-dtypes in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from keras>=3.0->damast) (0.5.3)
Requirement already satisfied: packaging in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from keras>=3.0->damast) (25.0)
Requirement already satisfied: annotated-types>=0.6.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from pydantic>=2.0->damast) (0.7.0)
Requirement already satisfied: pydantic-core==2.33.2 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from pydantic>=2.0->damast) (2.33.2)
Requirement already satisfied: typing-extensions>=4.12.2 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from pydantic>=2.0->damast) (4.15.0)
Requirement already satisfied: typing-inspection>=0.4.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from pydantic>=2.0->damast) (0.4.1)
Requirement already satisfied: ratarmountcore~=0.10.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from ratarmountcore[7z,bzip2,fat,gzip,rar,xz,zip,zstd]~=0.10.0->ratarmount>=1.1->damast) (0.10.1)
Requirement already satisfied: mfusepy~=3.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from ratarmount>=1.1->damast) (3.0.0)
Requirement already satisfied: rarfile~=4.1 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from ratarmountcore[7z,bzip2,fat,gzip,rar,xz,zip,zstd]~=0.10.0->ratarmount>=1.1->damast) (4.2)
Requirement already satisfied: indexed_zstd<2.0,>=1.2.2 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from ratarmountcore[7z,bzip2,fat,gzip,rar,xz,zip,zstd]~=0.10.0->ratarmount>=1.1->damast) (1.6.1)
Requirement already satisfied: python-xz~=0.4.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from ratarmountcore[7z,bzip2,fat,gzip,rar,xz,zip,zstd]~=0.10.0->ratarmount>=1.1->damast) (0.4.0)
Requirement already satisfied: rapidgzip~=0.15.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from ratarmountcore[7z,bzip2,fat,gzip,rar,xz,zip,zstd]~=0.10.0->ratarmount>=1.1->damast) (0.15.2)
Requirement already satisfied: indexed_gzip~=1.7 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from ratarmountcore[7z,bzip2,fat,gzip,rar,xz,zip,zstd]~=0.10.0->ratarmount>=1.1->damast) (1.10.1)
Requirement already satisfied: fast_zip_decryption in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from ratarmountcore[7z,bzip2,fat,gzip,rar,xz,zip,zstd]~=0.10.0->ratarmount>=1.1->damast) (3.0.0)
Requirement already satisfied: libarchive-c<6.0,~=5.1 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from ratarmountcore[7z,bzip2,fat,gzip,rar,xz,zip,zstd]~=0.10.0->ratarmount>=1.1->damast) (5.3)
Requirement already satisfied: py7zr~=1.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from ratarmountcore[7z,bzip2,fat,gzip,rar,xz,zip,zstd]~=0.10.0->ratarmount>=1.1->damast) (1.0.0)
Requirement already satisfied: texttable in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from py7zr~=1.0->ratarmountcore[7z,bzip2,fat,gzip,rar,xz,zip,zstd]~=0.10.0->ratarmount>=1.1->damast) (1.7.0)
Requirement already satisfied: pycryptodomex>=3.20.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from py7zr~=1.0->ratarmountcore[7z,bzip2,fat,gzip,rar,xz,zip,zstd]~=0.10.0->ratarmount>=1.1->damast) (3.23.0)
Requirement already satisfied: brotli>=1.1.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from py7zr~=1.0->ratarmountcore[7z,bzip2,fat,gzip,rar,xz,zip,zstd]~=0.10.0->ratarmount>=1.1->damast) (1.1.0)
Requirement already satisfied: pyzstd>=0.16.1 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from py7zr~=1.0->ratarmountcore[7z,bzip2,fat,gzip,rar,xz,zip,zstd]~=0.10.0->ratarmount>=1.1->damast) (0.17.0)
Requirement already satisfied: pyppmd<1.3.0,>=1.1.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from py7zr~=1.0->ratarmountcore[7z,bzip2,fat,gzip,rar,xz,zip,zstd]~=0.10.0->ratarmount>=1.1->damast) (1.2.0)
Requirement already satisfied: pybcj<1.1.0,>=1.0.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from py7zr~=1.0->ratarmountcore[7z,bzip2,fat,gzip,rar,xz,zip,zstd]~=0.10.0->ratarmount>=1.1->damast) (1.0.6)
Requirement already satisfied: multivolumefile>=0.2.3 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from py7zr~=1.0->ratarmountcore[7z,bzip2,fat,gzip,rar,xz,zip,zstd]~=0.10.0->ratarmount>=1.1->damast) (0.2.3)
Requirement already satisfied: inflate64<1.1.0,>=1.0.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from py7zr~=1.0->ratarmountcore[7z,bzip2,fat,gzip,rar,xz,zip,zstd]~=0.10.0->ratarmount>=1.1->damast) (1.0.3)
Requirement already satisfied: pyerfa>=2.0.1.1 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from astropy->damast) (2.0.1.5)
Requirement already satisfied: astropy-iers-data>=0.2024.10.28.0.34.7 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from astropy->damast) (0.2025.8.25.0.36.58)
Requirement already satisfied: PyYAML>=3.13 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from astropy->damast) (6.0.2)
Requirement already satisfied: contourpy>=1.0.1 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from matplotlib->damast) (1.3.2)
Requirement already satisfied: cycler>=0.10 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from matplotlib->damast) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from matplotlib->damast) (4.59.1)
Requirement already satisfied: kiwisolver>=1.3.1 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from matplotlib->damast) (1.4.9)
Requirement already satisfied: pillow>=8 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from matplotlib->damast) (11.3.0)
Requirement already satisfied: pyparsing>=2.3.1 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from matplotlib->damast) (3.2.3)
Requirement already satisfied: python-dateutil>=2.7 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from matplotlib->damast) (2.9.0.post0)
Requirement already satisfied: six>=1.5 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from python-dateutil>=2.7->matplotlib->damast) (1.17.0)
Requirement already satisfied: llvmlite<0.45,>=0.44.0dev0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from numba->damast) (0.44.0)
Requirement already satisfied: bitarray in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from pyais->damast) (3.7.0)
Requirement already satisfied: attrs in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from pyais->damast) (25.3.0)
Requirement already satisfied: markdown-it-py>=2.2.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from rich->keras>=3.0->damast) (3.0.0)
Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from rich->keras>=3.0->damast) (2.19.2)
Requirement already satisfied: mdurl~=0.1 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich->keras>=3.0->damast) (0.1.2)
Requirement already satisfied: scipy>=1.8.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from scikit-learn->damast) (1.15.3)
Requirement already satisfied: joblib>=1.2.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from scikit-learn->damast) (1.5.1)
Requirement already satisfied: threadpoolctl>=3.1.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from scikit-learn->damast) (3.6.0)
Requirement already satisfied: numexpr>=2.6.2 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from tables->damast) (2.11.0)
Requirement already satisfied: py-cpuinfo in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from tables->damast) (9.0.0)
Requirement already satisfied: blosc2>=2.3.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from tables->damast) (3.7.2)
Requirement already satisfied: ndindex in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from blosc2>=2.3.0->tables->damast) (1.10.0)
Requirement already satisfied: msgpack in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from blosc2>=2.3.0->tables->damast) (1.1.1)
Requirement already satisfied: platformdirs in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from blosc2>=2.3.0->tables->damast) (4.3.8)
Requirement already satisfied: requests in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from blosc2>=2.3.0->tables->damast) (2.32.5)
Requirement already satisfied: charset_normalizer<4,>=2 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from requests->blosc2>=2.3.0->tables->damast) (3.4.3)
Requirement already satisfied: idna<4,>=2.5 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from requests->blosc2>=2.3.0->tables->damast) (3.10)
Requirement already satisfied: urllib3<3,>=1.21.1 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from requests->blosc2>=2.3.0->tables->damast) (2.5.0)
Requirement already satisfied: certifi>=2017.4.17 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from requests->blosc2>=2.3.0->tables->damast) (2025.8.3)
Requirement already satisfied: filelock in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from torch->damast) (3.19.1)
Requirement already satisfied: sympy>=1.13.3 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from torch->damast) (1.14.0)
Requirement already satisfied: networkx in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from torch->damast) (3.4.2)
Requirement already satisfied: jinja2 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from torch->damast) (3.1.6)
Requirement already satisfied: fsspec in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from torch->damast) (2025.7.0)
Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.8.93 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from torch->damast) (12.8.93)
Requirement already satisfied: nvidia-cuda-runtime-cu12==12.8.90 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from torch->damast) (12.8.90)
Requirement already satisfied: nvidia-cuda-cupti-cu12==12.8.90 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from torch->damast) (12.8.90)
Requirement already satisfied: nvidia-cudnn-cu12==9.10.2.21 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from torch->damast) (9.10.2.21)
Requirement already satisfied: nvidia-cublas-cu12==12.8.4.1 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from torch->damast) (12.8.4.1)
Requirement already satisfied: nvidia-cufft-cu12==11.3.3.83 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from torch->damast) (11.3.3.83)
Requirement already satisfied: nvidia-curand-cu12==10.3.9.90 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from torch->damast) (10.3.9.90)
Requirement already satisfied: nvidia-cusolver-cu12==11.7.3.90 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from torch->damast) (11.7.3.90)
Requirement already satisfied: nvidia-cusparse-cu12==12.5.8.93 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from torch->damast) (12.5.8.93)
Requirement already satisfied: nvidia-cusparselt-cu12==0.7.1 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from torch->damast) (0.7.1)
Requirement already satisfied: nvidia-nccl-cu12==2.27.3 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from torch->damast) (2.27.3)
Requirement already satisfied: nvidia-nvtx-cu12==12.8.90 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from torch->damast) (12.8.90)
Requirement already satisfied: nvidia-nvjitlink-cu12==12.8.93 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from torch->damast) (12.8.93)
Requirement already satisfied: nvidia-cufile-cu12==1.13.1.3 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from torch->damast) (1.13.1.3)
Requirement already satisfied: triton==3.4.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from torch->damast) (3.4.0)
Requirement already satisfied: setuptools>=40.8.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from triton==3.4.0->torch->damast) (80.9.0)
Requirement already satisfied: mpmath<1.4,>=1.1.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from sympy>=1.13.3->torch->damast) (1.3.0)
Requirement already satisfied: MarkupSafe>=2.0 in /home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages (from jinja2->torch->damast) (3.0.2)

The data is stored in a polars.DataFrame, and we can inspect the first and last 5 messages in the dataset.

print(data.dataframe)
shape: (160_798, 11)
┌───────────┬─────────────┬────────────┬──────────────┬───┬────────────┬─────┬────────────┬────────┐
│ mmsi      ┆ lon         ┆ lat        ┆ date_time_ut ┆ … ┆ nav_status ┆ rot ┆ message_nr ┆ source │
│ ---       ┆ ---         ┆ ---        ┆ c            ┆   ┆ ---        ┆ --- ┆ ---        ┆ ---    │
│ i64       ┆ f64         ┆ f64        ┆ ---          ┆   ┆ i64        ┆ f64 ┆ i64        ┆ str    │
│           ┆             ┆            ┆ str          ┆   ┆            ┆     ┆            ┆        │
╞═══════════╪═════════════╪════════════╪══════════════╪═══╪════════════╪═════╪════════════╪════════╡
│ 624781606 ┆ 46.671957   ┆ 29.128738  ┆ 1972-09-08   ┆ … ┆ 0          ┆ 0.0 ┆ 1          ┆ g      │
│           ┆             ┆            ┆ 06:21:44     ┆   ┆            ┆     ┆            ┆        │
│ 604927648 ┆ -138.892359 ┆ 30.879095  ┆ 1997-07-29   ┆ … ┆ 1          ┆ 0.0 ┆ 2          ┆ s      │
│           ┆             ┆            ┆ 22:41:58     ┆   ┆            ┆     ┆            ┆        │
│ 500375442 ┆ -98.899516  ┆ -85.038526 ┆ 1994-08-11   ┆ … ┆ 1          ┆ 0.0 ┆ 2          ┆ g      │
│           ┆             ┆            ┆ 23:59:52     ┆   ┆            ┆     ┆            ┆        │
│ 191856343 ┆ 140.360876  ┆ 64.983615  ┆ 1987-11-02   ┆ … ┆ 7          ┆ 0.0 ┆ 2          ┆ g      │
│           ┆             ┆            ┆ 12:56:22     ┆   ┆            ┆     ┆            ┆        │
│ 361758070 ┆ -160.089178 ┆ -69.483108 ┆ 1971-05-19   ┆ … ┆ 1          ┆ 0.0 ┆ 2          ┆ s      │
│           ┆             ┆            ┆ 15:32:44     ┆   ┆            ┆     ┆            ┆        │
│ …         ┆ …           ┆ …          ┆ …            ┆ … ┆ …          ┆ …   ┆ …          ┆ …      │
│ 649809939 ┆ 94.247519   ┆ -30.630982 ┆ null         ┆ … ┆ 0          ┆ 0.0 ┆ 3          ┆ g      │
│ 209762710 ┆ 80.342162   ┆ 87.195355  ┆ 1977-06-10   ┆ … ┆ 0          ┆ 0.0 ┆ 1          ┆ s      │
│           ┆             ┆            ┆ 03:41:05     ┆   ┆            ┆     ┆            ┆        │
│ 608026089 ┆ 18.136546   ┆ -80.601225 ┆ 1999-12-08   ┆ … ┆ 0          ┆ 0.0 ┆ 3          ┆ g      │
│           ┆             ┆            ┆ 10:23:36     ┆   ┆            ┆     ┆            ┆        │
│ 378222640 ┆ 34.989481   ┆ -39.538779 ┆ 1992-05-08   ┆ … ┆ 7          ┆ 0.0 ┆ 2          ┆ g      │
│           ┆             ┆            ┆ 00:37:27     ┆   ┆            ┆     ┆            ┆        │
│ 325517325 ┆ 23.458519   ┆ -4.7058    ┆ 2003-09-06   ┆ … ┆ 0          ┆ 0.0 ┆ 3          ┆ s      │
│           ┆             ┆            ┆ 20:17:31     ┆   ┆            ┆     ┆            ┆        │
└───────────┴─────────────┴────────────┴──────────────┴───┴────────────┴─────┴────────────┴────────┘

The dataset consists of 11 columns, which we will go through in detail.

Data-specification#

The Maritime Mobile Service Identity (MMSI) is used to identify a ship. It should be a 9-digit number whose first digit is between 2 and 7. The data we have generated should contain some invalid numbers. Let us inspect these.

from damast.domains.maritime.data_specification import MMSI
df = data.dataframe
invalid_mmsis = df.filter((polars.col('mmsi') < MMSI.min_value) | (polars.col('mmsi') > MMSI.max_value))
invalid_mmsis
shape: (13_211, 11)
mmsilonlatdate_time_utcsogcogtrue_headingnav_statusrotmessage_nrsource
i64f64f64strf64f64f64i64f64i64str
191856343140.36087664.983615"1987-11-02 12:56:22"16.328821-1.582761-1.49023270.02"g"
199362256-127.6895861.957546"2011-03-23 11:30:56"-3.963429-2.704236-2.67373800.02"s"
827208262-57.646735-50.072379"2021-12-30 07:20:45"-7.7712890.8789380.88217300.01"g"
81325169851.98028451.648132"2013-05-07 10:20:00"-7.5078343.235163.3025410.02"g"
197297559-160.93963287.823484"1984-01-30 11:07:54"-24.121599-1.257848-1.19635570.02"g"
811255383153.18972984.30045"2004-08-27 23:13:54"9.953916-1.323399-1.26790270.02"g"
820613276-126.627564-26.114735"2001-05-09 22:58:45"-26.2207487.226587.24306800.01"g"
80284249710.071865-82.916855"2019-01-24 14:58:02"2.176363-0.860705-0.81339200.02"s"
820389368173.203551-67.585602"2008-11-14 11:06:21"2.242550.8826470.96839100.03"s"
199362256-127.4960241.697298"2011-03-23 10:30:22"-6.903085-1.109969-1.07147810.02"s"

Before sending this data to a machine learning algorithm, one would have to filter out invalid data. We can do this by creating a damast.core.DataSpecification describing what valid output we would like in our data-frame.

from damast.core import DataSpecification, MinMax
mmsi_spec = DataSpecification(name="mmsi", description="Maritime Mobile Service Identity", representation_type=int,
                              value_range=MinMax(MMSI.min_value, MMSI.max_value))

Here we have specified what the column contains, how the data is represented in Python, and its minimum and maximum values. Next, we create a damast.core.MetaData object that we can apply to the dataframe.

from damast.core import MetaData,ValidationMode
metadata = MetaData([mmsi_spec])
metadata.apply(df.lazy(), ValidationMode.UPDATE_DATA)
/home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages/damast/core/metadata.py:617: UserWarning: DataSpecification.apply: column 'mmsi': expected representation type: <class 'int'>, but got 'Int64'
  warnings.warn(
/home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages/damast/core/metadata.py:626: UserWarning: Filtering out for column 'mmsi' values that are out of range.
  warnings.warn(
naive plan: (run LazyFrame.explain(optimized=True) to see the optimized plan)

FILTER [(col("mmsi")) <= (799999999)]

FROM

FILTER [(col("mmsi")) >= (200000000)]

FROM

WITH_COLUMNS:

[col("mmsi")]

DF ["mmsi", "lon", "lat", "date_time_utc", ...]; PROJECT */11 COLUMNS

Of course, we do not want to do this process manually per column. Therefore, we can create a DataSpecification per column, and let the damast.core.AnnotatedDataFrame handle the validation of the data. We can choose between three ways of handling the input data with metadata:

  • ValidationMode.READONLY: Reads in the data, checks it against the meta-data and throws an error if the data does not adhere to the data-specification.

  • ValidationMode.UPDATE_METADATA: Update the metadata based on the input in the annotated data-frame. This might change the representation type, column name and valid ranges of the data.

  • ValidationMode.UPDATE_DATA: Update data so that it adheres to the meta-data.

from damast.core.metadata import DataCategory
from damast.core.dataframe import AnnotatedDataFrame
dataspec = {
    "annotations": {"comment": "This is a autogenerated test data set"},
    "columns": [
        {"name": "mmsi", "is_optional": False, "category": DataCategory.STATIC,
         "value_range":{"MinMax": {"min": MMSI.min_value, "max": MMSI.max_value}}},
        {"name": "lon", "is_optional": False, "unit": "deg", "category": DataCategory.DYNAMIC},
        {"name": "lat", "is_optional": False, "unit": "deg", "category": DataCategory.DYNAMIC},
        {"name": "date_time_utc", "is_optional": False, "category": DataCategory.DYNAMIC},
        {"name": "sog", "is_optional": False, "category": DataCategory.DYNAMIC},
        {"name": "cog", "is_optional": False, "category": DataCategory.DYNAMIC},
        {"name": "true_heading", "is_optional": False, "category": DataCategory.DYNAMIC},
        {"name": "nav_status", "is_optional": False, "category": DataCategory.DYNAMIC},
        {"name": "rot", "is_optional": False, "category": DataCategory.DYNAMIC},
        {"name": "message_nr", "is_optional": False, "category": DataCategory.DYNAMIC},
        {"name": "source", "is_optional": False, "category": DataCategory.DYNAMIC},
    ]
}
metadata = MetaData.from_dict(dataspec)
data = generator.AISTestData(number_of_trajectories=1000, min_length=25, max_length=300)
adf = AnnotatedDataFrame(data.dataframe, metadata, validation_mode=ValidationMode.UPDATE_DATA)
adf
/home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages/damast/core/metadata.py:626: UserWarning: Filtering out for column 'mmsi' values that are out of range.
  warnings.warn(
naive plan: (run LazyFrame.explain(optimized=True) to see the optimized plan)

FILTER [([(col("mmsi")) >= (200000000)]) & ([(col("mmsi")) <= (799999999)])]

FROM

DF ["mmsi", "lon", "lat", "date_time_utc", ...]; PROJECT */11 COLUMNS

Data-processing#

Say we want to repeat this process on any data-set we read in. Then, we should create a damast.core.dataprocessing.DataProcessingPipeline. A pipeline consists of pipeline-elements, that is, a set of transformations on the original dataset. We start by creating a pipeline-element that drops all rows missing an "mmsi" entry.

from damast.data_handling.transformers.filters import DropMissingOrNan
from damast.core.dataprocessing import DataProcessingPipeline
pipeline = DataProcessingPipeline(name="Remove missing MMSI columns",
                                  base_dir="./output_dir",
                                  inplace_transformation=True)
pipeline.add(name="Remove MMSI column",
             transformer=DropMissingOrNan(),
             name_mappings={"x": "mmsi"})

transformed_adf = pipeline.transform(adf)
transformed_adf
/home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages/damast/core/metadata.py:654: UserWarning: Setting MinMax range (-14.72131553719547, 15.849677423035518) for cog
  warnings.warn(
/home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages/damast/core/metadata.py:654: UserWarning: Setting MinMax range (1970-02-02 04:09:14, 2022-12-07 18:10:35) for date_time_utc
  warnings.warn(
/home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages/damast/core/metadata.py:654: UserWarning: Setting MinMax range (-90.20437123237573, 90.70733286901888) for lat
  warnings.warn(
/home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages/damast/core/metadata.py:654: UserWarning: Setting MinMax range (-179.68976090924193, 179.76546628071202) for lon
  warnings.warn(
/home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages/damast/core/metadata.py:654: UserWarning: Setting MinMax range (1, 3) for message_nr
  warnings.warn(
/home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages/damast/core/metadata.py:654: UserWarning: Setting MinMax range (0, 7) for nav_status
  warnings.warn(
/home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages/damast/core/metadata.py:654: UserWarning: Setting MinMax range (0.0, 0.0) for rot
  warnings.warn(
/home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages/damast/core/metadata.py:654: UserWarning: Setting MinMax range (-59.18553421830903, 58.10775399219512) for sog
  warnings.warn(
/home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages/damast/core/metadata.py:654: UserWarning: Setting MinMax range (g, s) for source
  warnings.warn(
/home/runner/work/damast/damast/.tox/build_docs/lib/python3.10/site-packages/damast/core/metadata.py:654: UserWarning: Setting MinMax range (-14.72078441289966, 15.943930776321489) for true_heading
  warnings.warn(
naive plan: (run LazyFrame.explain(optimized=True) to see the optimized plan)

FILTER [(col("mmsi").is_nan()) !=v (true)]

FROM

FILTER col("mmsi").is_not_null()

FROM

FILTER [([(col("mmsi")) >= (200000000)]) & ([(col("mmsi")) <= (799999999)])]

FROM

DF ["mmsi", "lon", "lat", "date_time_utc", ...]; PROJECT */11 COLUMNS
transformed_adf.collect()
shape: (153_966, 11)
mmsilonlatdate_time_utcsogcogtrue_headingnav_statusrotmessage_nrsource
i64f64f64strf64f64f64i64f64i64str
59217818494.53908-52.105096"2019-05-23 14:59:26"11.591497.0047257.02304470.02"s"
454517013-68.947609-22.701939"2003-03-25 01:06:16"-1.1468520.4235130.42906610.01"g"
597589629-139.536821-82.887887"1996-10-29 12:05:36"-21.3872632.4107312.50967970.02"s"
345388503138.609988-20.473397"2015-10-19 10:02:17"10.0516554.6509934.71260610.01"g"
590585248-85.906616-39.436589"2016-12-22 16:02:15"10.948951-2.793369-2.76791570.01"g"
650489867-35.10660556.990558"1990-07-19 21:25:04"6.3461480.2108540.26349900.01"g"
2247980948.977763-46.36523null-3.931012-1.361354-1.26399970.01"s"
702302236-18.61865768.028834"2022-03-02 17:22:11"-4.144045-0.71478-0.68590900.01"g"
544366252-53.0978666.006257"2021-06-24 16:16:14"4.6710833.0074423.06298570.02"g"
377546132-98.05276471.009025"1979-05-09 07:46:26"-9.0559660.42320.51961810.02"g"