Source code for tempor.datasources.prediction.one_off.plugin_sine

"""Module with the sine data source."""

from typing import Any

import numpy as np
import pandas as pd

from tempor.core import plugins
from tempor.data import dataset
from tempor.datasources import datasource


@plugins.register_plugin(name="sine", category="prediction.one_off", plugin_type="datasource")
class SineDataSource(datasource.OneOffPredictionDataSource):
    def __init__(
        self,
        no: int = 100,
        seq_len: int = 10,
        temporal_dim: int = 5,
        static_dim: int = 4,
        freq_scale: float = 1,
        with_missing: bool = False,
        miss_ratio: float = 0.1,
        static_scale: float = 1.0,
        ts_scale: float = 1.0,
        random_state: int = 42,
        **kwargs: Any,
    ) -> None:
        """Data source that generates sinusoidal time series with random static features and binary targets.

        The constructor only stores the settings; the data itself is generated in `load`.

        Args:
            no (int, optional):
                Number of samples to generate. Defaults to ``100``.
            seq_len (int, optional):
                Number of time steps in each time series. Defaults to ``10``.
            temporal_dim (int, optional):
                Number of time-series features per sample. Defaults to ``5``.
            static_dim (int, optional):
                Number of static features per sample. Defaults to ``4``.
            freq_scale (float, optional):
                Multiplier applied to the randomly drawn frequency of each signal
                (``sin(freq_scale * random_drawn_freq * x + phase)``). Defaults to ``1``.
            with_missing (bool, optional):
                If `True`, some static and time-series values are replaced with `np.nan`. Defaults to `False`.
            miss_ratio (float, optional):
                Fraction of rows set to `np.nan` in each column when ``with_missing`` is enabled.
                Defaults to ``0.1``.
            static_scale (float, optional):
                Factor the static data is multiplied by. Defaults to ``1.0``.
            ts_scale (float, optional):
                Factor the time-series data is multiplied by. Defaults to ``1.0``.
            random_state (int, optional):
                Seed passed to `numpy.random.seed` when the data is loaded. Defaults to ``42``.
            **kwargs (Any):
                Any additional keyword arguments are forwarded to the parent class constructor.
        """
        super().__init__(**kwargs)

        # Size of the generated dataset.
        self.no = no
        self.seq_len = seq_len
        self.temporal_dim = temporal_dim
        self.static_dim = static_dim

        # Signal shape and value scaling.
        self.freq_scale = freq_scale
        self.static_scale = static_scale
        self.ts_scale = ts_scale

        # Missingness settings.
        self.with_missing = with_missing
        self.miss_ratio = miss_ratio

        # Reproducibility.
        self.random_state = random_state

    @staticmethod
    def url() -> None:  # noqa: D102
        # Always None for this data source.
        return None

    @staticmethod
    def dataset_dir() -> None:  # noqa: D102
        # Always None for this data source.
        return None

    def load(self, **kwargs: Any) -> dataset.OneOffPredictionDataset:  # noqa: D102
        def knock_out(frame: pd.DataFrame) -> None:
            # Column by column, pick a `miss_ratio` fraction of the rows and overwrite them with NaN (in place).
            for column in frame.columns:
                dropped_rows = frame.sample(frac=self.miss_ratio).index
                frame.loc[dropped_rows, column] = np.nan

        # The global NumPy RNG is seeded here, so the order of the random draws below determines the output.
        np.random.seed(self.random_state)
        sample_ids = list(range(self.no))

        # Static features: uniform random values, indexed by sample, with string column names.
        static_df = pd.DataFrame(np.random.rand(self.no, self.static_dim) * self.static_scale)
        static_df["sample_idx"] = sample_ids
        static_df = static_df.set_index(keys=["sample_idx"], drop=True)
        static_df.columns = static_df.columns.astype(str)
        if self.with_missing:
            knock_out(static_df)

        # Targets: one random binary label (0 or 1) per sample.
        targets_df = pd.DataFrame(np.random.randint(0, 2, self.no))
        targets_df["sample_idx"] = sample_ids
        targets_df.columns = targets_df.columns.astype(str)
        targets_df = targets_df.set_index(keys=["sample_idx"], drop=True)

        # Time series: one frame per sample, rows are time steps and columns are temporal features.
        steps = self.seq_len
        per_sample_frames = []
        for sample_id in sample_ids:
            signals = []
            for _ in range(self.temporal_dim):
                # Each feature gets its own randomly drawn frequency and phase.
                freq = np.random.beta(2, 2)
                phase = np.random.normal()
                signals.append([np.sin(self.freq_scale * freq * t + phase) * self.ts_scale for t in range(steps)])

            # `signals` is (feature, time); transpose so that time runs along the rows.
            frame = pd.DataFrame(np.asarray(signals).T)
            frame.columns = frame.columns.astype(str)
            if self.with_missing:
                knock_out(frame)

            # Attach the index columns only after masking, so they are never set to NaN.
            frame["sample_idx"] = sample_id
            frame["time_idx"] = list(range(steps))
            per_sample_frames.append(frame)

        time_series_df = pd.concat(per_sample_frames, ignore_index=True)
        time_series_df = time_series_df.set_index(keys=["sample_idx", "time_idx"], drop=True)

        # Sort all three frames by their index before building the dataset.
        time_series_df = time_series_df.sort_index(level=[0, 1])
        static_df = static_df.sort_index()
        targets_df = targets_df.sort_index()

        return dataset.OneOffPredictionDataset(
            time_series=time_series_df,
            targets=targets_df,
            static=static_df,
            sample_index="sample_idx",
            time_index="time_idx",
        )