Source code for tempor.models.clairvoyance2.utils.common

# mypy: ignore-errors

from typing import Any, Iterable, Mapping, Sequence, Tuple, Union

import numpy as np
import pandas as pd
from dotmap import DotMap


def _extract_np_dtype_from_pd(_type: type) -> type:
    """Helper to handle `pandas` references to `numpy` dtypes, e.g. `dtype('int64')`.
    If `_type` is a `pandas` reference to a `numpy` dtype, will return the underlying `numpy` dtype.
    Otherwise will return `_type` as passed.
    """
    try:
        _type = _type.type  # type: ignore
    except AttributeError:
        pass
    return _type


# For the purposes of data type comparisons in this library,
# we assume the following numpy types to be equivalent to Python types.
# NOTE: `np.float_` is intentionally not listed. It was only an alias of `np.float64` (so its entry was
# redundant) and the alias was removed in NumPy 2.0, where merely referencing it raises `AttributeError`
# and would make this module fail at import time.
NP_EQUIVALENT_TYPES_MAP: Mapping[type, type] = {
    np.int_: int,
    np.int32: int,
    np.int64: int,
    np.float32: float,
    np.float64: float,
    np.object_: object,
}


def _np_dtype_to_python_type(dtype: type) -> type:
    """Translate a `numpy` scalar type into its Python counterpart, when one is registered.

    Args:
        dtype: The type to translate.

    Returns:
        The value stored under `dtype` in `NP_EQUIVALENT_TYPES_MAP`, or `dtype` itself
        when it is not a key of that mapping.
    """
    return NP_EQUIVALENT_TYPES_MAP.get(dtype, dtype)


def python_type_from_np_pd_dtype(dtype: type) -> type:
    """Resolve a `numpy`/`pandas` dtype reference to the Python type this library treats as equivalent.

    Two steps are applied in order: the dtype object is first unwrapped via
    `_extract_np_dtype_from_pd`, and the result is then mapped through `_np_dtype_to_python_type`.

    Args:
        dtype: A plain type or a dtype object.

    Returns:
        The mapped Python type, or the (unwrapped) input when no mapping applies.
    """
    unwrapped = _extract_np_dtype_from_pd(dtype)
    return _np_dtype_to_python_type(unwrapped)


def isinstance_compat_np_pd_dtypes(o: Any, _type: type) -> bool:
    """`isinstance`-style check performed after normalizing both sides with `python_type_from_np_pd_dtype`.

    Args:
        o: The object whose type is examined.
        _type: The type (or dtype reference) to compare against.

    Returns:
        Whether the normalized type of `o` is a subclass of the normalized `_type`.
    """
    actual_type = python_type_from_np_pd_dtype(type(o))
    expected_type = python_type_from_np_pd_dtype(_type)
    return issubclass(actual_type, expected_type)


def isnan(value: Union[int, float]) -> bool:
    """Report whether `value` is NaN, as a plain Python `bool`.

    Args:
        value: An `int` or `float` instance (subclasses included).

    Returns:
        `True` if `np.isnan` considers `value` NaN. `False` otherwise, including when
        `np.isnan` itself raises `TypeError` for the value.

    Raises:
        TypeError: If `value` is not an instance of `int` or `float`.
    """
    if isinstance(value, (int, float)):
        try:
            return bool(np.isnan(value))  # numpy.bool_ --> bool
        except TypeError:
            # `np.isnan` could not handle this value; treat it as "not NaN".
            return False
    raise TypeError(f"Value of type {type(value)} is not supported")


def equal_or_nans(a: Any, b: Any) -> bool:
    """Compare two values, treating NaN as equal to NaN.

    Args:
        a: First value.
        b: Second value.

    Returns:
        If `a` is NaN: whether `b` is NaN as well. Otherwise: the result of `a == b`.

    Raises:
        TypeError: Propagated from `isnan` when either argument is not an `int`/`float`.
    """
    # Both checks run unconditionally, so an unsupported type on either side raises
    # before any comparison happens.
    a_nan, b_nan = isnan(a), isnan(b)
    return b_nan if a_nan else a == b


# np.ndarray isn't considered a Sequence but is suitable here.
TSequenceForRollingWindow = Union[Sequence, np.ndarray]


def rolling_window(
    sequence: TSequenceForRollingWindow, window: int, expand: str = "neither"
) -> Tuple[TSequenceForRollingWindow, ...]:
    """Cut `sequence` into consecutive slices of length `window`, optionally with partial windows at the ends.

    Args:
        sequence: Anything supporting `len()` and slicing.
        window: Window length; must be positive.
        expand: Which partial windows to add to the full ones:
            `"left"` adds the growing prefixes `sequence[0:1] ... sequence[0:window]`;
            `"right"` starts a window at every position and truncates it at the end of `sequence`;
            `"both"` does both. Any other value yields full windows only.
            When expanding, `window` is first clamped to `len(sequence)`.

    Returns:
        A tuple of slices of `sequence`, ordered by (start, stop), without duplicates.

    Raises:
        ValueError: If `window <= 0`.
    """
    # TODO: Efficiency.
    if window <= 0:
        raise ValueError(f"`window` must be > 0, was {window}")

    n_items = len(sequence)
    grow_left = expand in ("left", "both")
    grow_right = expand in ("right", "both")
    if grow_left or grow_right:
        window = min(window, n_items)

    # A set removes the overlap between the prefix windows and the regular ones (e.g. `(0, window)`).
    bounds = set()
    if grow_left:
        bounds.update((0, stop) for stop in range(1, window + 1))
    if grow_right:
        bounds.update((start, min(start + window, n_items)) for start in range(n_items))
    else:
        bounds.update((start, start + window) for start in range(n_items - window + 1))

    return tuple(sequence[start:stop] for start, stop in sorted(bounds))


def empty_df_like(like_df: pd.DataFrame) -> pd.DataFrame:
    """Build a zero-row copy of `like_df`.

    Args:
        like_df: The template data frame; it is not modified.

    Returns:
        A new data frame produced by slicing away all rows of `like_df` and copying the result,
        so the columns and their dtypes carry over.
    """
    no_rows = slice(None, 0)
    return like_df.iloc[no_rows, :].copy()


def is_namedtuple(o: Any) -> bool:
    """Heuristically decide whether `o` is a named-tuple instance.

    Args:
        o: Any object.

    Returns:
        `True` when `o` is a `tuple` instance that also has `_asdict` and `_fields` attributes.
        `False` otherwise, and always `False` for `DotMap` instances.
    """
    # `DotMap` is rejected up front: the attribute probes below would always succeed on it.
    if isinstance(o, DotMap) or not isinstance(o, tuple):
        return False
    # Credit for the attribute-based check: https://stackoverflow.com/a/62692640
    return all(hasattr(o, attr) for attr in ("_asdict", "_fields"))


def safe_init_dotmap(o: object) -> DotMap:
    """Wrap `o` in a `DotMap` constructed with `_dynamic=False`.

    Args:
        o: The object handed to the `DotMap` constructor.

    Returns:
        The resulting `DotMap`.
    """
    # NOTE(review): `_dynamic=False` presumably turns off DotMap's automatic creation of
    # missing keys on access -- confirm against the dotmap documentation.
    dotmap = DotMap(o, _dynamic=False)
    return dotmap


def split_multi_index_dataframe(df: pd.DataFrame) -> Iterable[pd.DataFrame]:
    """Split a multi-indexed data frame into one piece per label of its outermost index level.

    Args:
        df: A data frame whose index is a `pd.MultiIndex`.

    Returns:
        A lazy iterable yielding `df.loc[label, :]` for every outermost-level label that
        actually occurs in `df`, in the order of the index's level values.

    Raises:
        ValueError: If the index of `df` is not a `pd.MultiIndex` (raised immediately, not on iteration).
    """
    if not isinstance(df.index, pd.MultiIndex):
        raise ValueError("Data frame did not have a multi-index.")
    # `MultiIndex.levels` keeps labels that no longer occur after the frame has been filtered or sliced.
    # Looking such a label up with `.loc` raises `KeyError`, so unused labels are pruned first.
    outer_labels = list(df.index.remove_unused_levels().levels[0])
    return (df.loc[label, :] for label in outer_labels)


def df_eq_indicator(df: pd.DataFrame, indicator: float) -> pd.DataFrame:
    """Produce an element-wise mask marking where `df` matches `indicator`.

    Args:
        df: The data frame to test.
        indicator: The value to look for; may be NaN.

    Returns:
        `df.isnull()` when `indicator` is NaN (an `==` comparison never matches NaN),
        otherwise `df == indicator`.
    """
    if isnan(indicator):
        return df.isnull()
    return df == indicator