# Source code for tempor.automl.seeker

"""Module containing the interface for, and the implemented hyperparameter seekers."""

import abc
import copy
import functools
import warnings
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, cast

import numpy as np
import optuna
import pydantic
from typing_extensions import Literal, Type, get_args

from tempor import plugin_loader
from tempor.benchmarks import evaluation
from tempor.core import pydantic_utils
from tempor.core.types import PredictiveTaskType
from tempor.data import data_typing
from tempor.data.dataset import PredictiveDataset, TimeToEventAnalysisDataset
from tempor.log import logger
from tempor.methods.core import BasePredictor
from tempor.methods.core.params import Params
from tempor.metrics import metric_typing

from ._types import AutoMLCompatibleEstimator
from .pipeline_selector import (
    DEFAULT_STATIC_IMPUTERS,
    DEFAULT_STATIC_SCALERS,
    DEFAULT_TEMPORAL_IMPUTERS,
    DEFAULT_TEMPORAL_SCALERS,
    PipelineSelector,
)
from .tuner import BaseTuner, OptunaTuner

TunerType = Literal[
    "bayesian",
    "random",
    "cmaes",
    "qmc",
    "grid",
]
"""Hyperparameter tuner to use.

Available options:
  - `"bayesian"`: Use a tuner based on `optuna.samplers.TPESampler`.
  - `"random"`: Use a tuner based on `optuna.samplers.RandomSampler`.
  - `"cmaes"`: Use a tuner based on `optuna.samplers.CmaEsSampler`.
  - `"qmc"`: Use a tuner based on `optuna.samplers.QMCSampler`.
  - `"grid"`: Use a tuner based on `optuna.samplers.GridSampler`.
"""

TUNER_OPTUNA_SAMPLER_MAP: Dict[TunerType, Any] = {
    "bayesian": optuna.samplers.TPESampler,
    "random": optuna.samplers.RandomSampler,
    "cmaes": optuna.samplers.CmaEsSampler,
    "qmc": optuna.samplers.QMCSampler,
    "grid": optuna.samplers.GridSampler,
}
"""A map from `TunerType` to the corresponding `optuna` sampler class"""

# TODO: Parallelism support (per-estimator).


def evaluation_callback_dispatch(
    estimator: Type[BasePredictor],
    dataset: PredictiveDataset,
    task_type: PredictiveTaskType,
    metric: str,
    n_cv_folds: int,
    random_state: int,
    horizon: Optional[data_typing.TimeIndex],
    raise_exceptions: bool,
    silence_warnings: bool,
    *args: Any,
    **kwargs: Any,
) -> float:
    """Instantiate ``estimator`` and score it on ``dataset`` with the evaluation routine matching ``task_type``.

    The estimator is constructed from ``*args`` / ``**kwargs`` and handed to the corresponding function of the
    `tempor.benchmarks.evaluation` module. The ``"mean"`` entry of the ``metric`` row in the returned metrics
    table is the result.

    Args:
        estimator (Type[BasePredictor]):
            The predictor estimator class to instantiate and evaluate.
        dataset (PredictiveDataset):
            The dataset to evaluate on.
        task_type (PredictiveTaskType):
            The task type of the predictor; selects the evaluation function.
        metric (str):
            The metric whose mean value is returned.
        n_cv_folds (int):
            Number of cross-validation folds (forwarded as ``n_splits``).
        random_state (int):
            Random state forwarded to the evaluation function.
        horizon (Optional[data_typing.TimeIndex]):
            The prediction horizon. Only used (and required) for the ``"time_to_event"`` task.
        raise_exceptions (bool):
            Forwarded to the evaluation function as ``raise_exceptions``.
        silence_warnings (bool):
            Forwarded to the evaluation function as ``silence_warnings``.
        *args (Any):
            Positional arguments to pass to the estimator constructor.
        **kwargs (Any):
            Keyword arguments to pass to the estimator constructor.

    Raises:
        NotImplementedError:
            For the temporal prediction and all treatments task types, which have no evaluation path here.
        ValueError:
            For ``"time_to_event"`` when ``dataset`` is not a `TimeToEventAnalysisDataset` or ``horizon`` is
            `None`; also for an unrecognised ``task_type``.

    Returns:
        float: ``metrics.loc[metric, "mean"]`` of the evaluation output.
    """
    model = estimator(*args, **kwargs)

    # Settings that every evaluation function receives unchanged.
    shared_settings = dict(
        n_splits=n_cv_folds,
        random_state=random_state,
        raise_exceptions=raise_exceptions,
        silence_warnings=silence_warnings,
    )

    # TODO: Handle missing cases.
    not_yet_supported = (
        "prediction.temporal.classification",
        "prediction.temporal.regression",
        "treatments.one_off.classification",
        "treatments.one_off.regression",
        "treatments.temporal.classification",
        "treatments.temporal.regression",
    )

    if task_type == "prediction.one_off.classification":
        metrics = evaluation.evaluate_prediction_oneoff_classifier(model, dataset, **shared_settings)
    elif task_type == "prediction.one_off.regression":
        metrics = evaluation.evaluate_prediction_oneoff_regressor(model, dataset, **shared_settings)
    elif task_type == "time_to_event":
        if not isinstance(dataset, TimeToEventAnalysisDataset):
            raise ValueError(
                f"`dataset` must be of type {TimeToEventAnalysisDataset.__name__} for the '{task_type}' task"
            )
        if horizon is None:
            raise ValueError(f"`horizon` must not be None for the '{task_type}' task")
        metrics = evaluation.evaluate_time_to_event(model, dataset, horizons=horizon, **shared_settings)
    elif task_type in not_yet_supported:  # pragma: no cover
        raise NotImplementedError
    else:  # pragma: no cover
        raise ValueError(f"Unknown evaluation case: {task_type}")

    return metrics.loc[metric, "mean"]  # pyright: ignore


class BaseSeeker(abc.ABC):
    @pydantic_utils.validate_arguments(config=pydantic.ConfigDict(arbitrary_types_allowed=True))
    def __init__(  # pylint: disable=unused-argument
        self,
        study_name: str,
        task_type: PredictiveTaskType,
        estimator_names: List[str],
        estimator_defs: List[Any],
        metric: str,
        dataset: PredictiveDataset,
        *,
        return_top_k: int = 3,
        num_cv_folds: int = 5,
        num_iter: int = 100,
        tuner_patience: int = 5,
        tuner_type: TunerType = "bayesian",
        timeout: int = 360,
        random_state: int = 0,
        override_hp_space: Optional[Dict[str, List[Params]]] = None,
        horizon: Optional[data_typing.TimeIndex] = None,
        compute_baseline_score: bool = False,
        grid: Optional[Dict[str, Dict[str, Any]]] = None,
        custom_tuner: Optional[BaseTuner] = None,
        raise_exceptions: bool = True,
        silence_warnings: bool = False,
        **kwargs: Any,
    ) -> None:
        """The base class for an AutoML Seeker, to be derived from by concrete implementations. Provides an
        AutoML interface, in particular, the ``search`` method.

        Args:
            study_name (str):
                The name of the AutoML study (that is, the set of all individual AutoML trials).
            task_type (PredictiveTaskType):
                The task type of the predictor estimators to be searched.
            estimator_names (List[str]):
                Friendly names of estimators. Will be passed one-by-one to ``_init_estimator`` method calls.
            estimator_defs (List[Any]):
                Definition of estimators. Will be passed one-by-one to ``_init_estimator`` method calls.
            metric (str):
                The metric to use for evaluation.
            dataset (PredictiveDataset):
                The dataset to use for evaluation.
            return_top_k (int, optional):
                How many best estimators to return. Defaults to ``3``.
            num_cv_folds (int, optional):
                How many cross-validation folds to use. Defaults to ``5``.
            num_iter (int, optional):
                Number of AutoML iterations. Defaults to ``100``.
            tuner_patience (int, optional):
                Patience of the AutoML tuner (for early-stopping). Defaults to ``5``.
            tuner_type (TunerType, optional):
                The type of AutoML tuner to use. Defaults to ``"bayesian"``.
            timeout (int, optional):
                AutoML optimization run time out (seconds). Defaults to ``360``.
            random_state (int, optional):
                Random state to use. Defaults to ``0``.
            override_hp_space (Optional[Dict[str, List[Params]]], optional):
                A dictionary with ``estimator_names`` keys and the hyperparameter space overrides values, e.g.
                ``{"my_estimator_A": [IntegerParams("some_param", low=1, high=100), ...], "my_estimator_B": ....}``.
                Defaults to `None`.
            horizon (Optional[data_typing.TimeIndex], optional):
                The prediction horizon for evaluation. Applicable to the "time_to_event" task case.
                Defaults to `None`.
            compute_baseline_score (bool, optional):
                (If supported by the ``Seeker`` implementation.) Whether to run a baseline trial and compute its
                score. A baseline trial is a trial with all the default parameters. Defaults to `False`.
            grid (Optional[Dict[str, Dict[str, Any]]], optional):
                (If supported by the ``Seeker`` implementation; only relevant to `"grid"` tuner type) The grid for
                the grid search tuner type. Keys are ``estimator_names``, values are
                ``(param_name: str -> List[param_value_candidate: Any])``. The key order does not matter, but
                the set of keys must equal the set of estimator names. Defaults to `None`.
            custom_tuner (Optional[BaseTuner], optional):
                Pass a ``custom_tuner`` to override the default AutoML tuner for ``tuner_type``.
                Defaults to `None`.
            raise_exceptions (bool, optional):
                If set to `True`, if an exception is raised during AutoML study run, this will be raised and
                execution will be terminated. Otherwise the exception will be ignored. Defaults to `True`.
            silence_warnings (bool, optional):
                Whether to silence warnings raised. Some dependencies (e.g. `xgbse`) may circumvent this and
                raise warnings regardless. Defaults to `False`.
            **kwargs (Any):
                Currently unused.

        Raises:
            ValueError: If incompatible / invalid input arguments have been passed.
        """
        if override_hp_space is None:
            override_hp_space = dict()

        self.study_name = study_name
        self.task_type: PredictiveTaskType = task_type
        self.metric = metric
        self.dataset = dataset
        self.return_top_k = return_top_k
        self.num_cv_folds = num_cv_folds
        self.num_iter = num_iter
        self.tuner_patience = tuner_patience
        self.tuner_type: TunerType = tuner_type
        self.timeout = timeout
        self.random_state = random_state
        self.horizon = horizon
        self.compute_baseline_score = compute_baseline_score
        self.grid = grid
        self.raise_exceptions = raise_exceptions
        self.silence_warnings = silence_warnings

        self._metric_object = plugin_loader.get(f"{task_type}.{self.metric}", plugin_type="metric")

        if len(estimator_defs) != len(estimator_names):
            raise ValueError("`estimator_defs` and `estimator_names` must be the same length.")
        self.estimator_names = estimator_names
        self.estimator_defs = estimator_defs

        # Validate "grid" case:
        if self.tuner_type == "grid":
            if self.grid is None:
                raise ValueError("Must provide `grid` argument when using the 'grid' search method")
            # The grid is only ever looked up by estimator name, so the key order is irrelevant;
            # compare as sets rather than as ordered lists.
            if set(self.estimator_names) != set(self.grid.keys()):
                raise ValueError(
                    f"`grid` keys ({list(self.grid.keys())}) did not match "
                    f"estimator names ({self.estimator_names})"
                )

        # Validate and set override_hp_space:
        for key in override_hp_space.keys():
            if key not in self.estimator_names:
                raise ValueError(
                    f"Estimator name '{key}' in `override_hp_space` was found that did not "
                    f"correspond to any of the estimators provided: {self.estimator_names}"
                )
        self.override_hp_space = override_hp_space

        # Validate custom tuner.
        if isinstance(custom_tuner, OptunaTuner):
            if isinstance(custom_tuner.sampler, optuna.samplers.GridSampler):
                raise ValueError("Passing a custom tuner with `optuna.samplers.GridSampler` is not supported")
        self.custom_tuner = custom_tuner

        self.direction: metric_typing.MetricDirection = self._metric_object.direction

        self.estimators: List[AutoMLCompatibleEstimator] = []
        self.tuners: List[BaseTuner] = []
        self._set_up_tuners()

    @abc.abstractmethod
    def _init_estimator(self, estimator_name: str, estimator_def: Any) -> AutoMLCompatibleEstimator:  # pragma: no cover
        """Abstract method for overriding by concrete ``Seeker`` implementations. Create an AutoML estimator
        based on its name (``estimator_name``) and any other information provided in its definition
        (``estimator_def``).

        Args:
            estimator_name (str): Estimator friendly name.
            estimator_def (Any): Estimator definition - can be any data needed for creating the estimator.

        Returns:
            AutoMLCompatibleEstimator: The created estimator.
        """
        ...  # pylint: disable=unnecessary-ellipsis

    @abc.abstractmethod
    def _create_estimator_with_hps(
        self,
        estimator_cls: AutoMLCompatibleEstimator,
        hps: Dict[str, Any],
        score: float,
    ) -> BasePredictor:  # pragma: no cover
        """Abstract method for overriding by concrete ``Seeker`` implementations. Create a concrete
        `BasePredictor` instance based on the hyperparameters provided. Used when a trial is completed and a
        set of parameters has been sampled.

        Args:
            estimator_cls (AutoMLCompatibleEstimator):
                The AutoML-compatible estimator to facilitate the creation of the instance.
            hps (Dict[str, Any]):
                Hyperparameter dictionary.
            score (float):
                The score corresponding to this trial (for information).

        Returns:
            BasePredictor: Predictor instance to return.
        """
        ...  # pylint: disable=unnecessary-ellipsis

    def _set_up_tuners(self) -> None:
        """Create one estimator and one tuner per ``(estimator_name, estimator_def)`` pair.

        Fills ``self.estimators`` and ``self.tuners`` in the order of ``self.estimator_names``. Without a
        custom tuner, an `OptunaTuner` is built from the sampler mapped to ``self.tuner_type`` and a
        `optuna.pruners.PatientPruner`; otherwise the custom tuner is deep-copied per estimator and given a
        unique study name.
        """
        logger.info(f"Setting up estimators and tuners for study {self.study_name}.")

        for estimator_name, estimator_def in zip(self.estimator_names, self.estimator_defs):
            # Set up estimator.
            EstimatorCls = self._init_estimator(estimator_name=estimator_name, estimator_def=estimator_def)
            self.estimators.append(EstimatorCls)

            # Set up tuner.
            if self.custom_tuner is None:
                # Case: Default tuners based on tuner_type etc.

                # Pruner:
                with warnings.catch_warnings():
                    warnings.filterwarnings("ignore", category=optuna.exceptions.ExperimentalWarning)
                    pruner = optuna.pruners.PatientPruner(None, self.tuner_patience)

                # Sampler:
                with warnings.catch_warnings():
                    warnings.filterwarnings("ignore", category=optuna.exceptions.ExperimentalWarning)
                    SamplerCls = TUNER_OPTUNA_SAMPLER_MAP[self.tuner_type]
                    if self.tuner_type != "grid":
                        sampler = SamplerCls(seed=self.random_state)
                    else:
                        if TYPE_CHECKING:  # pragma: no cover
                            assert self.grid is not None  # nosec B101
                        sampler = SamplerCls(seed=self.random_state, search_space=self.grid[estimator_name])

                # Instantiate:
                if self.tuner_type in TUNER_OPTUNA_SAMPLER_MAP:
                    tuner = OptunaTuner(
                        study_name=f"{self.study_name}_{estimator_name}",
                        direction=self.direction,
                        study_sampler=sampler,
                        study_storage=None,
                        study_pruner=pruner,
                        study_load_if_exists=False,
                    )
                    self.tuners.append(tuner)
                else:  # pragma: no cover
                    # Should not get here as there is pydantic validation.
                    raise ValueError(f"Unsupported tuner type: {self.tuner_type}. Available: {get_args(TunerType)}.")

            else:
                # Case: Custom tuner.
                # Copy the tuner for each estimator.
                if TYPE_CHECKING:  # pragma: no cover
                    assert isinstance(self.custom_tuner, OptunaTuner)  # nosec B101
                study_name = f"{self.study_name}_{estimator_name}"
                custom_tuner = copy.deepcopy(self.custom_tuner)
                custom_tuner.study_name = study_name  # Update the study name to be unique.
                custom_tuner.create_study()
                self.tuners.append(custom_tuner)

    def search(self) -> Tuple[List[BasePredictor], List[float]]:
        """Perform AutoML search.

        Each estimator is tuned with its own tuner; the best trial of each estimator is kept, and the
        ``return_top_k`` best estimators across the study are instantiated and returned, best first.
        Estimators with equal best scores are all eligible and keep their ``estimator_names`` order.

        Returns:
            Tuple[List[BasePredictor], List[float]]:
                ``(best_estimators, best_scores)``, the best estimators and the corresponding best scores
                returned.
        """
        search_results: List[Tuple[List[float], List[Dict]]] = []
        for idx, (estimator_name, estimator_cls, tuner) in enumerate(
            zip(self.estimator_names, self.estimators, self.tuners)
        ):
            logger.info(f"Running search for estimator '{estimator_name}' {idx+1}/{len(self.estimators)}.")

            if estimator_name in self.override_hp_space:
                override = self.override_hp_space[estimator_name]
            else:
                override = None

            estimator_results = tuner.tune(
                estimator=estimator_cls,
                dataset=self.dataset,
                evaluation_callback=functools.partial(
                    evaluation_callback_dispatch,
                    task_type=self.task_type,
                    metric=self.metric,
                    n_cv_folds=self.num_cv_folds,
                    random_state=self.random_state,
                    horizon=self.horizon,
                    raise_exceptions=self.raise_exceptions,
                    silence_warnings=self.silence_warnings,
                ),
                override_hp_space=override,
                compute_baseline_score=self.compute_baseline_score,
                # NOTE: The below is OptunaTuner-only kwarg:
                optimize_kwargs=dict(n_trials=self.num_iter, timeout=self.timeout),
            )
            search_results.append(estimator_results)

        all_estimators = []
        all_scores = []
        all_hps = []
        for idx, (scores, hps) in enumerate(search_results):
            best_idx = np.argmin(scores) if self.direction == "minimize" else np.argmax(scores)
            all_scores.append(scores[best_idx])
            all_hps.append(hps[best_idx])
            all_estimators.append(self.estimators[idx])
            logger.info(f"Evaluation for {self.estimator_names[idx]} scores:\n{scores}.")

        logger.info(f"All estimator definitions searched:\n{self.estimator_names}")
        logger.info(f"Best scores for each estimator searched:\n{all_scores}")
        logger.info(f"Best hyperparameters for each estimator searched:\n{all_hps}")

        # Rank estimator *indices* rather than unique score values, so that estimators with tied
        # scores are not collapsed into a single entry and a score never has to be looked up again
        # by equality.
        all_scores_np = np.asarray(all_scores, dtype=float)
        top_k = max(0, min(self.return_top_k, len(all_scores)))
        # Negate for "maximize" so that an ascending sort always puts the best score first. The
        # stable sort keeps tied estimators in their original order; NaN keys are sorted last.
        sort_keys = all_scores_np if self.direction == "minimize" else -all_scores_np
        best_first = np.argsort(sort_keys, kind="stable")[:top_k]

        result: List[Any] = []
        result_scores: List[float] = []
        for position in best_first:
            idx = int(position)
            score = all_scores[idx]
            result_scores.append(score)
            estimator_top_k = self._create_estimator_with_hps(all_estimators[idx], all_hps[idx], score)
            result.append(estimator_top_k)

        return result, result_scores


class MethodSeeker(BaseSeeker):
    @pydantic_utils.validate_arguments(config=pydantic.ConfigDict(arbitrary_types_allowed=True))
    def __init__(
        self,
        study_name: str,
        task_type: PredictiveTaskType,
        estimator_names: List[str],
        metric: str,
        dataset: PredictiveDataset,
        *,
        return_top_k: int = 3,
        num_cv_folds: int = 5,
        num_iter: int = 100,
        tuner_patience: int = 5,
        tuner_type: TunerType = "bayesian",
        timeout: int = 360,
        random_state: int = 0,
        override_hp_space: Optional[Dict[str, List[Params]]] = None,
        horizon: Optional[data_typing.TimeIndex] = None,
        compute_baseline_score: bool = False,
        grid: Optional[Dict[str, Dict[str, Any]]] = None,
        custom_tuner: Optional[BaseTuner] = None,
        raise_exceptions: bool = True,
        silence_warnings: bool = False,
        **kwargs: Any,
    ) -> None:
        """An AutoML seeker which searches the hyperparameter space of each predictor estimator listed in
        ``estimator_names``, for the ``task_type`` task setting.

        Args:
            study_name (str):
                See `~tempor.automl.seeker.BaseSeeker`.
            task_type (PredictiveTaskType):
                See `~tempor.automl.seeker.BaseSeeker`.
            estimator_names (List[str]):
                The candidate predictors. Provide plugin names (without category qualification), e.g. like
                ``["nn_classifier", "cde_classifier"]``.
            metric (str):
                See `~tempor.automl.seeker.BaseSeeker`.
            dataset (PredictiveDataset):
                See `~tempor.automl.seeker.BaseSeeker`.
            return_top_k (int, optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            num_cv_folds (int, optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            num_iter (int, optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            tuner_patience (int, optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            tuner_type (TunerType, optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            timeout (int, optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            random_state (int, optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            override_hp_space (Optional[Dict[str, List[Params]]], optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            horizon (Optional[data_typing.TimeIndex], optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            compute_baseline_score (bool, optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            grid (Optional[Dict[str, Dict[str, Any]]], optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            custom_tuner (Optional[BaseTuner], optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            raise_exceptions (bool, optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            silence_warnings (bool, optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            **kwargs (Any):
                See `~tempor.automl.seeker.BaseSeeker`.
        """
        search_options = dict(
            return_top_k=return_top_k,
            num_cv_folds=num_cv_folds,
            num_iter=num_iter,
            tuner_patience=tuner_patience,
            tuner_type=tuner_type,
            timeout=timeout,
            random_state=random_state,
            override_hp_space=override_hp_space,
            horizon=horizon,
            compute_baseline_score=compute_baseline_score,
            grid=grid,
            custom_tuner=custom_tuner,
            raise_exceptions=raise_exceptions,
            silence_warnings=silence_warnings,
        )
        # The plugin names serve both as the friendly names and as the estimator definitions.
        super().__init__(
            study_name,
            task_type,
            estimator_names,
            estimator_names,
            metric,
            dataset,
            **search_options,
            **kwargs,
        )

    def _init_estimator(self, estimator_name: str, estimator_def: Any) -> AutoMLCompatibleEstimator:
        """Look up the estimator class for ``estimator_def`` under this seeker's task type.

        Args:
            estimator_name (str): Estimator friendly name (used for logging only).
            estimator_def (Any): Plugin name, qualified here with ``self.task_type``.

        Returns:
            AutoMLCompatibleEstimator: The class returned by ``plugin_loader.get_class``.
        """
        logger.info(f"Creating estimator {estimator_name}.")
        return plugin_loader.get_class(f"{self.task_type}.{estimator_def}")

    def _create_estimator_with_hps(
        self, estimator_cls: AutoMLCompatibleEstimator, hps: Dict[str, Any], score: float
    ) -> BasePredictor:
        """Instantiate the predictor class with the selected hyperparameters.

        Args:
            estimator_cls (AutoMLCompatibleEstimator): The predictor class to instantiate.
            hps (Dict[str, Any]): Hyperparameters, passed as keyword arguments to the constructor.
            score (float): The score achieved with ``hps`` (logged for information).

        Returns:
            BasePredictor: The instantiated predictor.
        """
        predictor_cls = cast(Type[BasePredictor], estimator_cls)
        logger.info(f"Selected score {score} for {predictor_cls.name} with hyperparameters:\n{hps}")
        return predictor_cls(**hps)


class PipelineSeeker(BaseSeeker):
    @pydantic_utils.validate_arguments(config=pydantic.ConfigDict(arbitrary_types_allowed=True))
    def __init__(
        self,
        study_name: str,
        task_type: PredictiveTaskType,
        estimator_names: List[str],
        metric: str,
        dataset: PredictiveDataset,
        *,
        static_imputers: List[str] = DEFAULT_STATIC_IMPUTERS,
        static_scalers: List[str] = DEFAULT_STATIC_SCALERS,
        temporal_imputers: List[str] = DEFAULT_TEMPORAL_IMPUTERS,
        temporal_scalers: List[str] = DEFAULT_TEMPORAL_SCALERS,
        return_top_k: int = 3,
        num_cv_folds: int = 5,
        num_iter: int = 100,
        tuner_patience: int = 5,
        tuner_type: TunerType = "bayesian",
        timeout: int = 360,
        random_state: int = 0,
        override_hp_space: Optional[Dict[str, List[Params]]] = None,
        horizon: Optional[data_typing.TimeIndex] = None,
        compute_baseline_score: bool = False,
        grid: Optional[Dict[str, Dict[str, Any]]] = None,
        custom_tuner: Optional[BaseTuner] = None,
        raise_exceptions: bool = True,
        silence_warnings: bool = False,
        **kwargs: Any,
    ) -> None:
        """An AutoML seeker which will sample pipelines comprised of:
            - A static imputer (if at least one candidate in ``static_imputers`` provided)
            - A static scaler (if at least one candidate in ``static_scalers`` provided)
            - A temporal imputer (if at least one candidate in ``temporal_imputers`` provided)
            - A temporal scaler (if at least one candidate in ``temporal_scalers`` provided)
            - The final predictor, from the ``estimator_names`` options.

        The imputer/scaler candidates will be sampled as a categorical hyperparameter. The hyperparameter
        spaces of these, and of the final predictor, will be sampled.

        Note:
            - ``compute_baseline_score=True`` is not supported, as the pipeline is dynamic and there is no
              defined baseline case.
            - ``tuner_type="grid"`` is not currently supported.

        Args:
            study_name (str):
                See `~tempor.automl.seeker.BaseSeeker`.
            task_type (PredictiveTaskType):
                See `~tempor.automl.seeker.BaseSeeker`.
            estimator_names (List[str]):
                The candidate predictors that will be the last step of the pipeline. Provide plugin names
                (without category qualification), e.g. like ``["nn_classifier", "cde_classifier"]``.
            metric (str):
                See `~tempor.automl.seeker.BaseSeeker`.
            dataset (PredictiveDataset):
                See `~tempor.automl.seeker.BaseSeeker`.
            static_imputers (List[str], optional):
                A list of candidate static imputers. Defaults to `DEFAULT_STATIC_IMPUTERS`.
            static_scalers (List[str], optional):
                A list of candidate static scalers. Defaults to `DEFAULT_STATIC_SCALERS`.
            temporal_imputers (List[str], optional):
                A list of candidate temporal imputers. Defaults to `DEFAULT_TEMPORAL_IMPUTERS`.
            temporal_scalers (List[str], optional):
                A list of candidate temporal scalers. Defaults to `DEFAULT_TEMPORAL_SCALERS`.
            return_top_k (int, optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            num_cv_folds (int, optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            num_iter (int, optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            tuner_patience (int, optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            tuner_type (TunerType, optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            timeout (int, optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            random_state (int, optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            override_hp_space (Optional[Dict[str, List[Params]]], optional):
                See `~tempor.automl.seeker.BaseSeeker`. Note that currently the hyperparameter space override
                in this case can only be specified for the last pipeline step (the predictive estimator), not
                the preceding data transformer steps. The default hyperparameter space will always be used for
                those.
            horizon (Optional[data_typing.TimeIndex], optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            compute_baseline_score (bool, optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            grid (Optional[Dict[str, Dict[str, Any]]], optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            custom_tuner (Optional[BaseTuner], optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            raise_exceptions (bool, optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            silence_warnings (bool, optional):
                See `~tempor.automl.seeker.BaseSeeker`.
            **kwargs (Any):
                See `~tempor.automl.seeker.BaseSeeker`.
        """
        # One definition per candidate predictor; the transformer candidates are shared by all of them.
        estimator_defs = [
            dict(
                predictor=predictor,
                static_imputers=static_imputers,
                static_scalers=static_scalers,
                temporal_imputers=temporal_imputers,
                temporal_scalers=temporal_scalers,
            )
            for predictor in estimator_names
        ]

        # Human-friendly pipeline names, in the same order as the predictors.
        estimator_human_names = [self._pipe_human_name(predictor) for predictor in estimator_names]

        # Validate the override against the predictor names, then re-key it by pipeline name, since
        # the base class matches overrides against the (human-friendly) estimator names.
        override_hp_space_keys_renamed: Optional[Dict[str, List[Params]]] = None
        if override_hp_space is not None:
            for key in override_hp_space.keys():
                if key not in estimator_names:
                    raise ValueError(
                        f"Estimator name '{key}' in `override_hp_space` was found that did not "
                        f"correspond to any of the estimators provided: {estimator_names}"
                    )
            override_hp_space_keys_renamed = {
                self._pipe_human_name(predictor): override_hp_space[predictor]
                for predictor in estimator_names
                if predictor in override_hp_space
            }

        # Grid search not supported.
        if tuner_type == "grid":
            raise ValueError(f"The 'grid' search method not supported with {self.__class__.__name__}")

        search_options = dict(
            return_top_k=return_top_k,
            num_cv_folds=num_cv_folds,
            num_iter=num_iter,
            tuner_patience=tuner_patience,
            tuner_type=tuner_type,
            timeout=timeout,
            random_state=random_state,
            override_hp_space=override_hp_space_keys_renamed,
            horizon=horizon,
            compute_baseline_score=compute_baseline_score,
            grid=grid,
            custom_tuner=custom_tuner,
            raise_exceptions=raise_exceptions,
            silence_warnings=silence_warnings,
        )
        super().__init__(
            study_name,
            task_type,
            estimator_human_names,
            estimator_defs,
            metric,
            dataset,
            **search_options,
            **kwargs,
        )

    def _pipe_human_name(self, predictor_name: str) -> str:
        """Return the human-friendly pipeline name for ``predictor_name``."""
        return f"<Pipeline with {predictor_name}>"

    def _init_estimator(self, estimator_name: str, estimator_def: Any) -> AutoMLCompatibleEstimator:
        """Build the `PipelineSelector` described by ``estimator_def``.

        Args:
            estimator_name (str): Estimator friendly name (used for logging only).
            estimator_def (Any): Mapping with the ``"predictor"``, ``"static_imputers"``, ``"static_scalers"``,
                ``"temporal_imputers"`` and ``"temporal_scalers"`` entries.

        Returns:
            AutoMLCompatibleEstimator: The created `PipelineSelector`.
        """
        logger.info(f"Creating estimator {estimator_name}.")
        candidate_keys = ("static_imputers", "static_scalers", "temporal_imputers", "temporal_scalers")
        transformer_candidates = {key: estimator_def[key] for key in candidate_keys}
        return PipelineSelector(
            task_type=self.task_type,
            predictor=estimator_def["predictor"],
            **transformer_candidates,
        )

    def _create_estimator_with_hps(
        self, estimator_cls: AutoMLCompatibleEstimator, hps: Dict[str, Any], score: float
    ) -> BasePredictor:
        """Build the concrete pipeline for the selected hyperparameters.

        Args:
            estimator_cls (AutoMLCompatibleEstimator): The `PipelineSelector` to build the pipeline with.
            hps (Dict[str, Any]): Hyperparameters, passed to ``pipeline_from_hps``.
            score (float): The score achieved with ``hps`` (logged for information).

        Raises:
            RuntimeError: If the built pipeline is not a `BasePredictor` instance.

        Returns:
            BasePredictor: The built pipeline.
        """
        selector = cast(PipelineSelector, estimator_cls)
        pipeline_name = self._pipe_human_name(selector.predictor.name)
        logger.info(f"Selected score {score} for {pipeline_name} with hyperparameters:\n{hps}")
        built_pipeline = cast(BasePredictor, selector.pipeline_from_hps(hps))
        if isinstance(built_pipeline, BasePredictor):
            return built_pipeline
        # Should not end up here.
        raise RuntimeError(f"Pipeline was not a subclass of {BasePredictor.__name__}")  # pragma: no cover