class ExtractTC(TransformerModel):
    """Base transformer for splitting selected features out of temporal covariates.

    Subclasses provide the feature selector through ``_get_selector_param``;
    ``_extract`` uses it to separate the selected features from the remaining
    temporal covariates.
    """

    requirements: Requirements = Requirements(
        dataset_requirements=DatasetRequirements(),
    )

    def _fit(self, data: Dataset, **kwargs) -> "ExtractTC":
        """Return ``self`` unchanged; this transformer has nothing to fit."""
        # Nothing happens in `fit` here.
        return self

    @abstractmethod
    def _get_selector_param(self) -> TFeatureSelector:
        """Return the selector used to index the feature axis in ``_extract``."""
        ...

    def _extract(
        self,
        temporal_covariates: TimeSeriesSamples,  # type: ignore
    ) -> Tuple[TimeSeriesSamples, Optional[TimeSeriesSamples]]:
        """Split ``temporal_covariates`` into remaining and extracted features.

        Args:
            temporal_covariates: The temporal covariates to split.

        Returns:
            A tuple ``(remaining, extracted)``. When the selector matches no
            features, ``remaining`` is the input object itself and ``extracted``
            is ``None``. Otherwise ``remaining`` holds the features that were not
            selected, in their original order, and ``extracted`` holds the
            selected ones.

        Raises:
            Whatever ``raise_not_implemented`` raises, when the selector matches
            every feature so that no covariates would remain.
        """
        temporal_extracted: TimeSeriesSamples = temporal_covariates[:, self._get_selector_param()]  # type: ignore

        if len(temporal_extracted.features) == 0:
            # Nothing was selected: hand the covariates back untouched.
            return temporal_covariates, None

        extracted_features = set(temporal_extracted.feature_names)
        # Keep the original feature order. A plain set difference would make the
        # column order of the remaining covariates arbitrary (and, for string
        # names, different between interpreter runs). `dict.fromkeys` drops any
        # duplicate names, as the set difference did.
        remaining_features = tuple(
            dict.fromkeys(
                name for name in temporal_covariates.feature_names if name not in extracted_features
            )
        )
        if len(remaining_features) == 0:
            raise_not_implemented("Selecting all temporal features so that no covariates remain")

        remaining_covariates: TimeSeriesSamples = temporal_covariates[:, remaining_features]  # type: ignore
        # TODO: Need to make sure that __getitem__ supports collection of index items and update typehints.
        return remaining_covariates, temporal_extracted
class TimeIndexFeatureConcatenator(TransformerModel):
    """Add columns derived from the time index to every temporal covariate series.

    Depending on the parameters, each series' dataframe gains a ``"time_index"``
    column (the index itself) and/or a ``"time_delta"`` column (differences
    between consecutive index values, padded by one value to match the length).
    """

    requirements: Requirements = Requirements(
        dataset_requirements=DatasetRequirements(
            requires_all_temporal_data_index_numeric=True,
        ),
    )
    DEFAULT_PARAMS: _AddTimeIndexFeatureTCParams = _AddTimeIndexFeatureTCParams()

    def __init__(self, params: Optional[TParams] = None) -> None:
        """Initialise the transformer and validate the feature switches.

        Raises:
            ValueError: If both ``add_time_index`` and ``add_time_delta`` are ``False``.
        """
        super().__init__(params)
        any_feature_enabled = (
            self.params.add_time_index is not False or self.params.add_time_delta is not False
        )
        if not any_feature_enabled:
            raise ValueError("Must set at least one of `add_time_index` or `add_time_delta` to True")

    def _fit(self, data: Dataset, **kwargs) -> "TimeIndexFeatureConcatenator":
        """Return ``self`` unchanged; this transformer has nothing to fit."""
        # Nothing happens in `fit` here.
        return self

    def _padded_time_deltas(self, index_values):
        """Return consecutive differences of ``index_values`` padded by one value.

        The pad value goes at the front when ``time_delta_pad_at_back`` is
        ``False`` and at the back otherwise.
        """
        deltas = np.diff(index_values)
        if self.params.time_delta_pad_at_back is not False:
            return np.append(deltas, self.params.time_delta_pad_value)
        return np.append(self.params.time_delta_pad_value, deltas)

    def _transform(self, data: Dataset, **kwargs) -> Dataset:
        """Return a copy of ``data`` with the time features added to each series."""
        transformed = data.copy()

        # The new features are added under string names, so cast the existing names first.
        cast_time_series_samples_feature_names_to_str(transformed.temporal_covariates)

        for series in transformed.temporal_covariates:
            source_df = series.df
            augmented_df = source_df.copy()

            if self.params.add_time_delta:
                augmented_df.insert(0, "time_delta", self._padded_time_deltas(source_df.index.values))

            # Inserted second at position 0, so "time_index" ends up before "time_delta".
            if self.params.add_time_index:
                augmented_df.insert(0, "time_index", source_df.index)

            series.df = augmented_df

        # NOTE: No change to feature categorical definitions, as these new features are all numeric.
        return transformed
class StaticFeaturesConcatenator(TransformerModel):
    """Append each sample's static covariates to its temporal covariates.

    For every sample, the static covariate row is repeated once per time step
    and concatenated column-wise onto the sample's temporal covariates
    dataframe. The static columns may optionally be renamed with a prefix and
    placed at the beginning or the end.
    """

    requirements: Requirements = Requirements(
        dataset_requirements=DatasetRequirements(
            requires_static_covariates_present=True,
        )
    )
    DEFAULT_PARAMS: _AddStaticCovariatesTCParams = _AddStaticCovariatesTCParams()

    def _fit(self, data: Dataset, **kwargs) -> "StaticFeaturesConcatenator":
        """Return ``self`` unchanged; this transformer has nothing to fit."""
        # Nothing happens in `fit` here.
        return self

    def _transform(self, data: Dataset, **kwargs) -> Dataset:
        """Return a copy of ``data`` whose temporal covariates include the static features.

        Args:
            data: Dataset with static covariates present.

        Returns:
            The transformed copy. Its ``static_covariates`` is set to ``None``
            when the ``drop_static_covariates`` parameter is truthy.

        Raises:
            ValueError: If a (possibly prefixed) static feature name already
                exists among the temporal covariate feature names.
        """
        data = data.copy()
        # Narrows the Optional for type checkers.
        # NOTE(review): presence is presumably already enforced via `requirements` - confirm.
        assert data.static_covariates is not None

        if (
            self.params.feature_name_prefix is not None
            or python_type_from_np_pd_dtype(data.static_covariates.df.columns.dtype) == object  # type: ignore
        ):
            # If static covariate features are indexed by str, do the same for temporal covariates.
            cast_time_series_samples_feature_names_to_str(data.temporal_covariates)

        s_cov: StaticSamples = data.static_covariates
        prefix = self.params.feature_name_prefix
        s_cov_new_feature_names: Dict[T_FeatureIndexDtype, T_FeatureIndexDtype] = {
            k: (f"{prefix}_{k}" if prefix is not None else k) for k in s_cov.feature_names
        }

        clashing_feature_names = set(s_cov_new_feature_names.values()).intersection(
            data.temporal_covariates.feature_names
        )
        if len(clashing_feature_names) > 0:
            raise ValueError(
                f"Features named {clashing_feature_names} clash with existing temporal covariate features. "
                "Try setting/changing `feature_name_prefix` parameter."
            )

        # The static dataframe does not change inside the loop; look it up once.
        s_cov_df: pd.DataFrame = s_cov.df

        t_cov_sample: TimeSeries
        for sample_idx, t_cov_sample in zip(data.temporal_covariates.sample_indices, data.temporal_covariates):
            t_cov_sample_df = t_cov_sample.df
            n_timesteps = t_cov_sample_df.shape[0]

            # Get sample static covariates (normally a single row):
            s_cov_sample: pd.DataFrame = s_cov_df.loc[[sample_idx], :]

            # Repeat this as long as the time index on the temporal covariates.
            # Positional indexing keeps column dtypes and, unlike concatenating a
            # list of `n_timesteps` frames, also works when there are zero time
            # steps (`pd.concat([])` raises "No objects to concatenate").
            repeat_positions = list(range(s_cov_sample.shape[0])) * n_timesteps
            # Give this new feature names as columns.
            to_append = s_cov_sample.iloc[repeat_positions].rename(columns=s_cov_new_feature_names)
            # Give this the same exact index as temporal features dataframe.
            to_append.index = t_cov_sample_df.index

            # Actually append the features; `pd.concat` builds a new dataframe, so
            # the sample's existing dataframe is not modified.
            if self.params.append_at_beginning is False:
                t_cov_sample_df_new = pd.concat([t_cov_sample_df, to_append], axis=1)
            else:
                t_cov_sample_df_new = pd.concat([to_append, t_cov_sample_df], axis=1)

            # Sanity check: the time index must be unchanged by the concatenation.
            assert (t_cov_sample_df.index == t_cov_sample_df_new.index).all()
            t_cov_sample.df = t_cov_sample_df_new

        if self.params.drop_static_covariates:
            data.static_covariates = None

        return data