# Source code for tempor.datasources.prediction.one_off.plugin_google_stocks
"""Module with the Google stocks data source."""importiofrompathlibimportPathfromtypingimportAnyimportnumpyasnpimportpandasaspdimportrequestsfromsklearn.preprocessingimportMinMaxScalerfromtempor.coreimportpluginsfromtempor.dataimportdatasetfromtempor.datasourcesimportdatasource
@plugins.register_plugin(name="google_stocks", category="prediction.one_off", plugin_type="datasource")
class GoogleStocksDataSource(datasource.OneOffPredictionDataSource):
    def __init__(self, seq_len: int = 10, **kwargs: Any) -> None:
        """Google stocks data source, as found here:
        https://raw.githubusercontent.com/PacktPublishing/Learning-Pandas-Second-Edition/master/data/goog.csv

        The next day's opening price is the target. The samples are the sequence chunks of length ``seq_len``.

        Args:
            seq_len (int, optional):
                The number of time steps to use. Defaults to ``10``.
            **kwargs (Any):
                Keyword arguments to be passed to the parent class.
        """
        super().__init__(**kwargs)
        self.seq_len = seq_len
        # Local cache location of the downloaded CSV.
        self.df_path = Path(self.dataset_dir()) / "goog.csv"

    def load(self, **kwargs: Any) -> dataset.OneOffPredictionDataset:
        """Load the Google stocks data and cut it into fixed-length windows.

        The CSV is read from ``self.df_path`` if that file exists; otherwise it is downloaded from ``self.url()``
        and written to ``self.df_path`` for subsequent calls. All columns except ``"Date"`` are min-max scaled,
        and the (scaled) date is used as the time index of each window.

        Args:
            **kwargs (Any):
                Accepted for interface compatibility; not used by this method.

        Returns:
            dataset.OneOffPredictionDataset:
                Dataset whose time series are indexed by ``("sample_idx", "time_idx")`` and whose targets are a
                single ``"out"`` column (the scaled ``"Open"`` value of the row right after each window).

        Raises:
            requests.exceptions.HTTPError: If the download responds with an HTTP error status.
            ValueError: If ``seq_len`` is not positive, or the data has too few rows to build a single sample.
        """
        if self.seq_len < 1:
            raise ValueError(f"`seq_len` must be a positive integer, got {self.seq_len}.")

        # Load Google Data
        if not self.df_path.exists():
            response = requests.get(self.url(), timeout=5)
            # Fail loudly on HTTP errors: otherwise an error page would be parsed as CSV and cached on disk,
            # breaking this and every later call until the cache file is removed by hand.
            response.raise_for_status()
            df = pd.read_csv(io.StringIO(response.content.decode("utf-8")))
            # Make sure the cache directory exists before writing into it.
            self.df_path.parent.mkdir(parents=True, exist_ok=True)
            df.to_csv(self.df_path, index=False)
        else:
            df = pd.read_csv(self.df_path)

        # Window ``i`` covers rows ``i .. i + seq_len - 1`` and its target is row ``i + seq_len``.
        # NOTE(review): this count stops one short of the last window that would still fit
        # (``i == len(df) - seq_len - 1``); kept as in the original so the dataset contents do not change.
        n_samples = len(df) - self.seq_len - 1
        if n_samples < 1:
            raise ValueError(
                f"Not enough rows ({len(df)}) to build any sample with seq_len={self.seq_len}; "
                f"at least {self.seq_len + 2} rows are required."
            )

        # Flip the data to make chronological data
        df = pd.DataFrame(df.values[::-1], columns=df.columns)

        # Dates become numeric timestamps, then get min-max scaled so they can serve as the time index.
        T = pd.to_datetime(df["Date"]).astype(np.int64).astype(np.float64) / 10**9  # pyright: ignore
        T = pd.Series(MinMaxScaler().fit_transform(T.values.reshape(-1, 1)).squeeze())  # pyright: ignore

        # Scale every remaining (feature) column independently.
        df = df.drop(columns=["Date"])
        df = pd.DataFrame(MinMaxScaler().fit_transform(df), columns=df.columns)

        # Build dataset
        dataX = []
        outcome = []
        sample_idxs = []

        # Cut data by sequence length
        for i in range(n_samples):
            last = i + self.seq_len - 1
            # ``.loc`` slicing is label based and inclusive of ``last``, so each window has ``seq_len`` rows.
            df_seq = df.loc[i:last].copy()
            horizons = T.loc[i:last].copy()
            out = df["Open"].loc[i + self.seq_len].copy()

            df_seq["time_idx"] = horizons
            df_seq["sample_idx"] = str(i)

            dataX.append(df_seq)
            outcome.append(out)
            sample_idxs.append(str(i))

        time_series_df = pd.concat(dataX, ignore_index=True)
        time_series_df.set_index(keys=["sample_idx", "time_idx"], drop=True, inplace=True)

        outcome_df = pd.DataFrame(outcome)
        outcome_df.index = sample_idxs  # pyright: ignore
        outcome_df.columns = ["out"]

        # Sample ids are strings, so both frames are sorted in the same (lexicographic) order.
        time_series_df.sort_index(level=[0, 1], inplace=True)
        outcome_df.sort_index(inplace=True)

        return dataset.OneOffPredictionDataset(
            time_series=time_series_df,
            targets=outcome_df,
            sample_index="sample_idx",
            time_index="time_idx",
        )