Source code for tempor.models.clairvoyance2.data.df_constraints
# mypy: ignore-errorsfromdataclassesimportdataclassfromtypingimportIterable,Optional,Sequence,Tupleimportnumpyasnpimportpandasaspdfrom..utils.commonimportpython_type_from_np_pd_dtypefrom.internal_utilsimportall_items_are_of_types# NOTE: Obtained from https://pbpython.com/pandas_dtypes.html. May not be fully accurate.PD_ACCEPTABLE_EQUIVALENT_DTYPES=(object,int,float,bool,np.datetime64,pd.Timedelta,pd.Categorical,np.int32,np.int64,np.float64,np.double,)# TODO: Unit test.
@staticmethod
def _get_all_object_columns(df: pd.DataFrame) -> Iterable:
    """Lazily yield the label of every column of ``df`` whose dtype is ``object``.

    Args:
        df: The DataFrame whose ``dtypes`` are inspected.

    Returns:
        A generator over the matching column labels, in ``df.dtypes`` order.
    """
    return (col for col, dtype in df.dtypes.items() if dtype == object)


@staticmethod
def _check_index_or_columns(
    index_or_columns: pd.Index, constraints: IndexConstraints, index_or_columns_str: str
) -> None:
    """Validate a DataFrame index (or columns) object against ``constraints``.

    The checks run in a fixed order and the first failing one raises:
    container type, dtype, per-element types (object dtype only),
    monotonic increase, absence of multi-index, and uniqueness.

    Args:
        index_or_columns: The index-like object to validate.
        constraints: Constraint settings; the fields read here are ``types``,
            ``dtypes``, ``dtype_object_constrain_types``,
            ``enforce_monotonic_increasing``, ``enforce_not_multi_index`` and
            ``enforce_unique``.
        index_or_columns_str: Label interpolated into the error messages to name
            what is being validated.

    Raises:
        TypeError: If any enabled constraint is violated.
    """
    # A `None` or empty sequence of allowed values means "no constraint" for the
    # three sequence-valued settings below.
    if constraints.types is not None and len(constraints.types) > 0:
        if not isinstance(index_or_columns, tuple(constraints.types)):  # type: ignore
            raise TypeError(
                f"DataFrame {index_or_columns_str} must be one of types: {constraints.types}. "
                f"Was found to be of type: {type(index_or_columns)}."
            )

    if constraints.dtypes is not None and len(constraints.dtypes) > 0:
        # NOTE(review): `python_type_from_np_pd_dtype` is defined elsewhere; presumably it
        # maps a numpy/pandas dtype to a comparable Python type - confirm.
        if python_type_from_np_pd_dtype(index_or_columns.dtype) not in constraints.dtypes:  # type: ignore
            raise TypeError(
                f"DataFrame {index_or_columns_str} dtype must be one of: {constraints.dtypes}. "
                f"Was found to be of dtype: {index_or_columns.dtype}"
            )

    if (
        constraints.dtype_object_constrain_types is not None
        and len(constraints.dtype_object_constrain_types) > 0
    ):
        # Element-wise type checking only applies when the dtype is `object`.
        if index_or_columns.dtype == object:
            allowed_types = tuple(constraints.dtype_object_constrain_types)
            if any(not isinstance(item, allowed_types) for item in index_or_columns):
                raise TypeError(
                    f"DataFrame {index_or_columns_str} of dtype object must be constrained "
                    f"to the following types: {constraints.dtype_object_constrain_types}. "
                    f"Check dtype of each element of DataFrame {index_or_columns_str}"
                )

    if constraints.enforce_monotonic_increasing and not index_or_columns.is_monotonic_increasing:
        raise TypeError(f"DataFrame {index_or_columns_str} must be monotonic increasing")

    # Previously spelled `not isinstance(...) is False`, which parses as
    # `not (isinstance(...) is False)`; the plain `isinstance` test is equivalent.
    if constraints.enforce_not_multi_index and isinstance(index_or_columns, pd.MultiIndex):
        raise TypeError(f"DataFrame {index_or_columns_str} must not be multi-index")

    if constraints.enforce_unique and not index_or_columns.is_unique:
        raise TypeError(f"DataFrame {index_or_columns_str} must be unique")


@staticmethod
def _check_elements(df: pd.DataFrame, constraints: ElementConstraints) -> None:
    """Validate the element dtypes / types of ``df`` against ``constraints``.

    The checks run in a fixed order and the first failing one raises:
    column dtypes, element types within object-dtype columns, and per-column
    type homogeneity within object-dtype columns.

    Args:
        df: The DataFrame to validate.
        constraints: Constraint settings; the fields read here are ``dtypes``,
            ``dtype_object_constrain_types`` and
            ``enforce_homogenous_type_per_column``.

    Raises:
        TypeError: If any enabled constraint is violated.
    """
    if constraints.dtypes is not None and len(constraints.dtypes) > 0:
        if not all(python_type_from_np_pd_dtype(dtype) in constraints.dtypes for dtype in df.dtypes.values):
            raise TypeError(
                f"DataFrame elements must be limited to dtypes: {constraints.dtypes}. "
                # The message used to point at a non-existent `.dtype()` method;
                # DataFrames expose the `.dtypes` attribute.
                "Check by calling `.dtypes` on your DataFrame."
            )

    if constraints.dtype_object_constrain_types and len(constraints.dtype_object_constrain_types) > 0:
        allowed_types = tuple(constraints.dtype_object_constrain_types)
        if any(
            not all_items_are_of_types(df[col], allowed_types)
            for col in ConstraintsChecker._get_all_object_columns(df)
        ):
            raise TypeError(
                f"DataFrame elements of dtype object must be constrained "
                f"to the following types: {constraints.dtype_object_constrain_types}. "
                "Check elements of columns of dtype object."
            )

    if constraints.enforce_homogenous_type_per_column:
        # The `len(df) > 0` guard keeps `.iat[0]` from being evaluated on an empty frame.
        # The first element's type in each object column is the reference type for it.
        if len(df) > 0 and any(
            not all_items_are_of_types(df[col], type(df[col].iat[0]))
            for col in ConstraintsChecker._get_all_object_columns(df)
        ):
            raise TypeError(
                "DataFrame elements must be of homogenous type in every column, "
                "including the type of elements in columns of dtype object."
            )