Source code for covsirphy.science.ml

from covsirphy.util.config import config
from covsirphy.util.validator import Validator
from covsirphy.util.term import Term
from covsirphy.science._autots import _AutoTSHandler


[docs] class MLEngineer(Term): """ Class for machine learning and preprocessing. Args: seed (int or None): random seed """ def __init__(self, seed=0, **kwargs): self._seed = Validator(seed, name="seed").int()
[docs] def pca(self, X, n_components=0.95): """Perform PCA (principal component analysis) after standardization (Z-score normalization) with pca package. Args: X (pandas.DataFrame or None): Index pandas.Timestamp: Observation date Columns (int or float): observed values of the training vectors n_components (float or int): _the number of principal components or percentage of variance to cover at least Returns: dict of {str: object}: as the same as pca.pca().fit_transform() {"loadings": pandas.DataFrame}: structured dataframe containing loadings for PCs {"PC": pandas.DataFrame}: reduced dimensionality space, the Principal Components (PCs) Index pandas.Timestamp COlumns PC1, PC2,... {"explained_var": array-like}: explained variance for each fo the PCs (same ordering as the PCs) {"variance_ratio": array-like};: variance ratio {"model": object}: fitted model to be used for further usage of the model {"scaler": object}: scaler model {"pcp": int}: pcp {"topfeat": pandas.DataFrame}: top features Index reset index Columns PC (str): PC1, PC2,... feature (str): feature name of X loading (float): loading values type (str): "best" or "weak {"outliers": pandas.DataFrame}: outliers Index pandas.Timestamp Columns y_proba (float) y_score (float) y_bool (bool) y_bool_spe (bool) y_score_spe (float) {"outlier_params": object}: parameter values of the model of finding outliers Note: Regarding pca package, please refer to https://github.com/erdogant/pca """ from pca import pca # https://github.com/lisphilar/covid19-sir/issues/1265 Validator(X, name="X", accept_none=False).dataframe(time_index=True, empty_ok=False) model = pca(n_components=n_components, normalize=True, random_state=self._seed, verbose=config.logger_level) return {**model.fit_transform(X), "model": model}
[docs] def forecast(self, Y, days, X=None, **kwargs): """Forecast Y for given days with/without indicators (X). Args: Y (pandas.DataFrame): Index pandas.Timestamp: Observation date Columns observed and the target variables (int or float) X (pandas.DataFrame or None): indicators for regression or None (no indicators) Index pandas.Timestamp: Observation date Columns observed and the target variables (int or float) days (int): days to predict **kwargs: keyword arguments of autots.AutoTS() except for verbose, forecast_length (always the same as @days) Return: pandas.DataFrame: Index pandas.Timestamp: Observation date, from the next date of Y.index to the ast predicted date Columns observed and the target variables (int or float) Note: AutoTS package is developed at https://github.com/winedarksea/AutoTS """ model = _AutoTSHandler(Y=Y, days=days, seed=self._seed, **kwargs) return model.fit(X=X).predict()