Source code for covsirphy.science.ml

from covsirphy.util.config import config
from covsirphy.util.validator import Validator
from covsirphy.util.term import Term
from covsirphy.science._autots import _AutoTSHandler



[docs]
class MLEngineer(Term):
    """
    Class for machine learning and preprocessing.

    The validated seed is stored on the instance and reused as the random state
    of the PCA model and as the seed of the forecasting handler.

    Args:
        seed (int or None): random seed
        **kwargs: accepted by the constructor but not used by it
    """

    def __init__(self, seed=0, **kwargs):
        seed_validator = Validator(seed, name="seed")
        self._seed = seed_validator.int()

    def pca(self, X, n_components=0.95):
        """Perform PCA (principal component analysis) after standardization (Z-score normalization) with pca package.

        Args:
            X (pandas.DataFrame): training vectors, checked with
                Validator(X, name="X", accept_none=False).dataframe(time_index=True, empty_ok=False)
                Index
                    pandas.Timestamp: Observation date
                Columns
                    (int or float): observed values of the training vectors
            n_components (float or int): the number of principal components or percentage of variance to cover at least

        Returns:
            dict of {str: object}: the same as pca.pca().fit_transform(), with "model" set to the pca model instance
                {"loadings": pandas.DataFrame}: structured dataframe containing loadings for PCs
                {"PC": pandas.DataFrame}: reduced dimensionality space, the Principal Components (PCs)
                    Index
                        pandas.Timestamp
                    Columns
                        PC1, PC2,...
                {"explained_var": array-like}: explained variance for each of the PCs (same ordering as the PCs)
                {"variance_ratio": array-like}: variance ratio
                {"model": object}: fitted model to be used for further usage of the model
                {"scaler": object}: scaler model
                {"pcp": int}: pcp
                {"topfeat": pandas.DataFrame}: top features
                    Index
                        reset index
                    Columns
                        PC (str): PC1, PC2,...
                        feature (str): feature name of X
                        loading (float): loading values
                        type (str): "best" or "weak"
                {"outliers": pandas.DataFrame}: outliers
                    Index
                        pandas.Timestamp
                    Columns
                        y_proba (float)
                        y_score (float)
                        y_bool (bool)
                        y_bool_spe (bool)
                        y_score_spe (float)
                {"outlier_params": object}: parameter values of the model of finding outliers

        Note:
            Regarding pca package, please refer to https://github.com/erdogant/pca
        """
        # The pca package is imported inside the method on purpose:
        # https://github.com/lisphilar/covid19-sir/issues/1265
        from pca import pca as pca_factory
        x_validator = Validator(X, name="X", accept_none=False)
        x_validator.dataframe(time_index=True, empty_ok=False)
        fitted = pca_factory(
            n_components=n_components,
            normalize=True,
            random_state=self._seed,
            verbose=config.logger_level,
        )
        # Copy the fit_transform() results, then point "model" at the model instance itself.
        results = dict(fitted.fit_transform(X))
        results["model"] = fitted
        return results

    def forecast(self, Y, days, X=None, **kwargs):
        """Forecast Y for given days with/without indicators (X).

        Args:
            Y (pandas.DataFrame):
                Index
                    pandas.Timestamp: Observation date
                Columns
                    observed and the target variables (int or float)
            days (int): days to predict
            X (pandas.DataFrame or None): indicators for regression or None (no indicators)
                Index
                    pandas.Timestamp: Observation date
                Columns
                    observed and the target variables (int or float)
            **kwargs: keyword arguments of autots.AutoTS() except for verbose, forecast_length (always the same as @days)

        Returns:
            pandas.DataFrame:
                Index
                    pandas.Timestamp: Observation date, from the next date of Y.index to the last predicted date
                Columns
                    observed and the target variables (int or float)

        Note:
            AutoTS package is developed at https://github.com/winedarksea/AutoTS
        """
        handler = _AutoTSHandler(Y=Y, days=days, seed=self._seed, **kwargs)
        # fit() returns the object whose predict() produces the forecast.
        fitted = handler.fit(X=X)
        return fitted.predict()