Module futureexpert.plot

Contains all the functionality to plot the checked-in time series as well as the forecast and backtesting results.

Functions

def filter_models(models: list[Model],
ranks: list[int] | None = [1],
model_names: list[str] | None = None) ‑> list[Model]
Expand source code
def filter_models(models: list[Model],
                  ranks: Optional[list[int]] = [1],
                  model_names: Optional[list[str]] = None,) -> list[Model]:
    """Return the models that match the given names and ranks.

    Both criteria are optional; a criterion that is ``None`` or empty is skipped. When both are
    given, a model has to satisfy both of them.

    Parameters
    ----------
    models: builtins.list
        List of models.
    model_names: typing.Optional
        Names of the models to filter by.
    ranks: typing.Optional
        Ranks of the models to filter by. Models without a ranking never match a rank filter.

    Returns
    -------
    builtins.list
        The remaining models in their original order.
    """
    selected = models

    if model_names:
        selected = [candidate for candidate in selected if candidate.model_name in model_names]

    if not ranks:
        return selected

    matching_rank = []
    for candidate in selected:
        ranking = candidate.model_selection.ranking
        if ranking and ranking.rank_position in ranks:
            matching_rank.append(candidate)
    return matching_rank

Filter models based on the given criteria.

Parameters

models : builtins.list
List of models.
model_names : typing.Optional
Names of the models to filter by.
ranks : typing.Optional
Ranks of the models to filter by.
def plot_backtesting(result: ForecastResult,
iteration: int = 1,
plot_last_x_data_points_only: int | None = None,
model_names: list[str] | None = None,
ranks: list[int] | None = [1],
plot_prediction_intervals: bool = True,
plot_outliers: bool = False,
plot_change_points: bool = False,
plot_replaced_missings: bool = False,
plot_covariates: bool = False) ‑> None
Expand source code
def plot_backtesting(result: ForecastResult,
                     iteration: int = 1,
                     plot_last_x_data_points_only: Optional[int] = None,
                     model_names: Optional[list[str]] = None,
                     ranks: Optional[list[int]] = [1],
                     plot_prediction_intervals: bool = True,
                     plot_outliers: bool = False,
                     plot_change_points: bool = False,
                     plot_replaced_missings: bool = False,
                     plot_covariates: bool = False) -> None:
    """Plots actuals and backtesting results from a single time series.

    One figure is created and shown for every model that passes the rank and name filter.

    Parameters
    ----------
    result: futureexpert.forecast.ForecastResult
        Forecasting and backtesting results of a single time series.
    iteration: builtins.int
        Iteration of the backtesting forecast. The value is used as a zero-based position within
        the backtesting values of each forecast step.
    plot_last_x_data_points_only: typing.Optional
        Number of data points of the actuals that should be shown in the plot.
    model_names: typing.Optional
        Names of the models to plot.
    ranks: typing.Optional
        Ranks of the models to plot.
    plot_prediction_intervals: builtins.bool
        Shows prediction intervals.
    plot_outliers: builtins.bool
        Shows outliers and replacement values.
    plot_change_points: builtins.bool
        Shows change points such as level shifts and few observations.
    plot_replaced_missings: builtins.bool
        Shows replaced missing values.
    plot_covariates: builtins.bool
        Shows the covariates that were used in the model.

    Raises
    ------
    ValueError
        If the selected iteration was not calculated for a model.
    """

    actuals = result.input.actuals
    plot_models = filter_models(result.models, ranks, model_names)

    values = actuals.values
    if plot_last_x_data_points_only is not None:
        values = actuals.values[-plot_last_x_data_points_only:]

    actual_dates = [ac.time_stamp_utc for ac in values]
    actual_values = [ac.value for ac in values]

    # if the data has missing values, make sure to explicitly store them as nan. Otherwise the plot function will
    # display interpolated values. To use help function, a temporary df is needed.
    df_ac = pd.DataFrame({'date': actual_dates, 'actuals': actual_values})
    df_ac = _fill_missing_values_for_plot(granularity=result.input.actuals.granularity, df=df_ac)

    plot_few_observations = []
    if plot_change_points:

        change_points = result.ts_characteristics.change_points or []
        few_observations = [copy.deepcopy(x) for x in change_points if x.change_point_type.startswith('FEW_OBS')]

        plot_few_observations = _calculate_few_observation_borders(actual_dates, few_observations)

        level_shifts = [x for x in change_points if x.change_point_type == 'LEVEL_SHIFT']
        df_ac = _add_level_shifts(df_ac, level_shifts)

    if plot_outliers:
        outliers = result.ts_characteristics.outliers or []
        df_ac = _add_outliers(df_ac, outliers, result.changed_values)

    if plot_replaced_missings:
        df_ac = _add_replaced_missings(df_ac, result.changed_values)

    for model in plot_models:
        forecast = model.model_selection.backtesting
        model_name = model.model_name
        assert model.model_selection.ranking, 'No ranking, plotting not possible.'
        model_rank = model.model_selection.ranking.rank_position

        # group the backtesting values by forecast step; every step holds one value per iteration
        forecasts_per_step = defaultdict(list)
        for bt_value in forecast:
            forecasts_per_step[bt_value.fc_step].append(bt_value)

        # `iteration` is used as a list position below, so it has to be strictly smaller than the
        # number of available iterations. `default=0` covers models without backtesting values.
        iterations = max((len(step_values) for step_values in forecasts_per_step.values()), default=0)
        if iteration >= iterations:
            raise ValueError('Selected iteration was not calculated.')

        bt_round = [step_values[iteration] for step_values in forecasts_per_step.values()]

        backtesting_dates = [bt.time_stamp_utc for bt in bt_round]
        backtesting_fc = [bt.point_forecast_value for bt in bt_round]
        backtesting_upper = [bt.upper_limit_value for bt in bt_round]
        backtesting_lower = [bt.lower_limit_value for bt in bt_round]

        # Work on a per-model frame so that covariate columns of one model do not leak into the
        # figures of the models plotted afterwards.
        df_plot = df_ac
        if plot_covariates and len(model.covariates) > 0:
            df_plot = _add_covariates(df_ac.copy(), model.covariates, result.input.covariates)

        # vertical extent of the shaded areas; Series.min/max skip the nan of missing values
        y_min = df_plot.actuals.min()
        y_max = df_plot.actuals.max()

        fig, ax = plt.subplots()
        fig.set_size_inches(12, 6)
        fig.suptitle(f'Backtesting of {actuals.name} - Iteration: {iteration}', fontsize=16)
        ax.set_title(f'using {model_name} (Rank {model_rank})')

        # plot
        ax.plot(df_plot.date, df_plot.actuals, color=prog_color.loc[0, "darkblue"],  label='Time Series')
        ax.plot(backtesting_dates, backtesting_fc, color=prog_color.loc[0, "cyan"], label='Forecast')

        if 'replaced_missing' in df_plot.columns:
            ax.fill_between(df_plot.date, y_min, y_max,
                            where=df_plot.replaced_missing.notnull(),
                            color=prog_color.loc[2, 'red'], alpha=0.30, label='Missings')

        if 'original_outlier' in df_plot.columns:
            ax.plot(df_plot.date, df_plot.original_outlier, 'o-',
                    color=prog_color.loc[0, 'red'], label='Original Outlier')
            ax.plot(df_plot.date, df_plot.replace_outlier, 'o-',
                    color=prog_color.loc[0, 'green'], label='Replacement Values')
            ax.plot(df_plot.date, df_plot.outlier_connection, '-',
                    color=prog_color.loc[4, 'red'], zorder=1)

        if 'level_shift' in df_plot.columns:
            ax.plot(df_plot.date, df_plot.level_shift, color=prog_color.loc[0, 'gold'], label='Levels')

        for idx, time_frame in enumerate(plot_few_observations):
            # only the first area gets a label to avoid duplicated legend entries
            ax.fill_between(df_plot.date, y_min, y_max,
                            where=(df_plot.date >= time_frame[0]) & (df_plot.date < time_frame[1]),
                            color=prog_color.loc[1, 'greyblue'], alpha=0.30, label='Few Observations' if idx == 0 else None)

        if plot_prediction_intervals and None not in backtesting_lower and None not in backtesting_upper:
            ax.fill_between(backtesting_dates, backtesting_lower, backtesting_upper,
                            color=prog_color.loc[2, "cyan"], alpha=0.30, label='Prediction Interval')

        covariate_column = [col for col in df_plot if col.startswith('covariate_lag')]
        if len(covariate_column) > 0:
            _add_covariates_to_plot(ax, covariate_column, df_plot)
        else:
            # legend
            ax.legend(loc=legend_position['loc'], bbox_to_anchor=(1, 1))

        # style
        ax.set_frame_on(False)

        # margin
        plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)

        plt.show()

Plots actuals and backtesting results from a single time series.

Parameters

result : ForecastResult
Forecasting and backtesting results of a single time series and model.
iteration : builtins.int
Iteration of the backtesting forecast.
plot_last_x_data_points_only : typing.Optional
Number of data points of the actuals that should be shown in the plot.
model_names : typing.Optional
Names of the models to plot.
ranks : typing.Optional
Ranks of the models to plot.
plot_prediction_intervals : builtins.bool
Shows prediction intervals.
plot_outliers : builtins.bool
Shows outliers and replacement values.
plot_change_points : builtins.bool
Shows change points such as level shifts and few observations.
plot_replaced_missings : builtins.bool
Shows replaced missing values.
plot_covariates : builtins.bool
Shows the covariates that were used in the model.
def plot_forecast(result: ForecastResult,
plot_last_x_data_points_only: int | None = None,
model_names: list[str] | None = None,
ranks: list[int] | None = [1],
plot_prediction_intervals: bool = True,
plot_outliers: bool = False,
plot_change_points: bool = False,
plot_replaced_missings: bool = False,
plot_covariates: bool = False) ‑> None
Expand source code
def plot_forecast(result: ForecastResult,
                  plot_last_x_data_points_only: Optional[int] = None,
                  model_names: Optional[list[str]] = None,
                  ranks: Optional[list[int]] = [1],
                  plot_prediction_intervals: bool = True,
                  plot_outliers: bool = False,
                  plot_change_points: bool = False,
                  plot_replaced_missings: bool = False,
                  plot_covariates: bool = False) -> None:
    """Plots actuals and forecast from a single time series.

    One figure is created and shown for every model that passes the rank and name filter.

    Parameters
    ----------
    result: futureexpert.forecast.ForecastResult
        Forecasting results of a single time series.
    plot_last_x_data_points_only: typing.Optional
        Number of data points of the actuals that should be shown in the plot.
    model_names: typing.Optional
        Names of the models to plot.
    ranks: typing.Optional
        Ranks of the models to plot.
    plot_prediction_intervals: builtins.bool
        Shows prediction intervals.
    plot_outliers: builtins.bool
        Shows outliers and replacement values.
    plot_change_points: builtins.bool
        Shows change points such as level shifts and few observations.
    plot_replaced_missings: builtins.bool
        Shows replaced missing values.
    plot_covariates: builtins.bool
        Shows the covariates that were used in the model.
    """

    actuals = result.input.actuals
    plot_models = filter_models(result.models, ranks, model_names)

    # prepare actual values
    values = actuals.values
    if plot_last_x_data_points_only is not None:
        values = actuals.values[-plot_last_x_data_points_only:]

    actual_dates = [ac.time_stamp_utc for ac in values]
    actual_values = [ac.value for ac in values]
    df_ac = pd.DataFrame({'date': actual_dates, 'actuals': actual_values})
    df_ac = _fill_missing_values_for_plot(granularity=result.input.actuals.granularity, df=df_ac)

    plot_few_observations = []
    if plot_change_points:

        change_points = result.ts_characteristics.change_points or []
        few_observations = [copy.deepcopy(x) for x in change_points if x.change_point_type.startswith('FEW_OBS')]

        plot_few_observations = _calculate_few_observation_borders(actual_dates, few_observations)

        level_shifts = [x for x in change_points if x.change_point_type == 'LEVEL_SHIFT']
        df_ac = _add_level_shifts(df_ac, level_shifts)

    if plot_outliers:
        outliers = result.ts_characteristics.outliers or []
        df_ac = _add_outliers(df_ac, outliers, result.changed_values)

    if plot_replaced_missings:
        df_ac = _add_replaced_missings(df_ac, result.changed_values)

    # Position of the last actual once the forecast rows are appended. Determined after all
    # helpers above had the chance to replace df_ac, so it always refers to the final frame.
    index_last_actual = len(df_ac.index) - 1

    for model in plot_models:
        forecast = model.forecasts

        name = actuals.name
        model_name = model.model_name
        assert model.model_selection.ranking, 'No ranking, plotting not possible.'
        model_rank = model.model_selection.ranking.rank_position

        fc_date = [fc.time_stamp_utc for fc in forecast]
        # forecast values
        fc_value = [fc.point_forecast_value for fc in forecast]
        # forecast intervals
        fc_upper_value = [fc.upper_limit_value for fc in forecast]
        fc_lower_value = [fc.lower_limit_value for fc in forecast]

        # Decide on the raw values whether intervals are available: once the values are inside a
        # DataFrame, pandas may have converted None to nan and the check would no longer trigger.
        has_prediction_intervals = None not in fc_lower_value and None not in fc_upper_value

        df_fc = pd.DataFrame({'date': fc_date, 'fc': fc_value, 'upper': fc_upper_value, 'lower': fc_lower_value})

        df_concat = pd.concat([df_ac, df_fc], axis=0).reset_index(drop=True)

        # connect forecast line (and interval) with the actual line; nothing to connect without actuals
        if index_last_actual >= 0:
            last_actual = df_concat.loc[index_last_actual, 'actuals']
            for column in ('fc', 'upper', 'lower'):
                df_concat.loc[index_last_actual, column] = last_actual
        df_concat.date = pd.to_datetime(df_concat.date)
        df_concat.sort_values('date', inplace=True)
        df_concat.reset_index(drop=True, inplace=True)

        if plot_covariates and len(model.covariates) > 0:
            df_concat = _add_covariates(df_concat, model.covariates, result.input.covariates)

        # vertical extent of the shaded areas; Series.min/max skip the nan of forecast rows and missing values
        y_min = df_concat.actuals.min()
        y_max = df_concat.actuals.max()

        fig, ax = plt.subplots(figsize=(12, 6))
        fig.suptitle(f'Forecast for {name}', fontsize=16)
        ax.set_title(f'using {model_name} (Rank {model_rank})')

        # plot
        ax.plot(df_concat.date, df_concat.actuals, color=prog_color.loc[0, 'darkblue'], label='Time Series')
        ax.plot(df_concat.date, df_concat.fc, color=prog_color.loc[0, 'cyan'], label='Forecast')
        if 'replaced_missing' in df_concat.columns:
            ax.fill_between(df_concat.date, y_min, y_max,
                            where=df_concat.replaced_missing.notnull(),
                            color=prog_color.loc[2, 'red'], alpha=0.30, label='Missings')

        if 'original_outlier' in df_concat.columns:
            ax.plot(df_concat.date, df_concat.original_outlier, 'o-',
                    color=prog_color.loc[0, 'red'], label='Original Outlier')
            ax.plot(df_concat.date, df_concat.replace_outlier, 'o-',
                    color=prog_color.loc[0, 'green'], label='Replacement Values')
            ax.plot(df_concat.date, df_concat.outlier_connection, '-',
                    color=prog_color.loc[4, 'red'], zorder=1)

        if 'level_shift' in df_concat.columns:
            ax.plot(df_concat.date, df_concat.level_shift, color=prog_color.loc[0, 'gold'], label='Levels')

        for idx, time_frame in enumerate(plot_few_observations):
            # only the first area gets a label to avoid duplicated legend entries
            ax.fill_between(df_concat.date, y_min, y_max,
                            where=(df_concat.date >= time_frame[0]) & (df_concat.date < time_frame[1]),
                            color=prog_color.loc[1, 'greyblue'], alpha=0.30, label='Few Observations' if idx == 0 else None)

        if plot_prediction_intervals and has_prediction_intervals:
            ax.fill_between(df_concat.date, df_concat.lower, df_concat.upper,
                            color=prog_color.loc[2, 'cyan'], alpha=0.30, label='Prediction Interval')

        covariate_column = [col for col in df_concat if col.startswith('covariate_lag')]
        if len(covariate_column) > 0:
            _add_covariates_to_plot(ax, covariate_column, df_concat)
        else:
            # legend
            ax.legend(loc=legend_position['loc'], bbox_to_anchor=(1, 1))

        # style
        ax.set_frame_on(False)
        ax.tick_params(axis='both', labelsize=10)

        # margin
        plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)

        plt.show()

Plots actuals and forecast from a single time series.

Parameters

result : ForecastResult
Forecasting results of a single time series and its models.
plot_last_x_data_points_only : typing.Optional
Number of data points of the actuals that should be shown in the plot.
model_names : typing.Optional
Names of the models to plot.
ranks : typing.Optional
Ranks of the models to plot.
plot_prediction_intervals : builtins.bool
Shows prediction intervals.
plot_outliers : builtins.bool
Shows outliers and replacement values.
plot_change_points : builtins.bool
Shows change points such as level shifts and few observations.
plot_replaced_missings : builtins.bool
Shows replaced missing values.
plot_covariates : builtins.bool
Shows the covariates that were used in the model.
def plot_time_series(ts: TimeSeries,
covariate: Covariate | None = None,
plot_last_x_data_points_only: int | None = None) ‑> None
Expand source code
def plot_time_series(ts: TimeSeries,
                     covariate: Optional[Covariate] = None,
                     plot_last_x_data_points_only: Optional[int] = None) -> None:
    """Plots actuals from a single time series. Optionally, a covariate can be plotted next to it.

    The covariate is shifted by its lag and drawn on a second y-axis.

    Parameters
    ----------
    ts: futureexpert.shared_models.TimeSeries
        time series data
    covariate: typing.Optional
        covariate data
    plot_last_x_data_points_only: typing.Optional
        Number of data points of the actuals that should be shown in the plot.
    """

    actual_dates = [ac.time_stamp_utc for ac in ts.values]
    actual_values = [ac.value for ac in ts.values]

    if plot_last_x_data_points_only is not None:
        actual_dates = actual_dates[-plot_last_x_data_points_only:]
        actual_values = actual_values[-plot_last_x_data_points_only:]

    name = ts.name
    df_ac = pd.DataFrame({'date': actual_dates, 'actuals': actual_values})
    df_ac = _fill_missing_values_for_plot(granularity=ts.granularity, df=df_ac)

    if covariate:
        cov_date = [value.time_stamp_utc for value in covariate.ts.values]
        cov_value = [value.value for value in covariate.ts.values]
        df_cov = pd.DataFrame({'date': cov_date, 'covariate': cov_value, 'covariate_lag': cov_value})
        df_ac = pd.merge(df_ac, df_cov, on='date', how='outer', validate='1:1').reset_index(drop=True)
        df_ac = df_ac.sort_values(by='date')
        # the shift has to happen on the date-sorted frame so that the lag moves along the time axis
        df_ac['covariate_lag'] = df_ac['covariate_lag'].shift(covariate.lag)

        # Remove all covariate values from before the start of actuals. The minimum is taken as a
        # scalar from the date column; positional access like `series[0]` on a label-indexed
        # Series is deprecated in pandas.
        first_actual_date = df_ac.loc[df_ac['actuals'].notna(), 'date'].min()
        df_ac = df_ac[df_ac['date'] >= first_actual_date].reset_index(drop=True)

    fig, ax = plt.subplots()
    fig.set_size_inches(12, 6)
    fig.suptitle(name, fontsize=16)
    ax.set_frame_on(False)
    ax.tick_params(axis='both', labelsize=10)

    # plot
    ax.plot(df_ac.date, df_ac.actuals, color=prog_color.loc[0, "darkblue"])
    if covariate:
        ax.set_title(f'with covariate: {covariate.ts.name} and lag {covariate.lag}')
        ax2 = ax.twinx()
        ax2.grid(False)
        ax2.set_frame_on(False)
        ax2.tick_params(axis='both', labelsize=10)
        # same number of ticks on both y-axes
        ax.yaxis.set_major_locator(mpl.ticker.LinearLocator(numticks=6))
        ax2.yaxis.set_major_locator(mpl.ticker.LinearLocator(numticks=6))
        ax2.plot(df_ac.date, df_ac.covariate_lag, color=prog_color.loc[0, 'violet'], label=covariate.ts.name)

    # margin
    plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)

    plt.show()

Plots actuals from a single time series. Optionally, a covariate can be plotted next to it.

Parameters

ts : TimeSeries
time series data
covariate : typing.Optional
covariate data
plot_last_x_data_points_only : typing.Optional
Number of data points of the actuals that should be shown in the plot.