Source code for portfoliofinder.portfolio.backtested_statistics

from __future__ import annotations

from collections import namedtuple
from functools import wraps
from typing import Dict

import matplotlib.pyplot as plt
import mplcursors
import pandas as pd

from ..stats.functions import percentile_for
from ..stats.types import StatList
from ..util.progressbar import progressbar
from ..util.self_pickling import SelfPickling
from ..util.to_dataframe import to_dataframe


class BacktestedStatistics(SelfPickling):
    """Statistical results for backtested portfolio data.

    The results are held in a pandas DataFrame (built by ``to_dataframe``)
    whose rows are looked up by allocation and whose columns are looked up
    by statistic label.
    """

    def __init__(self, data_by_allocation: Dict[tuple, pd.Series],
                 statistics: StatList, use_progressbar: bool):
        """Create statistical results for backtested portfolio data.

        :param data_by_allocation: backtested portfolio data keyed by
            allocation; the keys must be namedtuples, since their
            ``_fields`` are used as the allocation symbols
        :param statistics: array of statistic functions for pandas Series
        :param use_progressbar: passed through to ``progressbar`` while the
            statistics are being computed
        """
        stats_by_allocation = {}
        for allocation in progressbar(data_by_allocation.keys(), use_progressbar):
            stats_by_allocation[allocation] = _get_statistics(
                data_by_allocation[allocation], statistics)

        self._df = to_dataframe(stats_by_allocation)

        # The first entry is taken as representative: its Series name labels
        # the value column and its key's fields name the allocation symbols.
        first_allocation, first_stats = next(iter(stats_by_allocation.items()))
        data_type = first_stats.name
        allocation_symbols = list(first_allocation._fields)

        self._allocation_namedtuple = namedtuple('Allocation', allocation_symbols)
        self._to_allocation_symbols_and_value = \
            _generate_to_allocation_symbols_and_value_method(allocation_symbols, data_type)

    @classmethod
    def _from_stats(cls, stats_df, allocation_namedtuple, to_allocation_symbols_and_value_func)\
            -> BacktestedStatistics:
        """Build an instance around an existing statistics DataFrame.

        Bypasses ``__init__`` so that no statistics are recomputed.

        :param stats_df: DataFrame of already computed statistics
        :param allocation_namedtuple: namedtuple type used to label allocations
        :param to_allocation_symbols_and_value_func: row mapper created by
            ``_generate_to_allocation_symbols_and_value_method``
        :return: a new set of statistical results
        """
        new_stats = cls.__new__(cls)
        # Inside a classmethod ``super().__init__`` is the parent's plain
        # function, so the new instance is passed explicitly as ``self``.
        super().__init__(new_stats)
        # pylint: disable=protected-access
        new_stats._df = stats_df
        new_stats._allocation_namedtuple = allocation_namedtuple
        new_stats._to_allocation_symbols_and_value = to_allocation_symbols_and_value_func
        # pylint: enable=protected-access
        return new_stats

    def _with_dataframe(self, new_df: pd.DataFrame) -> BacktestedStatistics:
        """Wrap ``new_df`` in a new instance sharing this one's allocation metadata."""
        return type(self)._from_stats(new_df,
                                      self._allocation_namedtuple,
                                      self._to_allocation_symbols_and_value)

    def _statistics_for_allocation(self, allocation) -> pd.Series:
        """Return the row of statistics for ``allocation``, named after it."""
        res = self._df.loc[allocation]
        res.name = str(self._allocation_namedtuple(*res.name))
        return res

    def __repr__(self):
        return repr(self._df)

    def __str__(self):
        return str(self._df)

    def as_dataframe(self) -> pd.DataFrame:
        """Gets this as a pandas DataFrame.

        Note that changes to the returned DataFrame will modify this object.
        """
        return self._df

    def get_allocations_which_max_each_statistic(self) -> pd.DataFrame:
        """Gets allocations which maximize each statistic."""
        return self._append_value_for_each_statistic_allocation(self._df.idxmax())

    def get_allocations_which_min_each_statistic(self) -> pd.DataFrame:
        """Gets allocations which minimize each statistic."""
        return self._append_value_for_each_statistic_allocation(self._df.idxmin())

    def _append_value_for_each_statistic_allocation(self, allocations_for_each_statistic)\
            -> pd.DataFrame:
        """Pair each statistic's selected allocation with that statistic's value.

        :param allocations_for_each_statistic: Series indexed by statistic
            label whose values are allocations (as returned by
            ``DataFrame.idxmax`` / ``DataFrame.idxmin``)
        :return: DataFrame with one row per statistic
        """
        # Look up the full statistics row of every selected allocation, move
        # the allocation out of the index into columns, and re-index the rows
        # by the statistic each allocation was selected for.
        all_statistics_for_each_allocation = self._df.loc[allocations_for_each_statistic]\
            .reset_index()\
            .set_index(allocations_for_each_statistic.index)

        allocation_and_value_for_each_statistic = all_statistics_for_each_allocation\
            .apply(self._to_allocation_symbols_and_value, axis=1)
        allocation_and_value_for_each_statistic.columns.name = ''

        return allocation_and_value_for_each_statistic

    def get_allocation_which_min_statistic(self, statistic) -> pd.Series:
        """Gets allocation which minimizes the specified statistic.

        :param statistic: statistic label (e.g., 'mean')
        :return: allocation which minimizes the statistic
        """
        # Only the requested column is scanned; the other statistics do not
        # influence the result.
        return self._statistics_for_allocation(self._df[statistic].idxmin())

    def get_allocation_which_max_statistic(self, statistic) -> pd.Series:
        """Gets allocation which maximizes the specified statistic.

        :param statistic: statistic label (e.g., 'mean')
        :return: allocation which maximizes the statistic
        """
        return self._statistics_for_allocation(self._df[statistic].idxmax())

    def graph(self, x_axis, y_axis):
        """Creates a scattergraph to visualize the data.

        Hovering over a point annotates it with its allocation.

        :param x_axis: statistic label for the x axis (e.g., 'mean')
        :param y_axis: statistic label for the y axis (e.g., 'std')
        """
        self._df.plot.scatter(x_axis, y_axis)

        mplcursors.cursor(hover=True).connect(
            "add", lambda sel: sel.annotation.set_text(
                self._allocation_namedtuple(*self._df.iloc[sel.target.index].name))
        )

        plt.show()

    def filter(self, dataframe_filter_function) -> BacktestedStatistics:
        """Filters these statistical results with the specified function.

        :param dataframe_filter_function: function which receives the
            statistics DataFrame and returns a selector used to index it
        :return: a new set of statistical results
        """
        return self._with_dataframe(self._df[dataframe_filter_function(self._df)])

    def filter_by_min_of(self, statistic_label) -> BacktestedStatistics:
        """Filters these statistical results to only include data which
        minimizes the specified statistic.

        Missing values (NaN) are ignored when determining the minimum.

        :param statistic_label: label of the statistic
        :return: a new set of statistical results
        """
        values = self._df[statistic_label]
        # Series.min() skips NaN; the builtin min() would give an
        # order-dependent answer as soon as a NaN is present.
        return self._with_dataframe(self._df[values == values.min()])

    def filter_by_max_of(self, statistic_label) -> BacktestedStatistics:
        """Filters these statistical results to only include data which
        maximizes the specified statistic.

        Missing values (NaN) are ignored when determining the maximum.

        :param statistic_label: label of the statistic
        :return: a new set of statistical results
        """
        values = self._df[statistic_label]
        # Series.max() skips NaN; the builtin max() would give an
        # order-dependent answer as soon as a NaN is present.
        return self._with_dataframe(self._df[values == values.max()])

    def filter_by_gte_percentile_of(self, percentile: int, statistic_label) \
            -> BacktestedStatistics:
        """Filters these statistical results to only include data which
        are greater than or equal to the specified percentile for the
        specified statistic.

        :param percentile: percentile as int from 0 to 100
        :param statistic_label: label of the statistic
        :return: a new set of statistical results
        """
        values = self._df[statistic_label]
        return self._with_dataframe(self._df[values >= percentile_for(percentile)(values)])

    def filter_by_lte_percentile_of(self, percentile: int, statistic_label) \
            -> BacktestedStatistics:
        """Filters these statistical results to only include data which
        are less than or equal to the specified percentile for the
        specified statistic.

        :param percentile: percentile as int from 0 to 100
        :param statistic_label: label of the statistic
        :return: a new set of statistical results
        """
        values = self._df[statistic_label]
        return self._with_dataframe(self._df[values <= percentile_for(percentile)(values)])
def _generate_to_allocation_symbols_and_value_method(allocation_symbols, data_type):
    """Build a row mapper which keeps the allocation columns plus one value.

    The returned function takes a row (a pandas Series whose ``name`` is also
    one of its own labels) and returns the entries listed in
    ``allocation_symbols`` followed by the entry stored under the row's own
    name, relabelled as ``data_type``.

    :param allocation_symbols: list of labels to keep from each row
    :param data_type: label under which the selected value is stored
    :return: function mapping a row to its allocation symbols and value
    """
    def to_allocation_symbols_and_value(row):
        res = row[allocation_symbols]
        res[data_type] = row[row.name]
        return res

    return to_allocation_symbols_and_value


def _get_statistics_by_allocation(portfolio_values_by_allocations,
                                  statistics: StatList, use_progressbar):
    """Compute the statistics of every allocation's portfolio values.

    :param portfolio_values_by_allocations: mapping of allocation to a pandas
        Series of portfolio values
    :param statistics: array of statistic functions for pandas Series
    :param use_progressbar: passed through to ``progressbar``
    :return: dict mapping each allocation to its Series of statistics
    """
    return {
        allocation: _get_statistics(portfolio_values_by_allocations[allocation], statistics)
        for allocation in progressbar(portfolio_values_by_allocations.keys(), use_progressbar)
    }


def _get_statistics(portfolio_values: pd.Series, statistics: StatList) -> pd.Series:
    """Aggregate ``portfolio_values`` with every requested statistic.

    :param portfolio_values: values to compute the statistics of
    :param statistics: statistics to compute; callables are wrapped with
        ``_typecheck_series``, anything else (e.g. the label 'mean') is
        handed to ``Series.agg`` unchanged
    :return: Series of results whose index is named "Statistic"
    """
    aggregators = [
        _typecheck_series(stat) if callable(stat) else stat
        for stat in statistics
    ]
    results = portfolio_values.agg(aggregators)
    results.index.name = "Statistic"
    return results


def _typecheck_series(func):
    """Wrap ``func`` so that it rejects anything but a pandas Series.

    NOTE(review): presumably this keeps ``Series.agg`` from applying the
    statistic to individual elements -- confirm against the pandas version
    in use.

    :param func: function taking a pandas Series
    :return: wrapper which raises TypeError for non-Series arguments
    """
    @wraps(func)
    def wrapper(series):
        if not isinstance(series, pd.Series):
            raise TypeError(
                f"expected a pandas Series, got {type(series).__name__}")
        return func(series)

    return wrapper