import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import empyrical as ep
import alphalens as al
import pyfolio as pf
from quantopian.pipeline.data import EquityPricing
from quantopian.research.experimental import get_factor_returns, get_factor_loadings
# Load a backtest by id and turn its positions into a long Series of
# per-date portfolio weights.
# `get_backtest` is not defined or imported in this file; presumably it is
# injected by the research environment -- TODO confirm.
bt = get_backtest('5e10e63ee0e324554f318194')
# Positions table without the 'cash' column.
results = bt.pyfolio_positions.drop('cash', axis=1)
# Each column label is split on '-' and its second piece is cast to int.
# NOTE(review): presumably labels look like '<symbol>-<sid>' so this yields
# integer asset ids; verify against the actual column labels.
results.columns = results.columns.str.split('-').to_series().apply(lambda x: int(x[1]))
# Divide every row by its sum of absolute values (gross exposure).
results = results.div(results.abs().sum(axis=1), axis=0)
# Long format: one entry per (row label, column label), missing values removed.
results = results.stack().dropna()
Load pricing data:
# Date window: first position date up to 30 days past the last one.
start = results.index.levels[0][0]
end = results.index.levels[0][-1] + pd.Timedelta(days=30)
end_to_today = end - pd.Timestamp.today(tz='UTC')
# If date goes into the future, cut it
# (only triggers when `end` is more than 2 whole days ahead of now).
# NOTE(review): indentation was lost in this export; `end.normalize()` is
# assumed to belong to the if-body -- confirm against the notebook.
if end_to_today.days > 2:
    end = pd.Timestamp.today(tz='UTC') - pd.offsets.BDay() * 2
    end = end.normalize()
# Second index level of `results` holds the asset identifiers.
assets = results.index.levels[1]
# `get_pricing` is not defined or imported in this file; presumably it is
# provided by the research environment -- TODO confirm.
pricing = get_pricing(assets, start, end, fields="close_price")
# Simple period-over-period returns of the close prices.
stock_rets = pricing.pct_change()
# Load risk factor loadings and returns
# NOTE(review): a further 30 days is added to `end` here, so the requested
# range can extend past today even after the clipping above -- confirm the
# loaders tolerate that.
factor_loadings = get_factor_loadings(assets, start, end + pd.Timedelta(days=30))
factor_returns = get_factor_returns(start, end + pd.Timedelta(days=30))
# Fix a bug in the risk returns
# (zeroes the single largest entry of the 'value' column).
factor_returns.loc[factor_returns.value.idxmax(), 'value'] = 0
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import empyrical as ep
import alphalens as al
import pyfolio as pf
def calc_perf_attrib(portfolio_returns, portfolio_pos, factor_returns, factor_loadings):
    """Run ``empyrical.perf_attrib`` over the span covered by ``portfolio_returns``.

    The inputs are reshaped into the layout passed to ``ep.perf_attrib``:
    index levels are named, the ``'cash'`` column is removed, asset column
    labels are cast to int and positions are stacked into a long Series.
    The caller's objects are left untouched.

    Parameters
    ----------
    portfolio_returns : pandas object with an index
        Portfolio returns. Its first and last index labels bound the
        window used to slice the factor data.
    portfolio_pos : pd.DataFrame
        Positions, one column per asset plus a ``'cash'`` column. Asset
        column labels must be castable to int.
    factor_returns : pd.DataFrame
        Risk factor returns; sliced with ``.loc[start:end]``.
    factor_loadings : pd.DataFrame
        Risk factor loadings with a two-level index; sliced with
        ``.loc[start:end]`` after the levels are named ``('dt', 'ticker')``.

    Returns
    -------
    Whatever ``ep.perf_attrib`` returns for these inputs. The caller in
    this file reads element 0 as risk exposures and element 1 as the
    attribution table.
    """
    # Imported locally in addition to the file-level import; presumably so
    # the function still resolves `ep` when it is executed remotely through
    # `view.map_sync` in compute_factor_stats -- TODO confirm.
    import empyrical as ep

    start = portfolio_returns.index[0]
    end = portfolio_returns.index[-1]

    # Name the index levels on a shallow copy: assigning `.index` on the
    # argument itself would rename the caller's DataFrame as a side effect.
    factor_loadings = factor_loadings.copy(deep=False)
    factor_loadings.index = factor_loadings.index.set_names(['dt', 'ticker'])

    # `drop` already returns a new frame, so the renames below stay local.
    positions = portfolio_pos.drop('cash', axis=1)
    positions.index = positions.index.set_names(['dt'])
    positions.columns.name = 'ticker'
    positions.columns = positions.columns.astype('int')

    return ep.perf_attrib(
        portfolio_returns,
        positions.stack().dropna(),
        factor_returns.loc[start:end],
        factor_loadings.loc[start:end])
def plot_exposures(risk_exposures, ax=None):
    """Draw one horizontal box per column of ``risk_exposures``.

    Parameters
    ----------
    risk_exposures : pd.DataFrame
        Wide table whose columns are plotted as separate boxes. It is
        stacked into long form with columns ``dt``, ``factor`` and
        ``exposure`` before plotting.
    ax : matplotlib axes, optional
        Passed straight through to ``sns.boxplot``.

    Returns
    -------
    None
    """
    long_form = risk_exposures.stack().reset_index()
    long_form.columns = ['dt', 'factor', 'exposure']

    # Boxes are listed in reverse column order.
    factor_order = risk_exposures.columns[::-1]

    sns.boxplot(
        data=long_form,
        x='exposure',
        y='factor',
        order=factor_order,
        orient='h',
        ax=ax,
    )
def compute_turnover(df):
    """Return the per-row sum of absolute changes in holdings.

    ``df`` is a long-format pandas object with a MultiIndex. Missing
    entries are dropped, the innermost index level is pivoted into
    columns, rows that are entirely missing are removed and remaining gaps
    are treated as zero. The result is the row-wise sum of absolute
    differences between consecutive rows; the first row has no predecessor
    and therefore sums to 0.
    """
    wide = df.dropna().unstack()
    wide = wide.dropna(how='all').fillna(0)
    abs_change = wide.diff().abs()
    return abs_change.sum(axis=1)
def get_max_median_position_concentration(expos):
    """Return per-group quantiles of ``expos``.

    Parameters
    ----------
    expos : pd.Series
        Values indexed by a MultiIndex; rows are grouped on the first
        index level.

    Returns
    -------
    pd.DataFrame
        One row per group and one column per quantile
        (0.05, 0.25, 0.5, 0.75, 0.95), computed over all values of the
        group together.
    """
    # NOTE: earlier drafts sketched separate long/short percentile summaries
    # but never used them; the discarded `longs`/`shorts` masks and the
    # commented-out draft were removed because they did not affect the result.
    return expos.groupby(level=0).quantile([.05, .25, .5, .75, .95]).unstack()
def compute_factor_stats(factor, pricing, factor_returns, factor_loadings, periods=range(1, 15), view=None):
    """Compute return, risk and holdings statistics of a factor at several delays.

    Parameters
    ----------
    factor : pd.Series
        Factor values / portfolio weights with a two-level MultiIndex.
        Passed to alphalens and also used directly for turnover, holdings
        counts and quantiles.
    pricing : pd.DataFrame
        Price table forwarded to
        ``al.utils.get_clean_factor_and_forward_returns``.
    factor_returns, factor_loadings : pd.DataFrame
        Risk model inputs forwarded to ``calc_perf_attrib``.
    periods : iterable of int, optional
        Forward-return periods (delays) to evaluate.
    view : object with ``map_sync(func, iterable)``, optional
        When given, the per-delay attribution is dispatched through
        ``view.map_sync`` (presumably a parallel view -- TODO confirm);
        otherwise the built-in ``map`` is used.

    Returns
    -------
    dict
        Keys: ``factor_data_total``, ``portfolio_returns_total``,
        ``portfolio_returns_specific``, ``risk_exposures_portfolio``,
        ``perf_attribution``, ``delay_sharpes_total``,
        ``delay_sharpes_specific``, ``turnover``, ``n_holdings``,
        ``perc_holdings``. The risk exposures and attribution table are
        those of the first delay only.
    """
    factor_data_total = al.utils.get_clean_factor_and_forward_returns(
        factor,
        pricing,
        quantiles=None,
        bins=(-np.inf, 0, np.inf),
        periods=periods,
        cumulative_returns=False,
    )

    portfolio_returns_total = al.performance.factor_returns(factor_data_total)
    # Column labels are strings whose last character is dropped before the
    # int cast (presumably '1D', '2D', ... -- TODO confirm).
    portfolio_returns_total.columns = portfolio_returns_total.columns.map(lambda x: int(x[:-1]))
    # Shift every column down by its own delay, counted in rows.
    for i in portfolio_returns_total.columns:
        portfolio_returns_total[i] = portfolio_returns_total[i].shift(i)

    portfolio_returns_specific = pd.DataFrame(columns=portfolio_returns_total.columns,
                                              index=portfolio_returns_total.index)

    # Everything the worker needs is bound through default arguments rather
    # than captured from the enclosing scope, so the function carries its
    # data with it when handed to `view.map_sync`.
    def calc_perf_attrib_c(i, portfolio_returns_total=portfolio_returns_total,
                           factor_data_total=factor_data_total, factor_returns=factor_returns,
                           factor_loadings=factor_loadings):
        # A zero 'cash' column is added because calc_perf_attrib drops a
        # column of that name; positions are shifted by the same delay as
        # the returns.
        return calc_perf_attrib(portfolio_returns_total[i],
                                factor_data_total['factor'].unstack().assign(cash=0).shift(i),
                                factor_returns, factor_loadings)

    delays = portfolio_returns_total.columns
    if view is None:
        perf_attrib = map(calc_perf_attrib_c, delays)
    else:
        perf_attrib = view.map_sync(calc_perf_attrib_c, delays)

    # Pair each result with the delay it was computed for. Deriving the
    # column from the loop counter (counter + 1) is only right when
    # `periods` is exactly 1..N.
    for position, (delay, pa) in enumerate(zip(delays, perf_attrib)):
        if position == 0:
            risk_exposures_portfolio = pa[0]
            perf_attribution = pa[1]
        portfolio_returns_specific[delay] = pa[1]['specific_returns']

    delay_sharpes_total = portfolio_returns_total.apply(ep.sharpe_ratio)
    delay_sharpes_specific = portfolio_returns_specific.apply(ep.sharpe_ratio)

    turnover = compute_turnover(factor)
    n_holdings = factor.groupby(level=0).count()
    perc_holdings = get_max_median_position_concentration(factor)

    return {'factor_data_total': factor_data_total,
            'portfolio_returns_total': portfolio_returns_total,
            'portfolio_returns_specific': portfolio_returns_specific,
            'risk_exposures_portfolio': risk_exposures_portfolio,
            'perf_attribution': perf_attribution,
            'delay_sharpes_total': delay_sharpes_total,
            'delay_sharpes_specific': delay_sharpes_specific,
            'turnover': turnover,
            'n_holdings': n_holdings,
            'perc_holdings': perc_holdings,
            }
def plot_overview_tear_sheet(factor, pricing, factor_returns, factor_loadings, periods=range(1, 15), view=None):
    """Compute factor statistics and draw them on a single 16x16 figure.

    All arguments are forwarded unchanged to ``compute_factor_stats``.

    Panels (on a 5x4 grid):
      * Sharpe ratio per delay, total vs. specific returns (bar chart).
      * Cumulative total returns and cumulative specific returns for up
        to the first four computed delays.
      * Box plot of risk exposures.
      * Final cumulative return and annualised volatility per
        attribution column (horizontal bars sharing a y axis).
      * Number of holdings and turnover over time (twin y axes).
      * Position-size quantiles over time.

    Returns
    -------
    (fig, factor_stats)
        The matplotlib figure and the dict returned by
        ``compute_factor_stats``.
    """
    fig = plt.figure(figsize=(16, 16))
    gs = plt.GridSpec(5, 4)

    ax1 = plt.subplot(gs[0:2, 0:2])
    factor_stats = compute_factor_stats(factor, pricing, factor_returns, factor_loadings, periods=periods, view=view)
    pd.DataFrame({'specific': factor_stats['delay_sharpes_specific'],
                  'total': factor_stats['delay_sharpes_total']}).plot.bar(ax=ax1)
    ax1.set(xlabel='delay', ylabel='IR')

    # Plot the first four delays that were actually computed. With the
    # default `periods` these are 1..4; selecting a fixed [1, 2, 3, 4]
    # breaks as soon as a custom `periods` omits one of them.
    plotted_delays = list(factor_stats['portfolio_returns_total'].columns[:4])

    ax2a = plt.subplot(gs[0, 2:4])
    delay_cum_rets_total = factor_stats['portfolio_returns_total'][plotted_delays].apply(ep.cum_returns)
    delay_cum_rets_total.plot(ax=ax2a)
    ax2a.set(title='Total returns', ylabel='Cumulative returns')

    ax2b = plt.subplot(gs[1, 2:4])
    delay_cum_rets_specific = factor_stats['portfolio_returns_specific'][plotted_delays].apply(ep.cum_returns)
    delay_cum_rets_specific.plot(ax=ax2b)
    ax2b.set(title='Specific returns', ylabel='Cumulative returns')

    ax3 = plt.subplot(gs[2:4, 0:2])
    # Align the exposure columns with the attribution table's columns so
    # this panel lists the same names as the two bar charts beside it.
    plot_exposures(factor_stats['risk_exposures_portfolio'].reindex(columns=factor_stats['perf_attribution'].columns),
                   ax=ax3)

    ax4 = plt.subplot(gs[2:4, 2])
    ep.cum_returns_final(factor_stats['perf_attribution']).plot.barh(ax=ax4)
    ax4.set(xlabel='Cumulative returns')

    ax5 = plt.subplot(gs[2:4, 3], sharey=ax4)
    factor_stats['perf_attribution'].apply(ep.annual_volatility).plot.barh(ax=ax5)
    ax5.set(xlabel='Ann. volatility')

    ax6 = plt.subplot(gs[-1, 0:2])
    factor_stats['n_holdings'].plot(color='b', ax=ax6)
    ax6.set_ylabel('# holdings', color='b')
    ax6.tick_params(axis='y', labelcolor='b')

    # Turnover goes on a second y axis over the same x axis.
    ax62 = ax6.twinx()
    factor_stats['turnover'].plot(color='r', ax=ax62)
    ax62.set_ylabel('turnover', color='r')
    ax62.tick_params(axis='y', labelcolor='r')

    ax7 = plt.subplot(gs[-1, 2:4])
    factor_stats['perc_holdings'].plot(ax=ax7)
    ax7.set(ylabel='Long/short perc holdings')

    gs.tight_layout(fig)

    return fig, factor_stats
# Draw the overview tear sheet for the loaded backtest; the figure handle is
# discarded and only the computed statistics are kept.
_, factor_stats = plot_overview_tear_sheet(
    factor=results,
    pricing=pricing,
    factor_returns=factor_returns,
    factor_loadings=factor_loadings,
)