from __future__ import division
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import itertools
import sklearn.covariance
from sklearn import preprocessing
import scipy as sp
import scipy.stats as stats
import time
import datetime
import statsmodels.api as sm
import statsmodels.tsa.stattools as ts
# load SPY and IEF to use as benchmarks in some plots. SPY will also be used to compute portfolio beta
# also load some stock data to show how to generate single-stock performance tearsheets at the end of notebook
symbol_list = ['SPY', 'IEF', 'AAPL', 'EFA', 'EEM']
securities_panel = get_pricing(symbol_list, fields=['price']
, start_date='2000-01-01', end_date='2015-06-11')
securities_panel.minor_axis = map(lambda x: x.symbol, securities_panel.minor_axis)
benchmark = securities_panel.loc['price']['SPY'].dropna()
benchmark_rets = benchmark.pct_change().dropna()
benchmark2 = securities_panel.loc['price']['IEF'].dropna()
benchmark2_rets = benchmark2.pct_change().dropna()
def extract_date(timestamp):
return str(timestamp).split()[0]
##### TIMESERIES ANALYSIS FUNCTIONS - Part 1
def var_cov_var_normal(P, c, mu=0, sigma=1, **kwargs):
"""
Variance-Covariance calculation of daily Value-at-Risk
using confidence level c, with mean of returns mu
and standard deviation of returns sigma, on a portfolio
of value P.
"""
alpha = sp.stats.norm.ppf(1-c, mu, sigma)
return P - P*(alpha + 1)
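# Example (hypothetical inputs, not from the notebook): daily 95% VaR of a $1MM
# portfolio whose daily returns have mean 0.05% and standard deviation 1%:
#   var_cov_var_normal(1e6, 0.95, mu=0.0005, sigma=0.01)   # ~ $15,900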
def cum_returns(df, withStartingValue=None):
if withStartingValue is None:
return np.exp(np.log(1 + df).cumsum()) - 1
else:
return np.exp(np.log(1 + df).cumsum()) * withStartingValue
def aggregate_returns(df_daily_rets, convert_to):
cumulate_returns = lambda x: cum_returns(x)[-1]
if convert_to == 'daily':
return df_daily_rets
elif convert_to == 'weekly':
return df_daily_rets.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.isocalendar()[1]]).apply(cumulate_returns)
elif convert_to == 'monthly':
return df_daily_rets.groupby([lambda x: x.year, lambda x: x.month]).apply(cumulate_returns)
elif convert_to == 'yearly':
return df_daily_rets.groupby([lambda x: x.year]).apply(cumulate_returns)
else:
raise ValueError('convert_to must be daily, weekly, monthly or yearly')
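# Example (hypothetical returns, just to illustrate the two helpers above):
#   rets = pd.Series([0.01, -0.02, 0.03], index=pd.date_range('2015-01-05', periods=3, freq='B'))
#   cum_returns(rets)                  # ~ [0.0100, -0.0102, 0.0195]
#   aggregate_returns(rets, 'weekly')  # single value ~0.0195; all three days fall in one ISO week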
def max_drawdown(ts, inputIsNAV=True):
if ts.size < 1:
return np.nan
if inputIsNAV:
temp_ts = ts
else:
temp_ts = cum_returns(ts, withStartingValue=100)
MDD = 0
DD = 0
peak = -99999
for value in temp_ts:
if (value > peak):
peak = value
else:
DD = (peak - value) / peak
if (DD > MDD):
MDD = DD
return -1 * MDD
def annual_return(ts, inputIsNAV=True, style='calendar'):
# if style == 'compound' then return will be calculated in geometric terms: (1+mean(all_daily_returns))^252 - 1
# if style == 'calendar' then return will be calculated as ((last_value - start_value)/start_value)/num_of_years
# if style == 'arithmetic' then return is simply
# mean(all_daily_returns)*252
if ts.size < 1:
return np.nan
if inputIsNAV:
tempReturns = ts.pct_change().dropna()
if style == 'calendar':
num_years = len(tempReturns) / 252
start_value = ts[0]
end_value = ts[-1]
return ((end_value - start_value) / start_value) / num_years
if style == 'compound':
return pow((1 + tempReturns.mean()), 252) - 1
else:
return tempReturns.mean() * 252
else:
if style == 'calendar':
num_years = len(ts) / 252
temp_NAV = cum_returns(ts, withStartingValue=100)
start_value = temp_NAV[0]
end_value = temp_NAV[-1]
return ((end_value - start_value) / start_value) / num_years
if style == 'compound':
return pow((1 + ts.mean()), 252) - 1
else:
return ts.mean() * 252
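# Worked example (hypothetical 0.04% mean daily return) for the three styles above:
#   'arithmetic': 0.0004 * 252            ~ 10.1% per year
#   'compound':   (1 + 0.0004)**252 - 1   ~ 10.6% per year
#   'calendar':   total end-over-start return divided by the number of years covered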
def annual_volatility(ts, inputIsNAV=True):
if ts.size < 2:
return np.nan
if inputIsNAV:
tempReturns = ts.pct_change().dropna()
return tempReturns.std() * np.sqrt(252)
else:
return ts.std() * np.sqrt(252)
def calmar_ratio(ts, inputIsNAV=True, returns_style='calendar'):
temp_max_dd = max_drawdown(ts=ts, inputIsNAV=inputIsNAV)
# print(temp_max_dd)
if temp_max_dd < 0:
if inputIsNAV:
temp = annual_return(ts=ts,
inputIsNAV=True,
style=returns_style) / abs(max_drawdown(ts=ts,
inputIsNAV=True))
else:
tempNAV = cum_returns(ts, withStartingValue=100)
temp = annual_return(ts=tempNAV,
inputIsNAV=True,
style=returns_style) / abs(max_drawdown(ts=tempNAV,
inputIsNAV=True))
else:
return np.nan
if np.isinf(temp):
return np.nan
else:
return temp
def sharpe_ratio(ts, inputIsNAV=True, returns_style='calendar'):
return annual_return(ts,
inputIsNAV=inputIsNAV,
style=returns_style) / annual_volatility(ts,
inputIsNAV=inputIsNAV)
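# 'stability' below is the R-squared of an OLS linear fit to the (optionally log10-scaled)
# cumulative-return curve; values near 1 indicate a steady, trend-like equity curve.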
def stability_of_timeseries(ts, logValue=True, inputIsNAV=True):
if ts.size < 2:
return np.nan
if logValue:
if inputIsNAV:
tempValues = np.log10(ts.values)
tsLen = ts.size
else:
temp_ts = cum_returns(ts, withStartingValue=100)
tempValues = np.log10(temp_ts.values)
tsLen = temp_ts.size
else:
if inputIsNAV:
tempValues = ts.values
tsLen = ts.size
else:
temp_ts = cum_returns(ts, withStartingValue=100)
tempValues = temp_ts.values
tsLen = temp_ts.size
X = range(0, tsLen)
X = sm.add_constant(X)
model = sm.OLS(tempValues, X).fit()
return model.rsquared
def calc_multifactor(df_rets, factors):
import statsmodels.api as sm
factors = factors.loc[df_rets.index]
factors = sm.add_constant(factors)
factors = factors.dropna(axis=0)
results = sm.OLS(df_rets[factors.index], factors).fit()
return results.params
def rolling_multifactor_beta(ser, multi_factor_df, rolling_window=63):
results = [ calc_multifactor( ser[beg:end], multi_factor_df)
for beg,end in zip(ser.index[0:-rolling_window],ser.index[rolling_window:]) ]
return pd.DataFrame(index=ser.index[rolling_window:], data=results)
def multi_factor_alpha( factors_ts_list, single_ts, factor_names_list, input_is_returns=False
, annualized=False, annualize_factor=252, show_output=False):
factors_ts = [ i.asfreq(freq='D',normalize=True) for i in factors_ts_list ]
dep_var = single_ts.asfreq(freq='D',normalize=True)
if not input_is_returns:
factors_ts = [ i.pct_change().dropna() for i in factors_ts ]
dep_var = dep_var.pct_change().dropna()
factors_align = pd.DataFrame( factors_ts ).T.dropna()
factors_align.columns = factor_names_list
if show_output:
print factors_align.head(5)
print dep_var.head(5)
if dep_var.shape[0] < 2:
return np.nan
if factors_align.shape[0] < 2:
return np.nan
factor_regress = pd.ols(y=dep_var, x=factors_align, intercept=True)
factor_alpha = factor_regress.summary_as_matrix.intercept.beta
if show_output:
print factor_regress.resid
print factor_regress.summary_as_matrix
if annualized:
return factor_alpha * annualize_factor
else:
return factor_alpha
def calc_alpha_beta(df_rets,
benchmark_rets,
startDate=None, endDate=None,
return_beta_only=False,
inputs_are_returns=True,
normalize=False, remove_zeros=False):
if not inputs_are_returns:
df_rets = df_rets.pct_change().dropna()
benchmark_rets = benchmark_rets.pct_change().dropna()
if startDate != None:
df_rets = df_rets[startDate:]
if endDate != None:
df_rets = df_rets[:endDate]
if df_rets.ndim == 1:
if remove_zeros:
df_rets = df_rets[df_rets != 0]
if normalize:
ret_index = df_rets.index.normalize()
else:
ret_index = df_rets.index
beta, alpha = sp.stats.linregress(benchmark_rets.loc[ret_index].values,
df_rets.values)[:2]
if df_rets.ndim == 2:
beta = pd.Series(index=df_rets.columns)
alpha = pd.Series(index=df_rets.columns)
for algo_id in df_rets:
df = df_rets[algo_id]
if remove_zeros:
df = df[df != 0]
if normalize:
ret_index = df.index.normalize()
else:
ret_index = df.index
beta[algo_id], alpha[algo_id] = sp.stats.linregress(benchmark_rets.loc[ret_index].values,
df.values)[:2]
alpha.name = 'alpha'
beta.name = 'beta'
if return_beta_only:
return beta
else:
return alpha * 252, beta
def rolling_beta(ser, benchmark_rets, rolling_window=63):
results = [ calc_alpha_beta( ser[beg:end], benchmark_rets, return_beta_only=True, normalize=True)
for beg,end in zip(ser.index[0:-rolling_window],ser.index[rolling_window:]) ]
return pd.Series(index=ser.index[rolling_window:], data=results)
def out_of_sample_vs_in_sample_returns_kde(bt_ts, oos_ts,
transform_style='scale',
return_zero_if_exception=True):
bt_ts_pct = bt_ts.pct_change().dropna()
oos_ts_pct = oos_ts.pct_change().dropna()
bt_ts_r = bt_ts_pct.reshape(len(bt_ts_pct),1)
oos_ts_r = oos_ts_pct.reshape(len(oos_ts_pct),1)
if transform_style == 'raw':
bt_scaled = bt_ts_r
oos_scaled = oos_ts_r
if transform_style == 'scale':
bt_scaled = preprocessing.scale(bt_ts_r, axis=0)
oos_scaled = preprocessing.scale(oos_ts_r, axis=0)
if transform_style == 'normalize_L2':
bt_scaled = preprocessing.normalize(bt_ts_r, axis=1)
oos_scaled = preprocessing.normalize(oos_ts_r, axis=1)
if transform_style == 'normalize_L1':
bt_scaled = preprocessing.normalize(bt_ts_r, axis=1, norm='l1')
oos_scaled = preprocessing.normalize(oos_ts_r, axis=1, norm='l1')
X_train = bt_scaled
X_test = oos_scaled
X_train = X_train.reshape(len(X_train))
X_test = X_test.reshape(len(X_test))
x_axis_dim = np.linspace(-4, 4, 100)
kernel_method = 'scott'
try:
scipy_kde_train = stats.gaussian_kde(X_train, bw_method=kernel_method)(x_axis_dim)
scipy_kde_test = stats.gaussian_kde(X_test, bw_method=kernel_method)(x_axis_dim)
except:
if return_zero_if_exception:
return 0.0
else:
return np.nan
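# kde_diff ranges from 0 (in-sample and out-of-sample return densities overlap exactly)
# to 1 (the two estimated densities do not overlap at all)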
kde_diff = sum(abs(scipy_kde_test - scipy_kde_train)) / (sum(scipy_kde_train) + sum(scipy_kde_test))
return kde_diff
def perf_stats(
ts,
inputIsNAV=True,
returns_style='compound',
return_as_dict=False):
all_stats = {}
all_stats['annual_return'] = annual_return(
ts,
inputIsNAV=inputIsNAV,
style=returns_style)
all_stats['annual_volatility'] = annual_volatility(
ts,
inputIsNAV=inputIsNAV)
all_stats['sharpe_ratio'] = sharpe_ratio(
ts,
inputIsNAV=inputIsNAV,
returns_style=returns_style)
all_stats['calmar_ratio'] = calmar_ratio(
ts,
inputIsNAV=inputIsNAV,
returns_style=returns_style)
all_stats['stability'] = stability_of_timeseries(ts, inputIsNAV=inputIsNAV)
all_stats['max_drawdown'] = max_drawdown(ts, inputIsNAV=inputIsNAV)
if return_as_dict:
return all_stats
else:
all_stats_df = pd.DataFrame(
index=all_stats.keys(),
data=all_stats.values())
all_stats_df.columns = ['perf_stats']
return all_stats_df
##### TIMESERIES ANALYSIS FUNCTIONS - Part 2
def get_max_draw_down_underwater(underwater):
valley = np.argmax(underwater) # end of the period
# Find first 0
peak = underwater[:valley][underwater[:valley] == 0].index[-1]
# Find last 0
try:
recovery = underwater[valley:][underwater[valley:] == 0].index[0]
except IndexError:
recovery = np.nan # drawdown not recovered
return peak, valley, recovery
def get_max_draw_down(df_rets):
df_rets = df_rets.copy()
df_cum = cum_returns(df_rets, 1.0)
running_max = np.maximum.accumulate(df_cum)
underwater = (running_max - df_cum) / running_max
#import pdb; pdb.set_trace()
return get_max_draw_down_underwater(underwater)
def get_top_draw_downs(df_rets, top=10):
df_rets = df_rets.copy()
df_cum = cum_returns(df_rets, 1.0)
running_max = np.maximum.accumulate(df_cum)
underwater = running_max - df_cum
drawdowns = []
for t in range(top):
peak, valley, recovery = get_max_draw_down_underwater(underwater)
# Slice out draw-down period
if not pd.isnull(recovery):
underwater = pd.concat(
[underwater.loc[:peak].iloc[:-1], underwater.loc[recovery:].iloc[1:]])
else:
# drawdown has not ended yet
underwater = underwater.loc[:peak]
drawdowns.append((peak, valley, recovery))
if (len(df_rets) == 0) or (len(underwater) == 0):
break
return drawdowns
def gen_drawdown_table(df_rets, top=10):
df_cum = cum_returns(df_rets, 1.0)
drawdown_periods = get_top_draw_downs(df_rets, top=top)
df_drawdowns = pd.DataFrame(index=range(top), columns=['net drawdown in %',
'peak date',
'valley date',
'recovery date',
'duration'])
for i, (peak, valley, recovery) in enumerate(drawdown_periods):
if pd.isnull(recovery):
df_drawdowns.loc[i, 'duration'] = np.nan
else:
df_drawdowns.loc[i, 'duration'] = len(pd.date_range(peak,
recovery,
freq='B'))
df_drawdowns.loc[i, 'peak date'] = peak
df_drawdowns.loc[i, 'valley date'] = valley
df_drawdowns.loc[i, 'recovery date'] = recovery
df_drawdowns.loc[i, 'net drawdown in %'] = np.round( (
(df_cum.loc[peak] - df_cum.loc[valley]) / df_cum.loc[peak]) * 100, 2 )
df_drawdowns['peak date'] = pd.to_datetime(
df_drawdowns['peak date'],
unit='D')
df_drawdowns['valley date'] = pd.to_datetime(
df_drawdowns['valley date'],
unit='D')
df_drawdowns['recovery date'] = pd.to_datetime(
df_drawdowns['recovery date'],
unit='D')
return df_drawdowns
def rolling_sharpe(df_rets, rolling_sharpe_window):
return pd.rolling_mean(df_rets, rolling_sharpe_window) / pd.rolling_std(df_rets, rolling_sharpe_window) * np.sqrt(252)
def cone_rolling(input_rets, num_stdev=1.0, warm_up_days_pct=0.5, std_scale_factor=252,
update_std_oos_rolling=False, cone_fit_end_date=None, extend_fit_trend=True, create_future_cone=True):
# if specifying 'cone_fit_end_date' please use a pandas compatible format, e.g. '2015-8-4', 'YYYY-MM-DD'
warm_up_days = int(warm_up_days_pct*input_rets.size)
# create initial linear fit from beginning of timeseries thru warm_up_days or the specified 'cone_fit_end_date'
if cone_fit_end_date is None:
df_rets = input_rets[:warm_up_days]
else:
df_rets = input_rets[ input_rets.index < cone_fit_end_date]
perf_ts = cum_returns(df_rets, 1)
X = range(0, perf_ts.size)
X = sm.add_constant(X)
line_ols = sm.OLS(perf_ts.values , X).fit()
fit_line_ols_coef = line_ols.params[1]
fit_line_ols_inter = line_ols.params[0]
x_points = range(0, perf_ts.size)
x_points = np.array(x_points) * fit_line_ols_coef + fit_line_ols_inter
perf_ts_r = pd.DataFrame(perf_ts)
perf_ts_r.columns = ['perf']
warm_up_std_pct = np.std(perf_ts.pct_change().dropna())
std_pct = warm_up_std_pct * np.sqrt(std_scale_factor)
perf_ts_r['line'] = x_points
perf_ts_r['sd_up'] = perf_ts_r['line'] * ( 1 + num_stdev * std_pct )
perf_ts_r['sd_down'] = perf_ts_r['line'] * ( 1 - num_stdev * std_pct )
std_pct = warm_up_std_pct * np.sqrt(std_scale_factor)
last_backtest_day_index = df_rets.index[-1]
cone_end_rets = input_rets[ input_rets.index > last_backtest_day_index ]
new_cone_day_scale_factor = int(1)
oos_intercept_shift = perf_ts_r.perf[-1] - perf_ts_r.line[-1]
# make the cone for the out-of-sample/live papertrading period
for i in cone_end_rets.index:
df_rets = input_rets[:i]
perf_ts = cum_returns(df_rets, 1)
if extend_fit_trend:
line_ols_coef = fit_line_ols_coef
line_ols_inter = fit_line_ols_inter
else:
X = range(0, perf_ts.size)
X = sm.add_constant(X)
line_ols = sm.OLS(perf_ts.values , X).fit()
line_ols_coef = line_ols.params[1]
line_ols_inter = line_ols.params[0]
x_points = range(0, perf_ts.size)
x_points = np.array(x_points) * line_ols_coef + line_ols_inter + oos_intercept_shift
temp_line = x_points
if update_std_oos_rolling:
#std_pct = np.sqrt(std_scale_factor) * np.std(perf_ts.pct_change().dropna())
std_pct = np.sqrt(new_cone_day_scale_factor) * np.std(perf_ts.pct_change().dropna())
else:
std_pct = np.sqrt(new_cone_day_scale_factor) * warm_up_std_pct
temp_sd_up = temp_line * ( 1 + num_stdev * std_pct )
temp_sd_down = temp_line * ( 1 - num_stdev * std_pct )
new_daily_cone = pd.DataFrame(index=[i], data={'perf':perf_ts[i],
'line':temp_line[-1],
'sd_up':temp_sd_up[-1],
'sd_down':temp_sd_down[-1] } )
perf_ts_r = perf_ts_r.append(new_daily_cone)
new_cone_day_scale_factor+=1
if create_future_cone:
extend_ahead_days = 252
future_cone_dates = pd.date_range(cone_end_rets.index[-1], periods=extend_ahead_days, freq='B')
future_cone_intercept_shift = perf_ts_r.perf[-1] - perf_ts_r.line[-1]
future_days_scale_factor = np.linspace(1,extend_ahead_days,extend_ahead_days)
std_pct = np.sqrt(future_days_scale_factor) * warm_up_std_pct
x_points = range(perf_ts.size, perf_ts.size + extend_ahead_days)
x_points = np.array(x_points) * line_ols_coef + line_ols_inter + oos_intercept_shift + future_cone_intercept_shift
temp_line = x_points
temp_sd_up = temp_line * ( 1 + num_stdev * std_pct )
temp_sd_down = temp_line * ( 1 - num_stdev * std_pct )
future_cone = pd.DataFrame(index=map( np.datetime64, future_cone_dates ), data={'perf':temp_line,
'line':temp_line,
'sd_up':temp_sd_up,
'sd_down':temp_sd_down } )
perf_ts_r = perf_ts_r.append(future_cone)
return perf_ts_r
# PLOTTING FUNCTIONS
def plot_cone_chart(cone_df, warm_up_days_pct, lines_to_plot=['line'], in_sample_color='grey', oos_color='coral', plot_cone_lines=True ):
if plot_cone_lines:
cone_df[lines_to_plot].plot(alpha=0.3, color='k', ls='-', lw=2, label='')
warm_up_x_end = int(len(cone_df)*warm_up_days_pct)
plt.fill_between(cone_df.index[:warm_up_x_end], cone_df.sd_down[:warm_up_x_end], cone_df.sd_up[:warm_up_x_end], color=in_sample_color, alpha=0.15)
plt.fill_between(cone_df.index[warm_up_x_end:], cone_df.sd_down[warm_up_x_end:], cone_df.sd_up[warm_up_x_end:], color=oos_color, alpha=0.15)
def plot_calendar_returns_info_graphic(daily_rets_ts, x_dim=15, y_dim=6):
ann_ret_df = pd.DataFrame( aggregate_returns(daily_rets_ts, 'yearly') )
monthly_ret_table = aggregate_returns(daily_rets_ts, 'monthly')
monthly_ret_table = monthly_ret_table.unstack()
monthly_ret_table = np.round(monthly_ret_table, 3)
fig = plt.figure(figsize=(21,6))
ax1 = fig.add_subplot(1,3,1)
sns.heatmap(monthly_ret_table.fillna(0)*100.0, annot=True, annot_kws={"size": 12}, alpha=1.0, center=0.0, cbar=False, cmap='RdYlGn')
ax1.set_ylabel(' ')
ax1.set_xlabel("Monthly Returns (%)")
ax2 = fig.add_subplot(1,3,2)
# sns.barplot(ann_ret_df.index, ann_ret_df[0], ci=None)
ax2.axvline(100*ann_ret_df.values.mean(), color='steelblue',linestyle='--', lw=4, alpha=0.7)
(100*ann_ret_df.sort_index(ascending=False)).plot(ax=ax2, kind='barh', alpha=0.70)
ax2.axvline(0.0, color='black',linestyle='-', lw=3)
# sns.heatmap(ann_ret_df*100.0, annot=True, annot_kws={"size": 14, "weight":'bold'}, center=0.0, cbar=False, cmap=matplotlib.cm.RdYlGn)
ax2.set_ylabel(' ')
ax2.set_xlabel("Annual Returns (%)")
ax2.legend(['mean'])
ax3 = fig.add_subplot(1,3,3)
ax3.hist(100*monthly_ret_table.dropna().values.flatten(), color='orangered', alpha=0.80, bins=20)
#sns.distplot(100*monthly_ret_table.values.flatten(), bins=20, ax=ax3, color='orangered', kde=True)
ax3.axvline(100*monthly_ret_table.dropna().values.flatten().mean(), color='gold',linestyle='--', lw=4, alpha=1.0)
ax3.axvline(0.0, color='black',linestyle='-', lw=3, alpha=0.75)
ax3.legend(['mean'])
ax3.set_xlabel("Distribution of Monthly Returns (%)")
def plot_drawdowns(df_rets, top=10):
df_drawdowns = gen_drawdown_table(df_rets)
df_cum = cum_returns(df_rets, 1.0)
running_max = np.maximum.accumulate(df_cum)
underwater = -100 * ( (running_max - df_cum) / running_max )
fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(13, 6))
(df_cum).plot(ax=ax1)
(underwater).plot(ax=ax2, kind='area', color='coral', alpha=0.7)
lim = ax1.get_ylim()
colors = sns.cubehelix_palette(len(df_drawdowns))[::-1]
for i, (peak, recovery) in df_drawdowns[
['peak date', 'recovery date']].iterrows():
if pd.isnull(recovery):
recovery = df_rets.index[-1]
ax1.fill_between((peak, recovery),
lim[0],
lim[1],
alpha=.4,
color=colors[i])
plt.suptitle('Top %i draw down periods' % top)
ax1.set_ylabel('returns in %')
ax2.set_ylabel('drawdown in %')
ax2.set_title('Underwater plot')
def gen_date_ranges_interesting():
from collections import OrderedDict
periods = OrderedDict()
# Dotcom bubble
periods['Dotcom'] = (pd.Timestamp('20000310'), pd.Timestamp('20000910'))
# Lehman Brothers
periods['Lehmann'] = (pd.Timestamp('20080801'), pd.Timestamp('20081001'))
# 9/11
periods['9/11'] = (pd.Timestamp('20010911'), pd.Timestamp('20011011'))
# 05/08/11 US downgrade and European Debt Crisis 2011
periods['US downgrade/European Debt Crisis'] = (pd.Timestamp('20110805'), pd.Timestamp('20110905'))
# 16/03/11 Fukushima meltdown 2011
periods['Fukushima'] = (pd.Timestamp('20110316'), pd.Timestamp('20110416'))
# 01/08/03 US Housing Bubble 2003
periods['US Housing'] = (pd.Timestamp('20030108'), pd.Timestamp('20030208'))
# 06/09/12 EZB IR Event 2012
periods['EZB IR Event'] = (pd.Timestamp('20120910'), pd.Timestamp('20121010'))
#August 2007, March and September of 2008, Q1 & Q2 2009,
periods['Aug07'] = (pd.Timestamp('20070801'), pd.Timestamp('20070901'))
periods['Mar08'] = (pd.Timestamp('20080301'), pd.Timestamp('20080401'))
periods['Sept08'] = (pd.Timestamp('20080901'), pd.Timestamp('20081001'))
periods['2009Q1'] = (pd.Timestamp('20090101'), pd.Timestamp('20090301'))
periods['2009Q2'] = (pd.Timestamp('20090301'), pd.Timestamp('20090601'))
#Flash Crash (May 6, 2010 + 1 week post),
periods['Flash Crash'] = (pd.Timestamp('20100505'), pd.Timestamp('20100510'))
# April and October 2014).
periods['Apr14'] = (pd.Timestamp('20140401'), pd.Timestamp('20140501'))
periods['Oct14'] = (pd.Timestamp('20141001'), pd.Timestamp('20141101'))
return periods
def extract_interesting_date_ranges(df):
from collections import OrderedDict
periods = gen_date_ranges_interesting()
df_ts = df.copy()
df_ts.index = df_ts.index.map(pd.Timestamp)
ranges = OrderedDict()
for name, (start, end) in periods.iteritems():
period = df_ts.loc[start:end]
if len(period) == 0:
continue
ranges[name] = period
return ranges
def analyze_single_algo( df_rets, algo_live_date=None, cone_std=1.0 ):
future_cone_stdev = cone_std
plt.figure(figsize=(13,6))
algo_ts = (df_rets + 1).cumprod()
print "Entire data start date: " + str(algo_ts.index[0])
print "Entire data end date: " + str(algo_ts.index[-1])
fig = plt.figure(figsize=(13,8))
ax = fig.add_subplot(111)
cum_returns(benchmark_rets[algo_ts.index], 1.0).plot(ax=ax, lw=2, color='gray', label='', alpha=0.65)
cum_returns(benchmark2_rets[algo_ts.index], 1.0).plot(ax=ax, lw=2, color='gray', label='', alpha=0.45)
# just 'pretend' that the 'live' algo out-of-sample returns starts at X% of the whole history
# where X% is specified by the variable 'warm_up_days_pct'
if algo_live_date is None:
warm_up_days_pct = 0.5
algo_create_date = df_rets.index[ int(len(df_rets)*warm_up_days_pct) ]
else:
algo_create_date = algo_live_date
df_rets_backtest = df_rets[ df_rets.index < algo_create_date]
df_rets_live = df_rets[ df_rets.index > algo_create_date]
print 'Out-of-Sample Months: ' + str( int( len(df_rets_live) / 21) )
print 'Backtest Months: ' + str( int( len(df_rets_backtest) / 21) )
perf_stats_backtest = np.round(perf_stats(df_rets_backtest, inputIsNAV=False, returns_style='arithmetic'), 2)
perf_stats_backtest_ab = np.round(calc_alpha_beta(df_rets_backtest, benchmark_rets), 2)
perf_stats_backtest.loc['alpha'] = perf_stats_backtest_ab[0]
perf_stats_backtest.loc['beta'] = perf_stats_backtest_ab[1]
perf_stats_backtest.columns = ['Backtest']
perf_stats_live = np.round(perf_stats(df_rets_live, inputIsNAV=False, returns_style='arithmetic'), 2)
perf_stats_live_ab = np.round(calc_alpha_beta(df_rets_live, benchmark_rets), 2)
perf_stats_live.loc['alpha'] = perf_stats_live_ab[0]
perf_stats_live.loc['beta'] = perf_stats_live_ab[1]
perf_stats_live.columns = ['Out_of_Sample']
perf_stats_all = np.round(perf_stats(df_rets, inputIsNAV=False, returns_style='arithmetic'), 2)
perf_stats_all_ab = np.round(calc_alpha_beta(df_rets, benchmark_rets), 2)
perf_stats_all.loc['alpha'] = perf_stats_all_ab[0]
perf_stats_all.loc['beta'] = perf_stats_all_ab[1]
perf_stats_all.columns = ['All_History']
perf_stats_both = perf_stats_backtest.join(perf_stats_live, how='inner')
perf_stats_both = perf_stats_both.join(perf_stats_all, how='inner')
print( perf_stats_both )
algo_ts[:algo_create_date].plot(lw=3, color='forestgreen', label='', alpha=0.6)
algo_ts[algo_create_date:].plot(lw=4, color='red', label='', alpha=0.6)
cone_df = cone_rolling(df_rets, num_stdev=future_cone_stdev, cone_fit_end_date=algo_create_date)
cone_df_fit = cone_df[ cone_df.index < algo_create_date]
cone_df_live = cone_df[ cone_df.index > algo_create_date]
cone_df_live = cone_df_live[ cone_df_live.index < df_rets.index[-1] ]
cone_df_future = cone_df[ cone_df.index > df_rets.index[-1] ]
cone_df_fit['line'].plot(ax=ax, ls='--', lw=2, color='forestgreen', alpha=0.7)
cone_df_live['line'].plot(ax=ax, ls='--', lw=2, color='red', alpha=0.7)
cone_df_future['line'].plot(ax=ax, ls='--', lw=2, color='navy', alpha=0.7)
ax.fill_between(cone_df_live.index,
cone_df_live.sd_down,
cone_df_live.sd_up,
color='red', alpha=0.30)
ax.fill_between(cone_df_future.index,
cone_df_future.sd_down,
cone_df_future.sd_up,
color='navy', alpha=0.25)
ax.legend(['S&P500', '7-10yr Bond', 'Algo Backtest', 'Algo LIVE' ], loc='upper left')
plt.axhline(1.0 , linestyle='-', color='black', lw=2)
plt.ylabel('Cumulative returns', fontsize=14)
plt.xlim( (algo_ts.index[0], cone_df.index[-1]) )
# Now plot the rolling beta of the portfolios
rolling_beta_window = 63
fig = plt.figure(figsize=(13,3))
ax = fig.add_subplot(111)
#ax.yaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(y_axis_formatter))
plt.title("Rolling Portfolio Beta to SP500",fontsize=16)
plt.ylabel('beta', fontsize=14)
rb_1 = rolling_beta(df_rets, benchmark_rets, rolling_window=rolling_beta_window*2)
rb_1.plot(color='steelblue', lw=3, alpha=0.6, ax=ax)
rb_2 = rolling_beta(df_rets, benchmark_rets, rolling_window=rolling_beta_window*3)
rb_2.plot(color='grey', lw=3, alpha=0.4, ax=ax)
plt.xlim( (algo_ts.index[0], cone_df.index[-1]) )
plt.ylim( (-2.5, 2.5) )
plt.axhline(rb_1.mean(), color='steelblue', linestyle='--', lw=3)
plt.axhline(0.0, color='black', linestyle='-', lw=2)
plt.fill_between(cone_df_future.index,
rb_1.mean() + future_cone_stdev*np.std(rb_1),
rb_1.mean() - future_cone_stdev*np.std(rb_1),
color='steelblue', alpha=0.2)
plt.legend(['6-mo', '12-mo'], loc='lower left')
# Rolling sharpe
fig = plt.figure(figsize=(13, 3))
ax = fig.add_subplot(111)
rolling_sharpe_window = 63*2
rolling_sharpe_ts = rolling_sharpe(df_rets, rolling_sharpe_window)
rolling_sharpe_ts.plot(alpha=.7, lw=3, color='orangered')
plt.title('Rolling Sharpe ratio (6-month)', fontsize=16)
plt.axhline(rolling_sharpe_ts.mean(), color='orangered', linestyle='--', lw=3)
plt.axhline(0.0, color='black', linestyle='-', lw=3)
plt.fill_between(cone_df_future.index,
rolling_sharpe_ts.mean() + future_cone_stdev*np.std(rolling_sharpe_ts),
rolling_sharpe_ts.mean() - future_cone_stdev*np.std(rolling_sharpe_ts),
color='orangered', alpha=0.15)
plt.xlim( (algo_ts.index[0], cone_df.index[-1]) )
plt.ylim( (-3.0, 6.0) )
plt.ylabel('Sharpe ratio', fontsize=14)
plot_calendar_returns_info_graphic(df_rets)
plt.figure(figsize=(13,7))
diff_pct = out_of_sample_vs_in_sample_returns_kde(cum_returns(df_rets_backtest , 1.0),
cum_returns(df_rets_live, 1.0) )
consistency_pct = int( 100*(1.0 - diff_pct) )
print "\n" + str(consistency_pct) + "%" + " :Similarity between Backtest vs. Out-of-Sample (daily returns distribution)\n"
sns.kdeplot(preprocessing.scale(df_rets_backtest) , bw='scott', shade=True, label='backtest', color='forestgreen')
sns.kdeplot(preprocessing.scale(df_rets_live) , bw='scott', shade=True, label='out-of-sample', color='red')
plt.title("Daily Returns Similarity = " + str(consistency_pct) + "%" )
dd_table = gen_drawdown_table(df_rets,top=10)
dd_table['peak date'] = map( extract_date, dd_table['peak date'])
dd_table['valley date'] = map( extract_date, dd_table['valley date'])
dd_table['recovery date'] = map( extract_date, dd_table['recovery date'])
dd_table = dd_table.sort('net drawdown in %', ascending=False)
print "\nTop 10 Worst Drawdowns"
print dd_table
plot_drawdowns(df_rets, top=10)
df_weekly = aggregate_returns(df_rets, 'weekly')
df_monthly = aggregate_returns(df_rets, 'monthly')
plt.figure(figsize=(13, 6))
sns.boxplot([df_rets, df_weekly, df_monthly], names=['daily', 'weekly', 'monthly'])
plt.title('Return quantiles')
var_daily = var_cov_var_normal(1e7, .05, df_rets.mean(), df_rets.std())
var_weekly = var_cov_var_normal(1e7, .05, df_weekly.mean(), df_weekly.std())
two_sigma_daily = df_rets.mean() - 2*df_rets.std()
two_sigma_weekly = df_weekly.mean() - 2*df_weekly.std()
var_sigma = pd.Series([two_sigma_daily, two_sigma_weekly],
index=['2-sigma returns daily', '2-sigma returns weekly'])
print np.round(var_sigma, 3)
# Get interesting time periods
rets_interesting = extract_interesting_date_ranges(df_rets)
print '\nStress Events'
print np.round(pd.DataFrame(rets_interesting).describe().transpose().loc[:,['mean','min','max']], 3)
bmark_interesting = extract_interesting_date_ranges(benchmark_rets)
fig = plt.figure(figsize=(28,61))
for i, (name, rets_period) in enumerate(rets_interesting.iteritems()):
ax = fig.add_subplot(6, 3, i+1)
cum_returns(rets_period).plot(ax=ax, color='forestgreen', label='algo', alpha=0.7, lw=2)
cum_returns(bmark_interesting[name]).plot(ax=ax, color='gray', label='SPY', alpha=0.6)
plt.legend(['algo', 'SPY'], loc='lower left')
ax.set_title(name, size=14)
ax.set_ylabel('', size=12)
ax.legend()
algo_bt = get_backtest('557f7e7ed2c0610ddf158ca9')
algo_rets = algo_bt.daily_performance.returns
analyze_single_algo( df_rets=algo_rets, algo_live_date='2014-1-1', cone_std=1.0 )
stock = securities_panel.loc['price']['EFA'].dropna()
stock_rets = stock.pct_change().dropna()
analyze_single_algo( df_rets=stock_rets, algo_live_date='2014-1-1', cone_std=1.0 )
def get_top_draw_downs(df_rets, top=10):
df_rets = df_rets.copy()
df_cum = cum_returns(df_rets)
running_max = np.maximum.accumulate(df_cum)
underwater = running_max - df_cum
drawdowns = []
for t in range(top):
peak, valley, recovery = get_max_draw_down_underwater(underwater)
# Slice out draw-down period
if not pd.isnull(recovery):
underwater = pd.concat(
[underwater.loc[:peak].iloc[:-1], underwater.loc[recovery:].iloc[1:]])
else:
# drawdown has not ended yet
underwater = underwater.loc[:peak]
drawdowns.append((peak, valley, recovery))
if len(df_rets) == 0:
break
return drawdowns
Similar to what is reported in the Quantopian Backtester, but computes a few other statistics as well.
The cumulative-returns plot shows:
- Your algo
- A couple of broad indexes for comparison purposes
“Cone Charts”
Math Behind the Cone
1) Compute Algo Volatility Expectation from the Backtest.
a) Uses *only* the *backtest* data to compute the daily volatility of the algo.
2) Compute Algo Profit Expectation from the Backtest
a) From the backtest data only, calculate the linear trend of the algo's profit (i.e. the slope of the equity curve as it climbs up and to the right while generating profits).
b) Extend this linear trend established from the backtest daily returns. If the algo performs similarly in live trading, we should expect the live performance to fall within a cone drawn around this trend, +/- the volatility computed in #1. (The linear trends are shown as *dashed* lines in the plot above.)
3) Compute the Cone based on Backtest Returns & Volatility
a) The midpoint line of the cone (i.e. the average profit expectation) is the linear trend established in #2 above.
b) Draw the cone using the daily volatility computed in #1, scaled by the square root of the number of days the algo has been live (the conventional square-root-of-time rule). For example, to convert a daily volatility into an annual volatility, multiply the daily value by sqrt(252).
c) So, for example, an algo with a 1% daily volatility has an annual volatility of roughly 16% (1% * sqrt(252)), which produces a cone that is +/-16% wide one year after the cone begins. Similarly, the cone width at month 6 is about +/-11%, since 1% * sqrt(126) ≈ 11%. Repeating this calculation for every day since the start of the cone gives the cone its width at each point and shapes it as time progresses forward. A minimal numeric sketch of this scaling rule follows below.
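The sketch below illustrates only the square-root-of-time widening described in #3; the daily volatility, trend slope, and horizon are made-up numbers, not outputs of cone_rolling above.
# Hypothetical inputs chosen only to illustrate the cone-width math described above.
daily_vol = 0.01                              # daily volatility estimated from the backtest alone
days_live = np.arange(1, 253)                 # trading days elapsed since the cone started
cone_width = daily_vol * np.sqrt(days_live)   # ~11% at day 126, ~16% at day 252
trend = 1.0 + 0.0004 * days_live              # made-up linear profit trend (the cone's midpoint line)
upper_band = trend * (1 + cone_width)         # mirrors how 'sd_up' is built in cone_rolling
lower_band = trend * (1 - cone_width)         # mirrors how 'sd_down' is built in cone_rolling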