from __future__ import division  # ensure / is true division under Python 2

import datetime

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytz
from pytz import timezone
import scipy as sp
import scipy.stats as stats
import seaborn as sns
import pyfolio as pf

from math import copysign
from collections import OrderedDict, defaultdict, deque

from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.data import morningstar as mstar
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline.filters.morningstar import Q500US, Q1500US
from quantopian.research import run_pipeline

# Load the backtest to analyze (get_backtest is a Quantopian research builtin).
# Note: "from datetime import datetime" is deliberately avoided -- it would shadow
# the datetime module, which is needed later for datetime.timedelta.
bt = get_backtest('586600541456ae6293a7dfd7')
returns = bt.daily_performance.returns
def cum_returns(df, withStartingValue=None):
    # exp(cumsum(log(1 + r))) is a numerically stable cumulative product of (1 + r)
    if withStartingValue is None:
        return np.exp(np.log(1 + df).cumsum()) - 1
    else:
        return np.exp(np.log(1 + df).cumsum()) * withStartingValue

def aggregate_returns(df_daily_rets, convert_to):
    # compound the daily returns within each calendar bucket
    cumulate_returns = lambda x: cum_returns(x)[-1]
    if convert_to == 'daily':
        return df_daily_rets
    elif convert_to == 'weekly':
        return df_daily_rets.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.isocalendar()[1]]).apply(cumulate_returns)
    elif convert_to == 'monthly':
        return df_daily_rets.groupby([lambda x: x.year, lambda x: x.month]).apply(cumulate_returns)
    elif convert_to == 'yearly':
        return df_daily_rets.groupby([lambda x: x.year]).apply(cumulate_returns)
    else:
        raise ValueError('convert_to must be daily, weekly, monthly or yearly')
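# Quick sanity check (sketch on made-up data, not backtest returns): two days of
# +1% in the same ISO week should compound to roughly +2.01%.
_demo = pd.Series([0.01, 0.01], index=pd.date_range('2015-01-05', periods=2))
print aggregate_returns(_demo, 'weekly')  # expect ~0.0201 for that week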
def plot_calendar_returns_info_graphic(daily_rets_ts, x_dim=15, y_dim=6):
    month_names = {
        1: 'Jan',
        2: 'Feb',
        3: 'Mar',
        4: 'Apr',
        5: 'May',
        6: 'Jun',
        7: 'Jul',
        8: 'Aug',
        9: 'Sep',
        10: 'Oct',
        11: 'Nov',
        12: 'Dec'
    }
    ann_ret_df = pd.DataFrame(aggregate_returns(daily_rets_ts, 'yearly'))
    monthly_ret_table = aggregate_returns(daily_rets_ts, 'monthly')
    monthly_ret_table = monthly_ret_table.unstack()
    monthly_ret_table = np.round(monthly_ret_table, 3)
    monthly_ret_table.columns = monthly_ret_table.columns.map(lambda col: month_names[col])
    annual_ret_table = aggregate_returns(daily_rets_ts, 'yearly')
    annual_ret_table = np.round(annual_ret_table, 4)
    vmin = np.round(min(monthly_ret_table.min(axis=1)) - 0.02, 2)
    vmax = np.round(max(monthly_ret_table.max(axis=1)) + 0.02, 2)
    monthly_ret_table = monthly_ret_table.assign(Annual=annual_ret_table)
    fig, ax1 = plt.subplots(figsize=(15, 15))
    sns.heatmap(monthly_ret_table.fillna(0) * 100.0, annot=True, annot_kws={"size": 13}, alpha=1.0, center=3.0,
                cbar=True, cmap=matplotlib.cm.PiYG, linewidths=2.75, xticklabels=True,
                cbar_kws={"orientation": "horizontal"}, robust=True, vmin=vmin * 100, vmax=vmax * 100, fmt='g')
    # Emphasize the appended "Annual" column (tick x-position 12.5 on the heatmap).
    for text in ax1.get_xticklabels():
        text.set_size(14)
        text.set_weight('bold')
        if str(text.get_unitless_position()[:-1]) == '(12.5,)':
            text.set_size(22)
            text.set_weight('bold')
            text.set_style('italic')
    for text in ax1.get_yticklabels():
        text.set_size(14)
        text.set_weight('bold')
    for text in ax1.texts:
        text.set_size(14)
        if str(text.get_unitless_position()[:-1]) == '(12.5,)':
            text.set_size(22)
            text.set_weight('bold')
    ax1.axvline(x=12, color='#4D032D', linewidth=5)
    ax1.axvspan(12, 13, facecolor='0.05', alpha=0.4)
    ax1.set_ylabel("Year")
    ax1.set_xlabel("Monthly Returns (%)")
    ax1.set_title("Monthly Returns (%), " +
                  "\nFrom: " + str(bt.start_date.date()) + " to " + str(bt.end_date.date()))
    plt.show()
pd.options.display.float_format = '{0:.3f}%'.format
# Year-end portfolio values -> per-year returns, compounded total return, and CAGR.
ytu = pd.DataFrame(bt.cumulative_performance.ending_portfolio_value)
ytu = ytu.groupby([ytu.index.year]).last()
ytu["opening"] = ytu.ending_portfolio_value.shift(1)
ytu = ytu.fillna(bt.capital_base)  # the first year opens at the capital base
ytu["annual_return"] = (ytu.ending_portfolio_value / ytu.opening) - 1
ytu["total_return"] = (1 + ytu.annual_return).cumprod() - 1  # compounded return to date
years_elapsed = np.arange(1, len(ytu) + 1)
ytu["CAGR"] = (1 + ytu.total_return) ** (1.0 / years_elapsed) - 1  # CAGR = (1+R)^(1/years) - 1
ytu = ytu[["annual_return", "CAGR"]]
ytu.annual_return = ytu.annual_return * 100
ytu.CAGR = ytu.CAGR * 100
ytu = ytu.sort_index(ascending=False)
print ytu
print "note: CAGR assumes full year trading"
ytu.plot(kind='barh', figsize=(15, 10), legend=True, fontsize=12,
         title='Annual Return % & CAGR % at the end of each Year')
plt.axvline(float(ytu.CAGR.mean()), color='g', linestyle='dashed', linewidth=2, label='CAGR Mean')
plt.axvline(float(ytu.annual_return.mean()), color='b', linestyle='dashed', linewidth=2, label='Annual Return Mean')
plt.legend()
plt.figure()
plot_calendar_returns_info_graphic(returns)
plt.figure()
pf.plot_monthly_returns_dist(returns)
plt.figure()
pf.timeseries.cum_returns(returns).plot()
plt.ylabel('Cumulative Returns')
plt.figure()
ytu = pf.timeseries.rolling_sharpe(returns, rolling_sharpe_window=252)
ytu = pd.DataFrame(ytu)
ytu = ytu.groupby([ytu.index.year]).last()
ytu.plot(kind='barh', figsize=(15, 10), legend=False, fontsize=12,
         title='ROLLING 12 MONTH SHARPE AT YEAR END')
plt.axvline(float(ytu.mean()), color='m', linestyle='dashed', linewidth=2, label='Mean')
# Source: Wiecki, Thomas; Campbell, Andrew; Lent, Justin; Stauth, Jessica:
# "All that Glitters Is Not Gold: Comparing Backtest and Out-of-Sample Performance
# on a Large Cohort of Trading Algorithms" (March 9, 2016).
# Available at SSRN: https://ssrn.com/abstract=2745220 or http://dx.doi.org/10.2139/ssrn.2745220
print "------------------------------------------------------------------"
print("\n"'ALL DATA BELOW IS OVER FULL BACKTEST OF {:2,.0f} days'.format(((bt.end_date.date() - bt.start_date.date()).days)))
print "------------------------------------------------------------------"
print("\n"'\x1b[1;31m'+'TAIL RATIO'+'\x1b[0m'"\n")
print "Determines the ratio between the right (95%) and left tail (5%). For example, a ratio of 0.25 means that losses are four times as bad as profits."
print("\n"'Backtest tail ratio = {:4,.3f}'.format((pf.capacity.empyrical.tail_ratio(returns))))
print "------------------------------------------------------------------"
print("\n"'\x1b[1;31m'+'KURTOSIS'+'\x1b[0m'"\n")
print('Characterizes the relative ‘peakedness’ or flatness of an investment’s return distribution compared with the normal distribution. The higher the kurtosis, the more peaked the return distribution is; the lower the kurtosis, the more rounded the return distribution is. A normal distribution has a kurtosis of 3. Higher kurtosis indicates a return distribution with a more acute peak around the mean (higher probability than a normal distribution of more returns clustered around the mean) and a greater chance of extremely large deviations from the expected return (fatter tails, more big surprises). Investors view a greater percentage of extremely large deviations from the expected return as an increase in risk. Lower kurtosis has a smaller peak (lower probability of returns around the mean) and a lower probability than a normal distribution of extreme returns.')
print('source: Greenwich Alternative Investments: http://www.greenwichai.com/index.php/hf-essentials/measure-of-risk - retrieved Dec 27, 2016')
print("\n"'Kurtosis over entire test period = {:4,.3f}'.format((sp.stats.stats.kurtosis(returns))))
xyz = sp.stats.stats.kurtosis(returns)-3
print("\n"'Excess kurtosis over entire test period = {:4,.3f}'.format(xyz))
print "------------------------------------------------------------------"
print("\n"'\x1b[1;31m'+'SKEWNESS'+'\x1b[0m'"\n")
print("\n"'A distribution with no tail to the right or to the left is one that is not skewed in any direction. This is the same as a normal distribution i.e. a distribution which has zero skewness. If there is a large frequency of occurrence of negative returns compared to positive returns then the distribution displays a fat left tail or negative skewness. In case the frequency of positive returns exceeds that of negative returns then the distribution displays a fat right tail or positive skewness.')
print("\n"'Skewness over entire test period = {:4,.3f}'.format((sp.stats.stats.skew(returns))))
print "------------------------------------------------------------------"
print("\n"'\x1b[1;31m'+'SHARPE RATIO'+'\x1b[0m'"\n")
print("\n"'Sharpe ratio last year = {:4,.3f}'"\n".format((float(ytu.returns[-1:]))))
print "------------------------------------------------------------------"
print("\n"'\x1b[1;31m'+'ROLLING SHARPE RATIO'+'\x1b[0m'"\n")
print("\n"'Rolling Monthly Sharpe Ratio over entire test period has a mean of {:4,.4f} and standard deviation of {:4,.4f}'
.format((pf.timeseries.rolling_sharpe(returns, rolling_sharpe_window=(pf.capacity.empyrical.stats.APPROX_BDAYS_PER_MONTH)).mean()),
pf.timeseries.rolling_sharpe(returns, rolling_sharpe_window=(pf.capacity.empyrical.stats.APPROX_BDAYS_PER_MONTH)).std()))
print("\n"'Rolling 6 Month Sharpe Ratio over entire test period has a mean of {:4,.4f} and standard deviation of {:4,.4f}'
.format((pf.timeseries.rolling_sharpe(returns, rolling_sharpe_window=(pf.capacity.empyrical.stats.APPROX_BDAYS_PER_MONTH * 6)).mean()),
pf.timeseries.rolling_sharpe(returns, rolling_sharpe_window=(pf.capacity.empyrical.stats.APPROX_BDAYS_PER_MONTH * 6)).std()))
print("\n"'Rolling 12 Month Sharpe Ratio over entire test period has a mean of {:4,.4f} and standard deviation of {:4,.4f}'
.format((pf.timeseries.rolling_sharpe(returns, rolling_sharpe_window=(pf.capacity.empyrical.stats.APPROX_BDAYS_PER_MONTH * 12)).mean()),
pf.timeseries.rolling_sharpe(returns, rolling_sharpe_window=(pf.capacity.empyrical.stats.APPROX_BDAYS_PER_MONTH * 12)).std()))
print "------------------------------------------------------------------"
spy = get_pricing(symbols=bt.benchmark_security, start_date=bt.start_date.date(), end_date=bt.end_date.date(),
                  symbol_reference_date=None, frequency='daily', fields={"price"}, handle_missing='ignore')
spy = pd.Series(spy.price.pct_change())
alpha_beta = pf.capacity.empyrical.alpha_beta(returns, spy, risk_free=0.0, period='daily', annualization=None)
print("\n"'\x1b[1;31m'+'ALPHA + BETA'+'\x1b[0m'"\n")
print("\n""\n""Annualized Alpha is the annualized return in excess of what the benchmark exposure explains. Beta is the sensitivity of the algo's daily returns to the benchmark's daily returns (the slope of the regression line).")
print("\n"'Annualized Alpha over entire test period = {:10.5}'.format(alpha_beta[0]))
print('Beta over entire test period = {:10.5}'.format(alpha_beta[1]))
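# Cross-check (sketch): beta is the OLS slope of algo daily returns on benchmark
# daily returns, so scipy's linregress should agree with empyrical's beta above.
_aligned = pd.concat([returns, spy], axis=1).dropna()
print 'OLS beta check = {:10.5}'.format(stats.linregress(_aligned.iloc[:, 1], _aligned.iloc[:, 0])[0])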
print "------------------------------------------------------------------"
print("\n"'\x1b[1;31m'+'COMMON SENSE RATIO'+'\x1b[0m'"\n")
print"\n""Above 1: make money, below 1: lose money"
print"\n""Common Sense Ratio = Tail ratio * Gain to Pain Ratio"
print"Common Sense Ratio = [percentile(returns, 95%) * Sum(profits)] / [percentile(returns, 5%) * Sum(losses)]"
print"Trend following strategies have low win rate. Aggregate losses kill profitability. Therefore risk is in the aggregate which is measured by the profit ratio or Gain to Pain Ratio (Jack Schwager favourite measure btw). No surprise since GPR is the ratio version of gain expectancy or trading edge. Mean reverting strategies fail on knockouts. LTCM best case. A few blow-ups and game over. So the risk is in the tail. Therefore, use tail ratio. Now, a bit of arithmetical magic clled transitivity, or multiply GPR by tail and voila CSR. It recaptures risk on both sides."
print"source:http://alphasecurecapital.com/regardless-of-the-asset-class-there-are-only-two-types-of-strategies/"
print("\n"'Common Sense ratio over entire test period = {:4,.3f}'.format((pf.capacity.empyrical.tail_ratio(returns) * (1 + pf.capacity.empyrical.annual_return(returns)))))
print "------------------------------------------------------------------"
print("\n"'\x1b[1;31m'+'VALUE AT RISK(VaR)'+'\x1b[0m'"\n")
print "\n""\n""a statistical technique used to measure and quantify the possibility of losing money within an investment portfolio over a specific time frame to determine the extent and occurrence ratio of potential losses in their institutional portfolios. VaR quantifies market risk while it is being taken. It measures the odds of losing money but does not indicate certainty. VAR summarizes the predicted maximum loss (or worst loss) over a target horizon within a given confidence interval."
print("\n"'\x1b[1;34m'+'HISTORICAL METHOD VAR'+'\x1b[0m')
print("\n"'\x1b[1;31m'+'DAILY VAR - HISTORICAL'+'\x1b[0m'"\n")
yui = pd.DataFrame((returns*100).quantile([.01, .05, 0.1, 0.5]))
yui["percent"] = yui.index
yui.index = range(len(yui))
yui2 = pd.DataFrame({'percent':[stats.percentileofscore(returns*100,0)/100],
'returns':['0.0']})
yui = yui.append(yui2, ignore_index=True)
print('With 99% confidence, we expect that our worst daily loss will not exceed {:4,.3f}%, or in other words we can expect'
' that with a probability of 1% the daily loss of the portfolio will decrease by more than {:4,.3f}%.'
.format(yui.returns[0],yui.returns[0]))
print("\n"'With 95% confidence, we expect that our worst daily loss will not exceed {:4,.3f}%, or in other words we can expect'
' that with a probability of 5% the daily loss of the portfolio will decrease by more than {:4,.3f}%.'
.format(yui.returns[1],yui.returns[1]))
print("\n"'With 90% confidence, we expect that our worst daily loss will not exceed {:4,.3f}%, or in other words we can expect'
' that with a probability of 10% the daily loss of the portfolio will decrease by more than {:4,.3f}%.'
.format(yui.returns[2],yui.returns[2]))
print("\n"'With 50% confidence, we expect that our worst daily loss will not exceed {:4,.3f}%, or in other words we can expect'
' that with a probability of 50% the daily loss of the portfolio will decrease by more than {:4,.3f}%.'
.format(yui.returns[3],yui.returns[3]))
print("\n"'With {:4,.1f}% confidence, we expect that our worst daily loss will not exceed 0%, or in other words we can expect'
' that with a probability of {:4,.1f}% the daily loss of the portfolio will decrease by more than 0%.'
.format((1-yui.percent[4])*100,yui.percent[4]*100))
print("\n"'\x1b[1;31m'+'MONTHLY VAR - HISTORICAL'+'\x1b[0m'"\n")
yui = pd.DataFrame((pf.capacity.empyrical.aggregate_returns(returns, convert_to='monthly')*100).quantile([.01, .05, 0.1, 0.5]))
yui["percent"] = yui.index
yui.index = range(len(yui))
yui2 = pd.DataFrame({'percent':[stats.percentileofscore(pf.capacity.empyrical.aggregate_returns(returns, convert_to='monthly')*100,0)/100],
'returns':['0.0']})
yui = yui.append(yui2, ignore_index=True)
print('With 99% confidence, we expect that our worst monthly loss will not exceed {:4,.3f}%, or in other words we can expect'
' that with a probability of 1% the monthly loss of the portfolio will decrease by more than {:4,.3f}%.'
.format(yui.returns[0],yui.returns[0]))
print("\n"'With 95% confidence, we expect that our worst monthly loss will not exceed {:4,.3f}%, or in other words we can expect'
' that with a probability of 5% the monthly loss of the portfolio will decrease by more than {:4,.3f}%.'
.format(yui.returns[1],yui.returns[1]))
print("\n"'With 90% confidence, we expect that our worst monthly loss will not exceed {:4,.3f}%, or in other words we can expect'
' that with a probability of 10% the monthly loss of the portfolio will decrease by more than {:4,.3f}%.'
.format(yui.returns[2],yui.returns[2]))
print("\n"'With 50% confidence, we expect that our worst monthly loss will not exceed {:4,.3f}%, or in other words we can expect'
' that with a probability of 50% the monthly loss of the portfolio will decrease by more than {:4,.3f}%.'
.format(yui.returns[3],yui.returns[3]))
print("\n"'With {:4,.1f}% confidence, we expect that our worst monthly loss will not exceed 0%, or in other words we can expect'
' that with a probability of {:4,.1f}% the monthly loss of the portfolio will decrease by more than 0%.'
.format((1-yui.percent[4])*100,yui.percent[4]*100))
print("\n"'\x1b[1;31m'+'ANNUAL VAR - HISTORICAL'+'\x1b[0m'"\n")
yui = pd.DataFrame((pf.capacity.empyrical.aggregate_returns(returns, convert_to='yearly')*100).quantile([.01, .05, 0.1, 0.5]))
yui["percent"] = yui.index
yui.index = range(len(yui))
yui2 = pd.DataFrame({'percent':[stats.percentileofscore(pf.capacity.empyrical.aggregate_returns(returns, convert_to='yearly')*100,0)/100],
'returns':['0.0']})
yui = yui.append(yui2, ignore_index=True)
print('With 99% confidence, we expect that our worst annual loss will not exceed {:4,.3f}%, or in other words we can expect'
' that with a probability of 1% the annual loss of the portfolio will decrease by more than {:4,.3f}%.'
.format(yui.returns[0],yui.returns[0]))
print("\n"'With 95% confidence, we expect that our worst annual loss will not exceed {:4,.3f}%, or in other words we can expect'
' that with a probability of 5% the annual loss of the portfolio will decrease by more than {:4,.3f}%.'
.format(yui.returns[1],yui.returns[1]))
print("\n"'With 90% confidence, we expect that our worst annual loss will not exceed {:4,.3f}%, or in other words we can expect'
' that with a probability of 10% the annual loss of the portfolio will decrease by more than {:4,.3f}%.'
.format(yui.returns[2],yui.returns[2]))
print("\n"'With 50% confidence, we expect that our worst annual loss will not exceed {:4,.3f}%, or in other words we can expect'
' that with a probability of 50% the annual loss of the portfolio will decrease by more than {:4,.3f}%.'
.format(yui.returns[3],yui.returns[3]))
print("\n"'With {:4,.1f}% confidence, we expect that our worst annual loss will not exceed 0%, or in other words we can expect'
' that with a probability of {:4,.1f}% the annual loss of the portfolio will decrease by more than 0%.'
.format((1-yui.percent[4])*100,yui.percent[4]*100))
print("\n"'With 99% confidence, we expect that our worst daily loss will not exceed {:4,.3f}%, or in other words we can expect'
' that with a probability of 1% the daily loss of the portfolio will decrease by more than {:4,.3f}%.'
.format(yui.returns[0],yui.returns[0]))
print("\n"'With 95% confidence, we expect that our worst daily loss will not exceed {:4,.3f}%, or in other words we can expect'
' that with a probability of 5% the daily loss of the portfolio will decrease by more than {:4,.3f}%.'
.format(yui.returns[1],yui.returns[1]))
print("\n"'With 90% confidence, we expect that our worst daily loss will not exceed {:4,.3f}%, or in other words we can expect'
' that with a probability of 10% the daily loss of the portfolio will decrease by more than {:4,.3f}%.'
.format(yui.returns[2],yui.returns[2]))
print("\n"'With 50% confidence, we expect that our worst daily loss will not exceed {:4,.3f}%, or in other words we can expect'
' that with a probability of 50% the daily loss of the portfolio will decrease by more than {:4,.3f}%.'
.format(yui.returns[3],yui.returns[3]))
print("\n"'With {:4,.1f}% confidence, we expect that our worst daily loss will not exceed 0%, or in other words we can expect'
' that with a probability of {:4,.1f}% the daily loss of the portfolio will decrease by more than 0%.'
.format((1-yui.percent[4])*100,yui.percent[4]*100))
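# For comparison with the historical method (a sketch, not part of the original
# output): parametric (variance-covariance) daily VaR under a normality assumption.
# _mu, _sigma = returns.mean(), returns.std()
# print 'Parametric daily 95% VaR = {:4,.3f}%'.format((_mu + _sigma * stats.norm.ppf(0.05)) * 100)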
def vwap(transaction):
    # volume-weighted average price of a block of transactions
    return (transaction.amount * transaction.price).sum() / transaction.amount.sum()

def _groupby_consecutive(txn, max_delta=pd.Timedelta('8h')):
    # Merge same-direction transactions that execute within max_delta of each
    # other into single blocks, priced at their VWAP.
    def vwap(transaction):
        if transaction.amount.sum() == 0:
            # warnings.warn('Zero transacted shares, setting vwap to nan.')
            print "Zero transacted shares, setting vwap to nan."
            return np.nan
        return (transaction.amount * transaction.price).sum() / \
            transaction.amount.sum()
    out = []
    for sym, t in txn.groupby('symbol'):
        t = t.sort_index()
        t.index.name = 'dt'
        t = t.reset_index()
        t['order_sign'] = t.amount > 0
        t['block_dir'] = (t.order_sign.shift(1) != t.order_sign).astype(int).cumsum()
        t['block_time'] = ((t.dt - t.dt.shift(1)) > max_delta).astype(int).cumsum()
        grouped_price = t.groupby(['block_dir', 'block_time']).apply(vwap)
        grouped_price.name = 'price'
        grouped_rest = t.groupby(['block_dir', 'block_time']).agg({
            'amount': 'sum',
            'symbol': 'first',
            'dt': 'first'})
        grouped = grouped_rest.join(grouped_price)
        out.append(grouped)
    out = pd.concat(out)
    out = out.set_index('dt')
    return out
def extract_round_trips_stack(transactions, starting_capital=None):
    # Match buys against sells share-by-share (FIFO via a deque) to build round trips.
    transactions = _groupby_consecutive(transactions)
    roundtrips = []
    for sym, trans_sym in transactions.groupby('symbol'):
        trans_sym = trans_sym.sort_index()
        price_stack = deque()
        dt_stack = deque()
        trans_sym['signed_price'] = trans_sym.price * \
            np.sign(trans_sym.amount)
        trans_sym['abs_amount'] = trans_sym.amount.abs().astype(int)
        for dt, t in trans_sym.iterrows():
            if t.price < 0:
                # warnings.warn('Negative price detected, ignoring for round-trip.')
                print "Negative price detected, ignoring for round-trip."
                continue
            indiv_prices = [t.signed_price] * t.abs_amount
            if (len(price_stack) == 0) or \
               (copysign(1, price_stack[-1]) == copysign(1, t.amount)):
                price_stack.extend(indiv_prices)
                dt_stack.extend([dt] * len(indiv_prices))
            else:
                # Close round-trip
                pnl = 0
                invested = 0
                cur_open_dts = []
                for price in indiv_prices:
                    if len(price_stack) != 0 and \
                       (copysign(1, price_stack[-1]) != copysign(1, price)):
                        # Retrieve first dt, stock-price pair from stack
                        prev_price = price_stack.popleft()
                        prev_dt = dt_stack.popleft()
                        pnl += -(price + prev_price)
                        cur_open_dts.append(prev_dt)
                        invested += abs(prev_price)
                    else:
                        # Push additional stock-prices onto stack
                        price_stack.append(price)
                        dt_stack.append(dt)
                roundtrips.append({'pnl': pnl,
                                   'open_dt': cur_open_dts[0],
                                   'close_dt': dt,
                                   'long': price < 0,
                                   'rt_returns': pnl / invested,
                                   'symbol': sym,
                                   'invested': invested,
                                   })
    roundtrips = pd.DataFrame(roundtrips)
    roundtrips['duration'] = roundtrips['close_dt'] - roundtrips['open_dt']
    if starting_capital is not None:
        roundtrips['returns'] = roundtrips['pnl'] / starting_capital
    '''
    if starting_capital is not None:
        # Need to normalize so that we can join
        pv = pd.DataFrame(starting_capital,
                          columns=['starting_capital'])\
            .assign(date=starting_capital.index)
        roundtrips['date'] = roundtrips.close_dt.apply(lambda x:
                                                       x.replace(hour=0,
                                                                 minute=0,
                                                                 second=0))
        tmp = roundtrips.join(pv, on='date', lsuffix='_')
        roundtrips['returns'] = tmp.pnl / tmp.starting_capital
        roundtrips = roundtrips.drop('date', axis='columns')
    '''
    return roundtrips
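# Minimal illustration (synthetic transactions, not backtest data): buy 100 shares
# at $10 and sell them at $11 the same day -- one long round trip with pnl = $100.
_demo_txn = pd.DataFrame(
    {'amount': [100, -100],
     'price': [10.0, 11.0],
     'symbol': ['DEMO', 'DEMO']},
    index=pd.to_datetime(['2015-01-02 10:00', '2015-01-02 15:00']))
print extract_round_trips_stack(_demo_txn)[['symbol', 'long', 'pnl', 'rt_returns']]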
pnl_stats = OrderedDict([('Total profit', lambda x: np.round(x.sum(),0)),
('Gross profit', lambda x: x[x>0].sum()),
('Gross loss', lambda x: x[x<0].sum()),
('Profit factor', lambda x: x[x>0].sum() / x[x<0].abs().sum() if x[x<0].abs().sum() != 0 else np.nan),
('Avg. trade net profit', 'mean'),
('Avg. winning trade', lambda x: x[x>0].mean()),
('Avg. losing trade', lambda x: x[x<0].mean()),
('Ratio Avg. Win:Avg. Loss', lambda x: x[x>0].mean() / x[x<0].abs().mean() if x[x<0].abs().mean() != 0 else np.nan),
('Largest winning trade', 'max'),
('Largest losing trade', 'min')])
summary_stats = OrderedDict([('Total number of trades', 'count'),
('Percent profitable', lambda x: len(x[x>0]) / float(len(x))),
('Winning trades', lambda x: len(x[x>0])),
('Losing trades', lambda x: len(x[x<0])),
('Even trades', lambda x: len(x[x==0])),])
return_stats = OrderedDict([('Avg returns all trades', lambda x: x.mean()),
('Avg returns winning', lambda x: x[x>0].mean()),
('Avg returns losing', lambda x: x[x<0].mean()),
('Median returns all trades', lambda x: x.median()),
('Median returns winning', lambda x: x[x>0].median()),
('Median returns losing', lambda x: x[x<0].median()),
('Profit factor', lambda x: x[x>0].mean() / x[x<0].abs().mean() if x[x<0].abs().mean() != 0 else np.nan),
('Percent profitable', lambda x: len(x[x>0]) / float(len(x))),
('Ratio Avg. Win:Avg. Loss', lambda x: x[x>0].mean() / x[x<0].abs().mean() if x[x<0].abs().mean() != 0 else np.nan),
('Largest winning trade', 'max'),
('Largest losing trade', 'min')])
duration_stats = OrderedDict([('Avg duration', lambda x: x.mean()),
('Median duration', lambda x: x.median()),
('Avg # trades per day', lambda x: (float(len(x)) / (x.max() - x.min()).days) if (x.max() - x.min()).days != 0 else np.nan),
('Avg # trades per month', lambda x: (float(len(x)) / (((x.max() - x.min()).days) / pf.APPROX_BDAYS_PER_MONTH)) if (x.max() - x.min()).days != 0 else np.nan)
])
# Add ticker symbols to the raw transactions, then build the round-trip table.
transactions = bt.transactions
transactions['symbol'] = map(lambda x: x.symbol, symbols(transactions.sid))
trades = extract_round_trips_stack(transactions, starting_capital=bt.daily_performance.starting_cash.iloc[0])

def get_SID(row):
    # Look the ticker back up to get its security object (SID).
    row['SID'] = symbols(row['symbol'])
    return row

trades = trades.apply(get_SID, axis=1)
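# Sketch of a faster alternative: resolve each unique ticker once, then map it back,
# instead of calling symbols() once per row.
# _sid_map = {t: symbols(t) for t in trades.symbol.unique()}
# trades['SID'] = trades.symbol.map(_sid_map)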
sector = mstar.asset_classification.morningstar_sector_code.latest
pipe = Pipeline(
columns={'Close': USEquityPricing.close.latest,
'Sector Code': sector},
screen=Q1500US()
)
#run_pipeline(pipe, bt.end_date.date(), bt.end_date.date()).head(10) # head(10) shows just the first 10 rows
SECTOR_CODE_NAMES = {
Sector.BASIC_MATERIALS: 'Basic Materials',
Sector.CONSUMER_CYCLICAL: 'Consumer Cyclical',
Sector.FINANCIAL_SERVICES: 'Financial Services',
Sector.REAL_ESTATE: 'Real Estate',
Sector.CONSUMER_DEFENSIVE: 'Consumer Defensive',
Sector.HEALTHCARE: 'Healthcare',
Sector.UTILITIES: 'Utilities',
Sector.COMMUNICATION_SERVICES: 'Communication Services',
Sector.ENERGY: 'Energy',
Sector.INDUSTRIALS: 'Industrials',
Sector.TECHNOLOGY: 'Technology',
-1: 'Unknown'
}
def get_q1500_sector_codes(day1, day2):
    pipe = Pipeline(columns={'Sector': Sector()})  # , screen=Q1500US())
    # Drop the datetime level of the index, since we only have one day of data.
    return run_pipeline(pipe, day1, day2).reset_index(level=0, drop=True)

def calculate_sector_counts(sectors):
    counts = (sectors.groupby('Sector').size())
    # Replace numeric sector codes with human-friendly names.
    counts.index = counts.index.map(lambda code: SECTOR_CODE_NAMES[code])
    return counts
#INSTEAD OF RUNNING FULL PIPELINE, MAYBE CUSTOM FACTOR ONLY ON SID IN TRADES??
run_pipeline(pipe, bt.end_date.date(), bt.end_date.date()).head(10)
zz = get_q1500_sector_codes(bt.end_date.date(),bt.end_date.date())
zz["sector_name"] = zz.Sector.map(lambda code: SECTOR_CODE_NAMES[code])
trades['sector']=trades.SID.map(zz.sector_name)
trades['sector']=trades.sector.fillna('Unknown')
pd.options.display.float_format = '{:20,.2f}'.format
stats = trades.assign(ones=1).groupby('ones')['pnl'].agg(summary_stats).T.rename_axis({1.0: 'All trades'}, axis='columns')
stats2 = trades.groupby('long')['pnl'].agg(summary_stats).T.rename_axis({False: 'Short trades', True: 'Long trades'}, axis='columns')
stats = stats.join(stats2)
print "+++SHOULD SHOW UNIQUE TRADES ALSO!!+++"
stats[['All trades', 'Long trades', 'Short trades']]
ts = trades.groupby('sector')['pnl'].agg(summary_stats).T.rename_axis({1.0: 'All trades'}, axis='columns')
ts
ts2 = trades.groupby(['sector','long'])['pnl'].agg(summary_stats).T.rename_axis({False: 'Short trades', True: 'Long trades'}, axis='columns')
ts2
tsx = trades[trades.sector == 'Financial Services']
tsxa = trades[trades.sector == 'Healthcare']
ts3a = tsxa.groupby(['sector','long'])['pnl'].agg(summary_stats).T.rename_axis({False: 'Short trades', True: 'Long trades'}, axis='columns')
ts3b = tsx.groupby(['sector','long'])['pnl'].agg(summary_stats).T.rename_axis({False: 'Short trades', True: 'Long trades'}, axis='columns')
ts3 = ts3a.join(ts3b)
ts3
pd.options.display.float_format = '{:20,.2f}'.format
stats = trades.assign(ones=1).groupby('ones')['pnl'].agg(pnl_stats).T.rename_axis({1.0: 'All trades'}, axis='columns')
stats2 = trades.groupby('long')['pnl'].agg(pnl_stats).T.rename_axis({False: 'Short trades', True: 'Long trades'}, axis='columns')
stats = stats.join(stats2)
stats[['All trades', 'Long trades', 'Short trades']]
ts = trades.groupby('sector')['pnl'].agg(pnl_stats).T.rename_axis({1.0: 'All trades'}, axis='columns')
ts
ts2 = trades.groupby(['sector','long'])['pnl'].agg(pnl_stats).T.rename_axis({False: 'Short trades', True: 'Long trades'}, axis='columns')
ts2
tsx = trades[trades.sector == 'Financial Services']
tsxa = trades[trades.sector == 'Healthcare']
ts3a = tsxa.groupby(['sector','long'])['pnl'].agg(pnl_stats).T.rename_axis({False: 'Short trades', True: 'Long trades'}, axis='columns')
ts3b = tsx.groupby(['sector','long'])['pnl'].agg(pnl_stats).T.rename_axis({False: 'Short trades', True: 'Long trades'}, axis='columns')
ts3 = ts3a.join(ts3b)
ts3
d_stats = trades.assign(ones=1).groupby('ones')['duration'].agg(duration_stats).T.rename_axis({1.0: 'All trades'}, axis='columns')
d_stats_wl = trades.groupby('long')['duration'].agg(duration_stats).T.rename_axis({False: 'Short trades', True: 'Long trades'}, axis='columns')
d_stats = d_stats.join(d_stats_wl)
d_stats[['All trades', 'Long trades', 'Short trades']]
ts = trades.groupby('sector')['duration'].agg(duration_stats).T.rename_axis({1.0: 'All trades'}, axis='columns')
ts
ts2 = trades.groupby(['sector','long'])['duration'].agg(duration_stats).T.rename_axis({False: 'Short trades', True: 'Long trades'}, axis='columns')
ts2
tsx = trades[trades.sector == 'Financial Services']
tsxa = trades[trades.sector == 'Healthcare']
ts3a = tsxa.groupby(['sector','long'])['duration'].agg(duration_stats).T.rename_axis({False: 'Short trades', True: 'Long trades'}, axis='columns')
ts3b = tsx.groupby(['sector','long'])['duration'].agg(duration_stats).T.rename_axis({False: 'Short trades', True: 'Long trades'}, axis='columns')
ts3 = ts3a.join(ts3b)
ts3
pd.options.display.float_format = '{0:.5f}%'.format
stats = trades.assign(ones=1).groupby('ones')['returns'].agg(return_stats).T.rename_axis({1.0: 'All trades'}, axis='columns')
stats2 = trades.groupby('long')['returns'].agg(return_stats).T.rename_axis({False: 'Short trades', True: 'Long trades'}, axis='columns')
stats = stats.join(stats2)
print "CAUTION-MAX WIN/LOSING TRADE AT 100X"
(stats[['All trades', 'Long trades', 'Short trades']] * 100)
ts = trades.groupby('sector')['returns'].agg(return_stats).T.rename_axis({1.0: 'All trades'}, axis='columns')
ts
ts2 = trades.groupby(['sector','long'])['returns'].agg(return_stats).T.rename_axis({False: 'Short trades', True: 'Long trades'}, axis='columns')
ts2
tsx = trades[trades.sector == 'Financial Services']
tsxa = trades[trades.sector == 'Healthcare']
ts3a = tsxa.groupby(['sector','long'])['returns'].agg(return_stats).T.rename_axis({False: 'Short trades', True: 'Long trades'}, axis='columns')
ts3b = tsx.groupby(['sector','long'])['returns'].agg(return_stats).T.rename_axis({False: 'Short trades', True: 'Long trades'}, axis='columns')
ts3 = ts3a.join(ts3b)
ts3
pd.options.display.float_format = '{0:.3f}%'.format
lp = trades.groupby('symbol')['returns'].agg(return_stats) * 100
lp = lp.sort_index()
# Count trades per symbol in one pass rather than appending row by row.
qwe = trades.groupby('symbol').size().to_frame('number of trades')
lp = lp.merge(qwe, left_index=True, right_index=True, how='left')
cols = lp.columns.tolist()
cols = cols[-1:] + cols[:-1]
lp = lp[cols]
lp = lp.sort('Avg returns all trades',ascending=False)
lp['number of trades'] = lp['number of trades'].map('{:,.0f}'.format)
lp['Profit factor'] = (lp['Profit factor'] / 100).map('{:,.3f}'.format)  # undo the x100: profit factor is a ratio, not a percent
lp
med_hold = trades.groupby('symbol')['duration'].agg(lambda x: x.median())
# not super helpful... for me at least, when using a larger universe size
med_hold.sort(axis=0, ascending=True, kind='quicksort', na_position='last', inplace=True)
med_hold
returns = bt.daily_performance.returns
cash = bt.daily_performance['ending_cash']
positions = pf.capacity.pos.extract_pos(bt.positions, cash)
#transactions = _groupby_consecutive(bt.transactions)
gross_lev = bt.recorded_vars.leverage
print "Downside deviation is a measure of downside risk that focuses on returns that fall below a minimum threshold or minimum acceptable return (MAR)."
print("\n"'Annualized downside deviation below 0% per month = {:3}%'.format((pf.capacity.empyrical.stats.downside_risk(returns, required_return=0.0, period='monthly'))* 100))
print("\n"'Annualized downside deviation below 1% per month = {:3}%'.format((pf.capacity.empyrical.stats.downside_risk(returns, required_return=0.01, period='monthly'))* 100))
print("\n"'Annualized downside deviation below 2% per month = {:3}%'.format((pf.capacity.empyrical.stats.downside_risk(returns, required_return=0.02, period='monthly'))* 100))
print("\n"'Annualized downside deviation below 3% per month = {:3}%'.format((pf.capacity.empyrical.stats.downside_risk(returns, required_return=0.03, period='monthly'))* 100))
print "\n"
print("\n"'Annualized downside deviation below 0% per year = {:3}%'.format((pf.capacity.empyrical.stats.downside_risk(returns, required_return=0.0, period='yearly'))* 100))
print("\n"'Annualized downside deviation below 1% per year = {:3}%'.format((pf.capacity.empyrical.stats.downside_risk(returns, required_return=0.01, period='yearly'))* 100))
print("\n"'Annualized downside deviation below 2% per year = {:3}%'.format((pf.capacity.empyrical.stats.downside_risk(returns, required_return=0.02, period='yearly'))* 100))
print("\n"'Annualized downside deviation below 3% per year = {:3}%'.format((pf.capacity.empyrical.stats.downside_risk(returns, required_return=0.03, period='yearly'))* 100))
print("\n"'Annualized downside deviation below 5% per year = {:3}%'.format((pf.capacity.empyrical.stats.downside_risk(returns, required_return=0.05, period='yearly'))* 100))
# Run a Bayesian model that assumes returns are T-distributed; the trace holds
# posterior samples, so P(Sharpe > x) is just the fraction of samples above x.
trace = pf.bayesian.run_model('t', returns)
for level in (0, 0.25, 0.5, 0.75, 1, 1.25, 1.5):
    print("\n"'Probability of Sharpe ratio > {} = {:3}%'.format(level, (trace['sharpe'] > level).mean() * 100))
#import pymc3 as pm #IMPORT RESTRICTION!!!
#pm.traceplot(trace);
pf.plot_prob_profit_trade(trades)
plt.figure()
pf.plot_slippage_sensitivity(returns, positions=positions, transactions=bt.transactions)
plt.figure()
# slippage_params are alternative per-trade slippage assumptions, in basis points
pf.plot_slippage_sweep(returns, positions=positions, transactions=bt.transactions, slippage_params=(3, 8, 12, 18, 30, 50))
plt.figure()
plt.plot(bt.risk.alpha.index, bt.risk.alpha.values)
plt.ylabel('Single Factor Market Alpha')
lolo = trades.sort('close_dt').copy()
lolo['year'] = lolo.close_dt.map(lambda ts: ts.year)  # calendar year the round trip closed in
pd.options.display.float_format = '{0:.3f}%'.format
total_pnl = lolo['pnl'].sum()
pct_profit_attribution = (lolo.groupby(['sector'])['pnl'].sum() / total_pnl)*100
pct_profit_attribution =pd.DataFrame(pct_profit_attribution.sort(inplace=False, ascending=False))
print("\n"'Profitability (PnL / PnL total) per Sector over entire backtest period of {:2,.0f} days'.format(((bt.end_date.date() - bt.start_date.date()).days)))
pct_profit_attribution
lolo1 = lolo[['pnl', 'sector', 'year']]  # select by name rather than position
rtyy = range(bt.start_date.year, bt.end_date.year + 1)
rtyy2 = pd.DataFrame()
for i in rtyy:
    rtyy1 = (lolo1[lolo1.year == i].groupby(['sector', 'year']).sum() / lolo1.groupby('year').sum()) * 100
    print rtyy1
    rtyy2 = rtyy2.append(rtyy1)
rtyy3 = rtyy2.sortlevel(level=0, axis=0, ascending=True, inplace=False, sort_remaining=True)
rtyy4 = rtyy3.unstack(level=-1).T
print "\n""\n""+++++ Profit attribution % per Sector Per Year (PnL for the Sector per yr / PnL total per yr) +++++""\n"
print "...yes I know, not exactly profit attribution, but still insightful...""\n""\n"
rtyy4.plot(x=None, y=None, kind='bar', ax=None, subplots=True, sharex=False, sharey=False, layout=None, figsize=(15,55),
use_index=True, title=None, grid=None, legend=True, style=None, logx=False, logy=False, loglog=False,
xticks=None, yticks=None, xlim=None, ylim=None, rot=None, fontsize=12, colormap=None, table=False,
yerr=None, xerr=None, secondary_y=False, sort_columns=False)
lolo = trades.sort('close_dt').copy()
lolo['year'] = lolo.close_dt.map(lambda ts: ts.year)
pd.options.display.float_format = '${:2,.0f}'.format
profit_attribution = lolo.groupby(['sector'])['pnl'].sum()
profit_attribution =pd.DataFrame(profit_attribution.sort(inplace=False, ascending=False))
print("\n"'Profit ($ PnL) per Sector over entire backtest period of {:2,.0f} days'.format(((bt.end_date.date() - bt.start_date.date()).days)))
profit_attribution
lolo1 = lolo[['pnl', 'sector', 'year']]
rtyy = range(bt.start_date.year, bt.end_date.year + 1)
rtyy2 = pd.DataFrame()
for i in rtyy:
    rtyy1 = lolo1[lolo1.year == i].groupby(['sector', 'year']).sum()
    print rtyy1
    rtyy2 = rtyy2.append(rtyy1)
rtyy3 = rtyy2.sortlevel(level=0, axis=0, ascending=True, inplace=False, sort_remaining=True)
rtyy4 = rtyy3.unstack(level=-1).T
print "\n""\n""+++++ Profit $ per Sector Per Year +++++""\n""\n"
rtyy4.plot(kind='bar', subplots=True, figsize=(15, 55), legend=True, fontsize=12)
# Map each traded symbol to its sector name for the round-trip tear sheet.
rrty = trades[['symbol', 'sector']].set_index('symbol')
rrty1 = rrty.groupby(level=0).last()['sector']
pf.create_round_trip_tear_sheet(returns, positions = positions, transactions = bt.transactions, sector_mappings = rrty1)
# FOR LATER AGGREGATION AT SOME POINT ... MAYBE AVG PER MONTH EXPOSURE ETC. ... LEAVING IN FOR NOW
rrty = trades[['SID', 'sector']].set_index('SID')
rrty1 = rrty.groupby(level=0).last()['sector']
drt = pf.capacity.pos.get_sector_exposures(positions=positions, symbol_sector_map=dict(rrty1))
drt
drt
drt.groupby([drt.index.year]).last()
drt.mean()
drt.groupby([drt.index.year]).mean()
drt.last('1D')
bt.positions.last('1D')
trades[trades.SID == 19922]  # spot-check all round trips for a single security
pd.options.display.float_format = '{:20,.2f}'.format
iuh = get_pricing(symbols=trades.symbol.unique(), start_date=bt.start_date.date(), end_date=bt.end_date.date(),
                  symbol_reference_date=None, frequency='daily', fields={"price", "volume"}, handle_missing='ignore')
pf.create_capacity_tear_sheet(returns, positions = positions, transactions = bt.transactions,
market_data = iuh,
liquidation_daily_vol_limit=0.2,
trade_daily_vol_limit=0.05,
last_n_days=pf.APPROX_BDAYS_PER_MONTH * 6,
days_to_liquidate_limit=1)
# Treat the last quarter of the backtest (or the last half, for tests under ~500
# days) as pseudo out-of-sample via the tear sheet's live_start_date split.
bt_days = (bt.end_date.date() - bt.start_date.date()).days
if bt_days > 500:
    live_start_date = bt.end_date.date() - datetime.timedelta(days=bt_days * 0.25)
else:
    live_start_date = bt.end_date.date() - datetime.timedelta(days=bt_days / 2)
pf.create_full_tear_sheet(returns=returns, positions = positions, transactions = bt.transactions,
market_data = iuh, round_trips=False, bayesian=True, gross_lev=gross_lev, live_start_date=live_start_date,
slippage=0, sector_mappings=rrty1)