# Import Zipline, the open source backester, and a few other libraries that we will use
import zipline
from zipline import TradingAlgorithm
from zipline.api import schedule_function, order_target, order_target_percent, order_percent, record, symbol, history, set_commission,set_slippage
from zipline.api import date_rules, time_rules, commission, slippage, order_target, get_datetime, get_open_orders, set_symbol_lookup_date
import pytz
from datetime import datetime
import matplotlib.pyplot as pyplot
import numpy as np
import pandas as pd
from scipy.optimize import minimize
import time
import math
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint
from datetime import date, timedelta
#stocks = symbols(['XLF', 'XLE', 'XLU', 'XLK', 'XLB', 'XLP', 'XLY', 'XLI', 'XLV'])
stocks = symbols(['AFL', 'AIG', 'AIV', 'AIZ',
'AJG',
'ALL',
'AMG',
'AMP',
'AMT',
'AON',
'AVB',
'AXP',
'BAC',
'BBT',
'BEN',
'BK',
'BLK',
'BXP',
'C',
'CB',
'CBG',
'CCI',
'CI',
'CINF',
'CMA',
'CME',
'COF',
'DFS',
'DLR',
'EQIX',
'EQR',
'ESS',
'ETFC',
'EXR',
'FITB',
'FRT',
'GGP',
'GS',
'HBAN',
'HCN',
'HCP',
'HIG',
'HST',
'ICE',
'IRM',
'IVZ',
'JPM',
'KEY',
'KIM',
'L',
'LM',
'LNC',
'MAC',
'MCO',
'MET',
'MMC',
'MS',
'MTB',
'NDAQ',
'NTRS',
'O',
'PBCT',
'PFG',
'PGR',
'PLD',
'PNC',
'PRU',
'PSA',
'RF',
'SCHW',
'SLG',
'SPG',
'STI',
'STT',
'TMK',
'TROW',
'TRV',
'UDR',
'UNM',
'USB',
'VNO',
'VTR',
'WFC',
'XL',
'ZION'])
data = get_pricing(
stocks,
start_date='2012-01-01',
end_date = '2016-12-23',
frequency='daily'
)
data.price.plot(use_index=True)
# Define the algorithm - this should look familiar from the Quantopian IDE
# For more information on writing algorithms for Quantopian
# and these functions, see https://www.quantopian.com/help
def initialize(context):
schedule_function(trade,
date_rule=date_rules.week_end(),
time_rule=time_rules.market_close(minutes=1),
half_days=False)
context.lookback = 252*2 # 252*n = n years
context.stocks = symbols(['XLF', 'XLE', 'XLU', 'XLK', 'XLB', 'XLP', 'XLY', 'XLI', 'XLV'])
set_commission(commission.PerShare(cost=0.00))
set_slippage(slippage.VolumeShareSlippage(volume_limit=1, price_impact=0))
context.i = 0
#add_history(context.lookback, '1d', 'price')
def handle_data(context, data):
context.i += 1
if context.i < context.lookback:
return
def trade(context,data):
if context.i < context.lookback:
return
w = estimate(context,data)
# Close all current positions
for stock in context.portfolio.positions:
if stock not in w.index:
order_target(stock, 0)
#Order
for i,n in enumerate(w):
if w.index[i] in data:
if w.index[i] in context.portfolio.positions: #If position already exists
order_target_percent(w.index[i], n)
else:
order_percent(w.index[i], n)
def estimate(context,data):
# Structure a portfolio such that it maximizes dividend yield while lowering volatility
# Covar estimation
#price_history = history(context.lookback, frequency="1d", field='price')
price_history = data.history(context.stocks, fields='price', bar_count = context.lookback, frequency="1d")
ret = (price_history/price_history.shift(5)-1).dropna(axis=0)[5::5]
ret.columns = context.stocks
n = len(ret.columns)
covar = ret.cov()
w = min_var_weights(covar, context.stocks)
# LOG EVERYTHING
#esigma = np.sqrt(np.dot(w.T,np.dot(covar,w)))*math.sqrt(52)*100
#log.info('Expected Volatility: '+str(esigma)+'%')
#w = pd.Series(np.round(1000*w)/1000)
#log.info('Number of Stocks: '+str(sum(abs(w)>0)))
#log.info('Current Leverage: '+str(context.account.leverage))
#w.index = ret.columns
#log.info(w)
#print(get_datetime())
#print(w)
return w #Cash Cushion of X%
def min_var_weights(cov, stocks):
'''
Returns a dictionary of sid:weight pairs.
'''
cov = pd.DataFrame(2*cov)
x = np.array([0.]*(len(cov)+1))
#x = np.ones(len(cov) + 1)
x[-1] = 1.0
p = lagrangize(cov)
weights = np.linalg.solve(p, x)[:-1]
weights = pd.Series(weights, index=stocks)
return weights
def lagrangize(df):
'''
Utility funcion to format a DataFrame
in order to solve a Lagrangian sysem.
'''
df = df
df['lambda'] = np.ones(len(df))
z = np.ones(len(df) + 1)
x = np.ones(len(df) + 1)
z[-1] = 0.0
x[-1] = 1.0
m = [i for i in df.as_matrix()]
m.append(z)
return pd.DataFrame(np.array(m))
def initialize(context):
set_slippage(slippage.VolumeShareSlippage(volume_limit=0.025,
price_impact=0.1))
set_commission(commission.PerShare(cost=0.0075, min_trade_cost=1.00))
set_symbol_lookup_date('2016-01-01')
context.i = 0
#context.index = symbol('SPY')
# finance sector stocks
context.symbols = [
symbol('AFL'),
symbol('AIG'),
symbol('AIV'),
symbol('AIZ'),
symbol('AJG'),
symbol('ALL'),
symbol('AMG'),
symbol('AMP'),
symbol('AMT'),
symbol('AON'),
symbol('AVB'),
symbol('AXP'),
symbol('BAC'),
symbol('BBT'),
symbol('BEN'),
symbol('BK'),
symbol('BLK'),
symbol('BXP'),
symbol('C'),
symbol('CB'),
symbol('CBG'),
symbol('CCI'),
symbol('CI'),
symbol('CINF'),
symbol('CMA'),
symbol('CME'),
symbol('COF'),
symbol('DFS'),
symbol('DLR'),
symbol('EQIX'),
symbol('EQR'),
symbol('ESS'),
symbol('ETFC'),
symbol('EXR'),
symbol('FITB'),
symbol('FRT'),
symbol('GGP'),
symbol('GS'),
symbol('HBAN'),
symbol('HCN'),
symbol('HCP'),
symbol('HIG'),
symbol('HST'),
symbol('ICE'),
symbol('IRM'),
symbol('IVZ'),
symbol('JPM'),
symbol('KEY'),
symbol('KIM'),
symbol('L'),
symbol('LM'),
symbol('LNC'),
symbol('MAC'),
symbol('MCO'),
symbol('MET'),
symbol('MMC'),
symbol('MS'),
symbol('MTB'),
symbol('NDAQ'),
symbol('NTRS'),
symbol('O'),
symbol('PBCT'),
symbol('PFG'),
symbol('PGR'),
symbol('PLD'),
symbol('PNC'),
symbol('PRU'),
symbol('PSA'),
symbol('RF'),
symbol('SCHW'),
symbol('SLG'),
symbol('SPG'),
symbol('STI'),
symbol('STT'),
symbol('TMK'),
symbol('TROW'),
symbol('TRV'),
symbol('UDR'),
symbol('UNM'),
symbol('USB'),
symbol('VNO'),
symbol('VTR'),
symbol('WFC'),
symbol('XL'),
symbol('ZION')]
# pair permutation
context.pairs = []
for i in range(len(context.symbols)-1):
s1 = context.symbols[i]
for j in range(i+1, len(context.symbols)):
s2 = context.symbols[j]
context.pairs.append((s1, s2))
# params
context.lookback = 20
context.p_value_threshold = 0.05
context.entry_threshold = 3
context.exit_threshold = 0.1
# schedule
schedule_function(check_pairs,
date_rules.every_day(),
time_rules.market_open(minutes=1))
total_minutes = 6*60 + 30
for i in range(10, total_minutes, 15):
schedule_function(check_pairs,
date_rules.every_day(),
time_rules.market_open(minutes=i))
def coint_p_value(y, x):
x = sm.add_constant(x)
t_test, p_value, _ = coint(y, x)
return p_value
def hedge_ratio(y, x):
x = sm.add_constant(x)
model = sm.OLS(y, x).fit()
return model.params[1]
def make_z_score_func(hedge, spreads):
if spreads.std()==0:
return lambda p1, p2: 0
return lambda p1, p2: (p1-hedge*p2-spreads.mean())/spreads.std()
def before_trading_start(context, data):
context.i += 1
if context.i < context.lookback:
return
prices = data.history(context.symbols, "price", context.lookback, '1d')
context.pair_info = {}
for pair in context.pairs:
try:
s1, s2 = pair
ys = prices[s1]
xs = prices[s2]
p_value = coint_p_value(ys, xs)
hedge = hedge_ratio(ys, xs)
spreads = ys - hedge*xs
z_score_func = make_z_score_func(hedge, spreads)
y2, y1, y = ys[-3:]
x2, x1, x = xs[-3:]
z_score = z_score_func(y, x)
z_score_1 = z_score_func(y1, x1)
z_score_2 = z_score_func(y2, x2)
# daily change of spread z-score
ok_to_short = z_score < z_score_1 and z_score_1 < z_score_2
ok_to_long = z_score > z_score_1 and z_score_1 > z_score_2
context.pair_info[pair] = {
'p_value': p_value,
'hedge': hedge,
'spreads': spreads,
'z_score_func': z_score_func,
'ok_to_long': ok_to_long,
'ok_to_short': ok_to_short
}
except Exception as e:
log.warn('{} removed: {}'.format(pair, str(e)))
def handle_data(context, data):
pass
def calc_target_pct(y_shares, x_shares, y_price, x_price):
y_dollars = y_shares * y_price
x_dollars = x_shares * x_price
notional_dollars = abs(y_dollars) + abs(x_dollars)
y_target_pct = y_dollars / notional_dollars
x_target_pct = x_dollars / notional_dollars
return (y_target_pct, x_target_pct)
def check_pairs(context, data):
if context.i < context.lookback:
return
if context.portfolio.positions_value==0 and len(get_open_orders())==0:
check_pairs_for_entry(context, data)
else:
check_pairs_for_exit(context, data)
def check_pairs_for_entry(context, data):
if context.i < context.lookback:
return
pairs = []
for pair, pair_info in context.pair_info.iteritems():
if pair_info['p_value'] > context.p_value_threshold:
continue
#for pair, p_value in context.pair_info.iteritems():
#if p_value > context.p_value_threshold:
#continue
y, x = data.current(pair, 'price')
z_score = pair_info['z_score_func'](y, x)
hedge = pair_info['hedge']
target1, target2 = calc_target_pct(1, hedge, y, x)
ok_to_long = pair_info['ok_to_long']
ok_to_short = pair_info['ok_to_short']
entry_threshold = context.entry_threshold
s1, s2 = pair
if ok_to_short and z_score > entry_threshold:
# short top, long bottom
pairs.append((s1, s2, z_score, -target1, target2, y, x, hedge))
elif ok_to_long and z_score < -entry_threshold:
# long top, short bottom
pairs.append((s1, s2, z_score, target1, -target2, y, x, hedge))
if len(pairs)>0:
pairs.sort(key=lambda x:abs(x[2])) # choose the best z_score
s1, s2, z_score, target1, target2, y, x, hedge = pairs[0]
log.info('Enter: {} {} {} {} {} {} {} {}'.format(
s1, s2, z_score, target1, target2, y, x, hedge))
order_target_percent(s1, target1)
order_target_percent(s2, target2)
def check_pairs_for_exit(context, data):
for pair in context.pairs:
s1, s2 = pair
pos1 = context.portfolio.positions[s1].amount
pos2 = context.portfolio.positions[s2].amount
if pos1==0 and pos2==0:
continue
if pair not in context.pair_info:
log.info('No pair info {} {}'.format(s1, s2))
order_target_percent(s1, 0)
order_target_percent(s2, 0)
continue
pair_info = context.pair_info[pair]
y, x = data.current(pair, 'price')
z_score = pair_info['z_score_func'](y, x)
hedge = pair_info['hedge']
s1, s2 = pair
if pos1<0 and pos2>0:
if z_score < context.exit_threshold:
log.info('B {} {} {} {} {} {} {} {}'.format(
s1, s2, z_score, pos1, pos2, y, x, hedge))
order_target_percent(s1, 0)
order_target_percent(s2, 0)
elif pos1>0 and pos2<0:
if z_score > -context.exit_threshold:
log.info('S {} {} {} {} {} {} {} {}'.format(
s1, s2, z_score, pos1, pos2, y, x, hedge))
order_target_percent(s1, 0)
order_target_percent(s2, 0)
## WORKING COPY
# Analyze is a post-hoc analysis method available on Zipline.
# It accepts the context object and 'perf' which is the output
# of a Zipline backtest. This API is currently experimental,
# and will likely change before release.
def analyze(context, perf):
fig = pyplot.figure()
# Make a subplot for portfolio value.
ax1 = fig.add_subplot(211)
perf.portfolio_value.plot(ax=ax1, figsize=(16,12))
ax1.set_ylabel('portfolio value in $')
pyplot.legend(loc=0)
pyplot.show()
#WORKING COPY
# NOTE: This cell will take a few minutes to run.
# Create algorithm object passing in initialize and
# handle_data functions
algo_obj = TradingAlgorithm(
start=datetime(2015, 11, 2, 0, 0, 0, 0, pytz.utc),
end=datetime(2016, 1, 31, 0, 0, 0, 0, pytz.utc),
initialize=initialize,
handle_data=handle_data,
before_trading_start=before_trading_start
)
# HACK: Analyze isn't supported by the parameter-based API, so
# tack it directly onto the object.
algo_obj._analyze = analyze
# Run algorithm
perf_manual = algo_obj.run(data.transpose(2,1,0))
#perf_manual = algo_obj.run()
perf_manual = perf_manual[perf_manual.index >= datetime(2015,1,1)]
perf_manual.returns.plot()
def calculate_max_drawdown(rets):
compounded_returns = []
cur_return = 0.0
for r in rets:
try:
cur_return += math.log(1.0 + r)
# this is a guard for a single day returning -100%, if returns are
# greater than -1.0 it will throw an error because you cannot take
# the log of a negative number
except ValueError:
log.debug("{cur} return, zeroing the returns".format(
cur=cur_return))
cur_return = 0.0
compounded_returns.append(cur_return)
cur_max = None
max_drawdown = None
for cur in compounded_returns:
if cur_max is None or cur > cur_max:
cur_max = cur
drawdown = (cur - cur_max)
if max_drawdown is None or drawdown < max_drawdown:
max_drawdown = drawdown
if max_drawdown is None:
return 0.0
return 1.0 - math.exp(max_drawdown)
calculate_max_drawdown(perf_manual.returns)
import pyfolio as pf
returns, positions, transactions, gross_lev = pf.utils.extract_rets_pos_txn_from_zipline(perf_manual)
pf.create_full_tear_sheet(returns, positions=positions,
transactions=transactions,
gross_lev=gross_lev
)