Notebook
In [9]:
# Import Zipline, the open source backester, and a few other libraries that we will use
import zipline
from zipline import TradingAlgorithm
from zipline.api import schedule_function, order_target, order_target_percent, order_percent, record, symbol, history, set_commission,set_slippage
from zipline.api import date_rules, time_rules, commission, slippage, order_target, get_datetime, get_open_orders, set_symbol_lookup_date
import pytz
from datetime import datetime
import matplotlib.pyplot as pyplot
import numpy as np
import pandas as pd
from scipy.optimize import minimize
import time
import math
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint
from datetime import date, timedelta
In [10]:
#stocks = symbols(['XLF', 'XLE', 'XLU', 'XLK', 'XLB', 'XLP', 'XLY', 'XLI', 'XLV'])

stocks = symbols(['AFL',  'AIG',  'AIV',  'AIZ', 
        'AJG', 
        'ALL', 
        'AMG', 
        'AMP', 
        'AMT',
        'AON', 
        'AVB', 
        'AXP', 
        'BAC', 
        'BBT', 
        'BEN', 
        'BK', 
        'BLK', 
        'BXP', 
        'C',
        'CB', 
        'CBG', 
        'CCI', 
        'CI', 
        'CINF', 
        'CMA', 
        'CME', 
        'COF', 
        'DFS', 
        'DLR',
        'EQIX', 
        'EQR', 
        'ESS', 
        'ETFC', 
        'EXR', 
        'FITB', 
        'FRT', 
        'GGP', 
        'GS',
        'HBAN', 
        'HCN', 
        'HCP', 
        'HIG', 
        'HST', 
        'ICE', 
        'IRM', 
        'IVZ', 
        'JPM',
        'KEY', 
        'KIM', 
        'L', 
        'LM', 
        'LNC', 
        'MAC', 
        'MCO', 
        'MET', 
        'MMC', 
        'MS',
        'MTB', 
        'NDAQ', 
        'NTRS', 
        'O', 
        'PBCT', 
        'PFG', 
        'PGR', 
        'PLD', 
        'PNC',
        'PRU', 
        'PSA', 
        'RF', 
        'SCHW', 
        'SLG', 
        'SPG', 
        'STI', 
        'STT', 
        'TMK',
        'TROW', 
        'TRV', 
        'UDR', 
        'UNM', 
        'USB', 
        'VNO', 
        'VTR', 
        'WFC', 
        'XL',
        'ZION'])

data = get_pricing(
    stocks,
    start_date='2012-01-01',
    end_date = '2016-12-23',
    frequency='daily'
)
data.price.plot(use_index=True)
Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f7511341cd0>
In [3]:
# Define the algorithm - this should look familiar from the Quantopian IDE
# For more information on writing algorithms for Quantopian
# and these functions, see https://www.quantopian.com/help

def initialize(context):
    schedule_function(trade, 
                      date_rule=date_rules.week_end(), 
                      time_rule=time_rules.market_close(minutes=1), 
                      half_days=False)
    context.lookback = 252*2 # 252*n = n years
    context.stocks = symbols(['XLF', 'XLE', 'XLU', 'XLK', 'XLB', 'XLP', 'XLY', 'XLI', 'XLV'])
    set_commission(commission.PerShare(cost=0.00))
    set_slippage(slippage.VolumeShareSlippage(volume_limit=1, price_impact=0))
    context.i = 0
    #add_history(context.lookback, '1d', 'price')

def handle_data(context, data):
    context.i += 1
    if context.i < context.lookback:
        return
    
def trade(context,data):
        if context.i < context.lookback:
            return
        w = estimate(context,data)
    
        # Close all current positions
        for stock in context.portfolio.positions:
            if stock not in w.index:
               order_target(stock, 0)  

       #Order
        for i,n in enumerate(w):
            if w.index[i] in data:
                if w.index[i] in context.portfolio.positions: #If position already exists
                    order_target_percent(w.index[i], n)
                else:
                    order_percent(w.index[i], n)
    
def estimate(context,data):
        # Structure a portfolio such that it maximizes dividend yield while lowering volatility
        # Covar estimation
        #price_history = history(context.lookback, frequency="1d", field='price')
        price_history = data.history(context.stocks, fields='price', bar_count = context.lookback, frequency="1d")
        ret = (price_history/price_history.shift(5)-1).dropna(axis=0)[5::5]
        ret.columns = context.stocks
        n = len(ret.columns)
        
        covar = ret.cov()
        
        w = min_var_weights(covar, context.stocks)

        # LOG EVERYTHING
        #esigma = np.sqrt(np.dot(w.T,np.dot(covar,w)))*math.sqrt(52)*100
        #log.info('Expected Volatility: '+str(esigma)+'%')
        #w = pd.Series(np.round(1000*w)/1000)
        #log.info('Number of Stocks: '+str(sum(abs(w)>0)))
        #log.info('Current Leverage: '+str(context.account.leverage))
        #w.index = ret.columns
        #log.info(w)
        #print(get_datetime())
        #print(w)
        return w  #Cash Cushion of X%


def min_var_weights(cov, stocks):
    '''
    Returns a dictionary of sid:weight pairs.
    '''
    cov = pd.DataFrame(2*cov)
    x = np.array([0.]*(len(cov)+1))
    #x = np.ones(len(cov) + 1)
    x[-1] = 1.0
    p = lagrangize(cov)
    weights = np.linalg.solve(p, x)[:-1]
    weights = pd.Series(weights, index=stocks)
    return weights


def lagrangize(df):
    '''
    Utility funcion to format a DataFrame 
    in order to solve a Lagrangian sysem. 
    '''
    df = df
    df['lambda'] = np.ones(len(df))
    z = np.ones(len(df) + 1)
    x = np.ones(len(df) + 1)
    z[-1] = 0.0
    x[-1] = 1.0
    m = [i for i in df.as_matrix()]
    m.append(z)    
    return pd.DataFrame(np.array(m))
In [11]:
def initialize(context):
    set_slippage(slippage.VolumeShareSlippage(volume_limit=0.025,
                                              price_impact=0.1))
    set_commission(commission.PerShare(cost=0.0075, min_trade_cost=1.00))
    set_symbol_lookup_date('2016-01-01')
    context.i = 0
    #context.index = symbol('SPY')

    # finance sector stocks
    context.symbols = [
        symbol('AFL'), 
        symbol('AIG'), 
        symbol('AIV'), 
        symbol('AIZ'), 
        symbol('AJG'), 
        symbol('ALL'), 
        symbol('AMG'), 
        symbol('AMP'), 
        symbol('AMT'),
        symbol('AON'), 
        symbol('AVB'), 
        symbol('AXP'), 
        symbol('BAC'), 
        symbol('BBT'), 
        symbol('BEN'), 
        symbol('BK'), 
        symbol('BLK'), 
        symbol('BXP'), 
        symbol('C'),
        symbol('CB'), 
        symbol('CBG'), 
        symbol('CCI'), 
        symbol('CI'), 
        symbol('CINF'), 
        symbol('CMA'), 
        symbol('CME'), 
        symbol('COF'), 
        symbol('DFS'), 
        symbol('DLR'),
        symbol('EQIX'), 
        symbol('EQR'), 
        symbol('ESS'), 
        symbol('ETFC'), 
        symbol('EXR'), 
        symbol('FITB'), 
        symbol('FRT'), 
        symbol('GGP'), 
        symbol('GS'),
        symbol('HBAN'), 
        symbol('HCN'), 
        symbol('HCP'), 
        symbol('HIG'), 
        symbol('HST'), 
        symbol('ICE'), 
        symbol('IRM'), 
        symbol('IVZ'), 
        symbol('JPM'),
        symbol('KEY'), 
        symbol('KIM'), 
        symbol('L'), 
        symbol('LM'), 
        symbol('LNC'), 
        symbol('MAC'), 
        symbol('MCO'), 
        symbol('MET'), 
        symbol('MMC'), 
        symbol('MS'),
        symbol('MTB'), 
        symbol('NDAQ'), 
        symbol('NTRS'), 
        symbol('O'), 
        symbol('PBCT'), 
        symbol('PFG'), 
        symbol('PGR'), 
        symbol('PLD'), 
        symbol('PNC'),
        symbol('PRU'), 
        symbol('PSA'), 
        symbol('RF'), 
        symbol('SCHW'), 
        symbol('SLG'), 
        symbol('SPG'), 
        symbol('STI'), 
        symbol('STT'), 
        symbol('TMK'),
        symbol('TROW'), 
        symbol('TRV'), 
        symbol('UDR'), 
        symbol('UNM'), 
        symbol('USB'), 
        symbol('VNO'), 
        symbol('VTR'), 
        symbol('WFC'), 
        symbol('XL'),
        symbol('ZION')]
        
    # pair permutation    
    context.pairs = []
    for i in range(len(context.symbols)-1):
        s1 = context.symbols[i]
        for j in range(i+1, len(context.symbols)):
            s2 = context.symbols[j]
            context.pairs.append((s1, s2))
        
    # params
    context.lookback = 20
    context.p_value_threshold = 0.05
    context.entry_threshold = 3
    context.exit_threshold = 0.1
    
    # schedule
    schedule_function(check_pairs, 
                      date_rules.every_day(), 
                      time_rules.market_open(minutes=1))

    total_minutes = 6*60 + 30
    for i in range(10, total_minutes, 15):
        schedule_function(check_pairs, 
                          date_rules.every_day(), 
                          time_rules.market_open(minutes=i))

def coint_p_value(y, x):
    x = sm.add_constant(x)
    t_test, p_value, _ = coint(y, x)
    return p_value

def hedge_ratio(y, x):
    x = sm.add_constant(x)
    model = sm.OLS(y, x).fit()
    return model.params[1]

def make_z_score_func(hedge, spreads):
    if spreads.std()==0:
        return lambda p1, p2: 0
    return lambda p1, p2: (p1-hedge*p2-spreads.mean())/spreads.std()

def before_trading_start(context, data):
    context.i += 1
    if context.i < context.lookback:
        return
    
    prices = data.history(context.symbols, "price", context.lookback, '1d')

    context.pair_info = {}
    for pair in context.pairs:
        try:
            s1, s2 = pair
            ys = prices[s1]
            xs = prices[s2]
            p_value = coint_p_value(ys, xs)
            hedge = hedge_ratio(ys, xs) 
            spreads = ys - hedge*xs
            z_score_func = make_z_score_func(hedge, spreads)

            y2, y1, y = ys[-3:]
            x2, x1, x = xs[-3:]

            z_score = z_score_func(y, x)
            z_score_1 = z_score_func(y1, x1)
            z_score_2 = z_score_func(y2, x2)

            # daily change of spread z-score
            ok_to_short = z_score < z_score_1 and z_score_1 < z_score_2
            ok_to_long  = z_score > z_score_1 and z_score_1 > z_score_2

            context.pair_info[pair] = {
                'p_value': p_value,
                'hedge': hedge,
                'spreads': spreads,
                'z_score_func': z_score_func,
                'ok_to_long': ok_to_long,
                'ok_to_short': ok_to_short
            }
        except Exception as e:
            log.warn('{} removed: {}'.format(pair, str(e)))

def handle_data(context, data):
    pass

def calc_target_pct(y_shares, x_shares, y_price, x_price):
    y_dollars = y_shares * y_price
    x_dollars = x_shares * x_price
    notional_dollars =  abs(y_dollars) + abs(x_dollars)
    y_target_pct = y_dollars / notional_dollars
    x_target_pct = x_dollars / notional_dollars
    return (y_target_pct, x_target_pct)

def check_pairs(context, data):
    if context.i < context.lookback:
        return
    
    if context.portfolio.positions_value==0 and len(get_open_orders())==0:
        check_pairs_for_entry(context, data)
    else:
        check_pairs_for_exit(context, data)
        
def check_pairs_for_entry(context, data):   
    if context.i < context.lookback:
        return
    
    pairs = []
    for pair, pair_info in context.pair_info.iteritems():
        if pair_info['p_value'] > context.p_value_threshold:
            continue
    #for pair, p_value in context.pair_info.iteritems():
        #if p_value > context.p_value_threshold:
            #continue            
                
        y, x = data.current(pair, 'price')
        z_score = pair_info['z_score_func'](y, x)

        hedge = pair_info['hedge']
        target1, target2 = calc_target_pct(1, hedge, y, x)

        ok_to_long = pair_info['ok_to_long']
        ok_to_short = pair_info['ok_to_short']
        entry_threshold = context.entry_threshold
            
        s1, s2 = pair
        if ok_to_short and z_score > entry_threshold:
            # short top, long bottom
            pairs.append((s1, s2, z_score, -target1, target2, y, x, hedge))
        elif ok_to_long and z_score < -entry_threshold:
            # long top, short bottom
            pairs.append((s1, s2, z_score, target1, -target2, y, x, hedge))
            
    if len(pairs)>0:
        pairs.sort(key=lambda x:abs(x[2])) # choose the best z_score
        s1, s2, z_score, target1, target2, y, x, hedge = pairs[0]
        log.info('Enter: {} {} {} {} {} {} {} {}'.format(
                s1, s2, z_score, target1, target2, y, x, hedge))
        order_target_percent(s1, target1)
        order_target_percent(s2, target2)
        
def check_pairs_for_exit(context, data):
    for pair in context.pairs:
        s1, s2 = pair
        pos1 = context.portfolio.positions[s1].amount
        pos2 = context.portfolio.positions[s2].amount

        if pos1==0 and pos2==0:
            continue
         
        if pair not in context.pair_info:
            log.info('No pair info {} {}'.format(s1, s2))
            order_target_percent(s1, 0)
            order_target_percent(s2, 0)                
            continue
        pair_info = context.pair_info[pair]
            
        y, x = data.current(pair, 'price')
        z_score = pair_info['z_score_func'](y, x)
        hedge = pair_info['hedge']

        s1, s2 = pair
        if pos1<0 and pos2>0:
            if z_score < context.exit_threshold:
                log.info('B {} {} {} {} {} {} {} {}'.format(
                        s1, s2, z_score, pos1, pos2, y, x, hedge))
                order_target_percent(s1, 0)
                order_target_percent(s2, 0)                
        elif pos1>0 and pos2<0:
            if z_score > -context.exit_threshold: 
                log.info('S {} {} {} {} {} {} {} {}'.format(
                        s1, s2, z_score, pos1, pos2, y, x, hedge))
                order_target_percent(s1, 0)
                order_target_percent(s2, 0)
In [12]:
## WORKING COPY
# Analyze is a post-hoc analysis method available on Zipline. 
# It accepts the context object and 'perf' which is the output 
# of a Zipline backtest.  This API is currently experimental, 
# and will likely change before release.

def analyze(context, perf):
    fig = pyplot.figure()
    
    # Make a subplot for portfolio value.
    ax1 = fig.add_subplot(211)
    perf.portfolio_value.plot(ax=ax1, figsize=(16,12))
    ax1.set_ylabel('portfolio value in $')
    pyplot.legend(loc=0)
    pyplot.show()
In [ ]:
#WORKING COPY
# NOTE: This cell will take a few minutes to run.

# Create algorithm object passing in initialize and
# handle_data functions
algo_obj = TradingAlgorithm(
    start=datetime(2015, 11, 2, 0, 0, 0, 0, pytz.utc),
    end=datetime(2016, 1, 31, 0, 0, 0, 0, pytz.utc),
    initialize=initialize, 
    handle_data=handle_data,
    before_trading_start=before_trading_start
)

# HACK: Analyze isn't supported by the parameter-based API, so
# tack it directly onto the object.
algo_obj._analyze = analyze

# Run algorithm
perf_manual = algo_obj.run(data.transpose(2,1,0))
#perf_manual = algo_obj.run()
In [71]:
 
Out[71]:
Timestamp('2013-02-05 00:00:00')
In [6]:
perf_manual = perf_manual[perf_manual.index >= datetime(2015,1,1)]
perf_manual.returns.plot()
Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f5b81704c10>
In [ ]:
    def calculate_max_drawdown(rets):
        compounded_returns = []
        cur_return = 0.0
        for r in rets:
            try:
                cur_return += math.log(1.0 + r)
            # this is a guard for a single day returning -100%, if returns are
            # greater than -1.0 it will throw an error because you cannot take
            # the log of a negative number
            except ValueError:
                log.debug("{cur} return, zeroing the returns".format(
                    cur=cur_return))
                cur_return = 0.0
            compounded_returns.append(cur_return)

        cur_max = None
        max_drawdown = None
        for cur in compounded_returns:
            if cur_max is None or cur > cur_max:
                cur_max = cur

            drawdown = (cur - cur_max)
            if max_drawdown is None or drawdown < max_drawdown:
                max_drawdown = drawdown

        if max_drawdown is None:
            return 0.0

        return 1.0 - math.exp(max_drawdown)
In [ ]:
calculate_max_drawdown(perf_manual.returns)
In [ ]:
import pyfolio as pf

returns, positions, transactions, gross_lev = pf.utils.extract_rets_pos_txn_from_zipline(perf_manual)
In [ ]:
pf.create_full_tear_sheet(returns, positions=positions, 
                          transactions=transactions,
                          gross_lev=gross_lev
                         )