Notebook
In [2]:
import numpy as np
import pandas as pd
from datetime import timedelta
from datetime import date

from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline.factors import CustomFactor, AverageDollarVolume
from quantopian.pipeline.data import morningstar as mstar
from quantopian.pipeline.filters.morningstar import IsPrimaryShare
from quantopian.pipeline.factors import SimpleMovingAverage

from quantopian.research import run_pipeline

import statsmodels
from statsmodels.tsa.stattools import coint


import matplotlib.pyplot as plt

# Constants that need to be global
COMMON_STOCK= 'ST00000001'

targetPositions = 25
analysisDate = date.today()-timedelta(days=7)
windowStart = analysisDate-timedelta(days=365)
windowEnd = analysisDate
In [3]:
# Average Dollar Volume without nanmean, so that recent IPOs are truly removed
class ADV_adj(CustomFactor):
    inputs = [USEquityPricing.close, USEquityPricing.volume]
    window_length = 252
    
    def compute(self, today, assets, out, close, volume):
        close[np.isnan(close)] = 0
        out[:] = np.mean(close * volume, 0)
In [4]:
def universe_filters():
    """
    Create a Pipeline producing Filters implementing common acceptance criteria.
    
    Returns
    -------
    zipline.Filter
        Filter to control tradeablility
    """

    # Equities with an average daily volume greater than 750000.
    high_volume = (AverageDollarVolume(window_length=252) > 750000)
    
    # Not Misc. sector:
    sector_check = Sector().notnull()
    
    # Equities that morningstar lists as primary shares.
    # NOTE: This will return False for stocks not in the morningstar database.
    primary_share = IsPrimaryShare()
    
    # Equities for which morningstar's most recent Market Cap value is above $300m.
    have_market_cap = mstar.valuation.market_cap.latest > 300000000
    
    # Equities not listed as depositary receipts by morningstar.
    # Note the inversion operator, `~`, at the start of the expression.
    not_depositary = ~mstar.share_class_reference.is_depositary_receipt.latest
    
    # Equities that listed as common stock (as opposed to, say, preferred stock).
    # This is our first string column. The .eq method used here produces a Filter returning
    # True for all asset/date pairs where security_type produced a value of 'ST00000001'.
    common_stock = mstar.share_class_reference.security_type.latest.eq(COMMON_STOCK)
    
    # Equities whose exchange id does not start with OTC (Over The Counter).
    # startswith() is a new method available only on string-dtype Classifiers.
    # It returns a Filter.
    not_otc = ~mstar.share_class_reference.exchange_id.latest.startswith('OTC')
    
    # Equities whose symbol (according to morningstar) ends with .WI
    # This generally indicates a "When Issued" offering.
    # endswith() works similarly to startswith().
    not_wi = ~mstar.share_class_reference.symbol.latest.endswith('.WI')
    
    # Equities whose company name ends with 'LP' or a similar string.
    # The .matches() method uses the standard library `re` module to match
    # against a regular expression.
    not_lp_name = ~mstar.company_reference.standard_name.latest.matches('.* L[\\. ]?P\.?$')
    
    # Equities with a null entry for the balance_sheet.limited_partnership field.
    # This is an alternative way of checking for LPs.
    not_lp_balance_sheet = mstar.balance_sheet.limited_partnership.latest.isnull()
    
    # Highly liquid assets only. Also eliminates IPOs in the past 12 months
    # Use new average dollar volume so that unrecorded days are given value 0
    # and not skipped over
    # S&P Criterion
    liquid = ADV_adj() > 250000
    
    # Add logic when global markets supported
    # S&P Criterion
    domicile = True
    
    # Keep it to liquid securities
    ranked_liquid = ADV_adj().rank(ascending=False) < 1500
    
    universe_filter = (high_volume & primary_share & have_market_cap & not_depositary &
                      common_stock & not_otc & not_wi & not_lp_name & not_lp_balance_sheet &
                    liquid & domicile & sector_check & liquid & ranked_liquid)
    
    return universe_filter
In [5]:
    # Create our pipeline  
    pipe = Pipeline() 
    
    
    # Select trading universe
    stocksUniverse = universe_filters()
    
    sector = Sector()
    
    pipe.add(sector, 'sector')
    
    
    pipe.set_screen(stocksUniverse)
In [6]:
pipe.show_graph(format='png')
Out[6]:
In [7]:
universe = run_pipeline(pipe, start_date=analysisDate, end_date=analysisDate)
In [8]:
def find_cointegrated_pairs(securities_panel, universe):
    n = len(securities_panel.minor_axis)
    score_matrix = np.zeros((n, n))
    pvalue_matrix = np.ones((n, n))
    keys = securities_panel.keys
    pairs = []
    symbol_list = []
    for i in range(n):
        symbol_list.append(securities_panel.minor_axis[i].symbol)
        for j in range(i+1, n):
            if universe.sector.xs(analysisDate)[securities_panel.minor_axis[i].sid] == universe.sector.xs(analysisDate)[securities_panel.minor_axis[j].sid]:
                S1 = securities_panel.minor_xs(securities_panel.minor_axis[i])
                S2 = securities_panel.minor_xs(securities_panel.minor_axis[j])
                result = coint(S1, S2)
                score = result[0]
                pvalue = result[1]
            else:
                score = 0
                pvalue = 1
                
            score_matrix[i, j] = score
            pvalue_matrix[i, j] = pvalue
            if pvalue < 0.05:
                pairs.append((securities_panel.minor_axis[i].symbol, securities_panel.minor_axis[j].symbol))
                
    return symbol_list, score_matrix, pvalue_matrix, pairs
In [9]:
securities_panel = get_pricing(universe.index.get_level_values(1), fields=['price']
                               , start_date=windowStart, end_date=windowEnd)
In [10]:
# Heatmap to show the p-values of the cointegration test between each pair of
# stocks. Only show the value in the upper-diagonal of the heatmap
# (Just showing a '1' for everything in lower diagonal)
securities_panel.apply(np.log)
symbol_list, scores, pvalues, pairs = find_cointegrated_pairs(securities_panel,universe)
In [11]:
import seaborn
seaborn.heatmap(pvalues, xticklabels=symbol_list, yticklabels=symbol_list, cmap='RdYlGn_r' 
                , mask = (pvalues >= 0.95)
                )
print len(pairs)
3700