Notebook
In [1]:
from quantopian.research import get_pricing, run_pipeline, symbols
from quantopian.pipeline import Pipeline
from quantopian.pipeline.factors import RollingSpearmanOfReturns, SimpleMovingAverage
from quantopian.pipeline.data.builtin import USEquityPricing
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
from statsmodels import regression
import statsmodels.api as sm
In [2]:
def make_pipeline():
    pipe = Pipeline()
    # Screen out penny stocks and low-liquidity securities.
    price = SimpleMovingAverage(inputs=[USEquityPricing.close], window_length=22)
    volume = SimpleMovingAverage(inputs=[USEquityPricing.volume], window_length=22)
    price_filter_high = (price <= 60.0)
    price_filter_low = (price >= 20.0)
    volume_filter = (volume >= 1000000)
    # 30-day rolling Spearman correlation of 10-day returns against the target asset (SID 15622).
    rolling_correlations = RollingSpearmanOfReturns(target=symbols(15622),
                                                    returns_length=10,
                                                    correlation_length=30)
    pipe.set_screen(price_filter_high & price_filter_low & volume_filter & rolling_correlations.notnan())
    pipe.add(rolling_correlations, 'Rolling Correlations')
    return pipe
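
As a quick sanity check, the target SID can be resolved to a ticker in research before running the pipeline. This is a minimal sketch, not output from the original notebook; it only uses the standard Equity attributes:

# Confirm which equity the correlations are computed against.
target = symbols(15622)
print target.symbol, target.asset_name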
    
In [3]:
my_pipe = make_pipeline()
In [ ]:
start_date = '2017-02-01'
end_date = '2017-03-17'
result = run_pipeline(my_pipe, start_date, end_date)
# Drop the target's correlation with itself (always exactly 1.0).
result2 = result[result['Rolling Correlations'] != 1.0]
r3 = result2.sort_values('Rolling Correlations', ascending=False)
print r3.head(10)

# r3 is sorted descending, so the first row holds the highest correlation.
x = r3['Rolling Correlations'].max()
print x
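
If the goal is the most correlated names on each day rather than across the whole window, one option is to group on the date level of the pipeline index. A sketch under that assumption; top_per_day is an illustrative name:

# Top three correlations per pipeline date, using the already-computed result2.
top_per_day = (result2.sort_values('Rolling Correlations', ascending=False)
                      .groupby(level=0).head(3))
print top_per_day.head(10)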
In [6]:
print r3[0,0].symbol

KeyErrorTraceback (most recent call last)
<ipython-input-6-5b367cba56b5> in <module>()
----> 1 print r3[0,0].symbol

/usr/local/lib/python2.7/dist-packages/pandas/core/frame.pyc in __getitem__(self, key)
   1995             return self._getitem_multilevel(key)
   1996         else:
-> 1997             return self._getitem_column(key)

...

KeyError: (0, 0)
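
The KeyError arises because the pipeline output is indexed by a (date, asset) MultiIndex, so r3[0, 0] is interpreted as a column label rather than a position. One way to get the ticker of the highest-correlation row (r3 is already sorted descending) is to read it off the index; a sketch:

# Asset level of the MultiIndex; the first entry belongs to the top-correlation row.
top_asset = r3.index.get_level_values(1)[0]
print top_asset.symbol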
In [19]:
s_d = '2016-06-15'

hedge_hist = get_pricing('XRT', fields='price', start_date=s_d, end_date=end_date)
benchmark = get_pricing('ANF', fields='price', start_date=s_d, end_date=end_date)

# Take percent changes to convert prices to daily returns.
# Drop the first (0th) element because it is NaN.
r_a = hedge_hist.pct_change()[1:]
r_b = benchmark.pct_change()[1:]

# Let's plot them just for fun
r_a.plot(label='XRT')
r_b.plot(label='ANF')
plt.ylabel("Daily Return")
plt.legend();
In [20]:
# Let's define everything in familiar regression terms
X = r_b.values # Get just the values, ignore the timestamps
Y = r_a.values

def linreg(x, y):
    # Add a constant column of 1s so the model also fits an intercept (alpha).
    x = sm.add_constant(x)
    model = regression.linear_model.OLS(y, x).fit()
    # params[0] is the intercept (alpha); params[1] is the slope (beta).
    return model.params[0], model.params[1]

alpha, beta = linreg(X,Y)
print 'alpha: ' + str(alpha)
print 'beta: ' + str(beta)

print stats.spearmanr(X, Y)
alpha: 0.000673326962457
beta: 0.24580515016
SpearmanrResult(correlation=0.81334873542884845, pvalue=4.2364145200323183e-46)
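
Given the fitted alpha and beta, subtracting beta * r_b from r_a leaves a residual return series with the linear exposure to r_b removed; its correlation with r_b is a quick check on how much co-movement the hedge takes out. A sketch reusing the variables above; hedged is an illustrative name and nothing here reproduces original output:

# Residual (hedged) returns after removing the fitted linear exposure.
hedged = r_a - beta * r_b
print stats.spearmanr(r_b.values, hedged.values)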
In [ ]: