Notebook
In [1]:
import pandas as pd
import numpy as np
import statsmodels
from statsmodels.tsa.stattools import coint
import matplotlib.pyplot as plt
In [2]:
# Tickers analysed throughout the notebook.
symbol_list = ['ABGB', 'ASTI', 'CSUN', 'DQ', 'FSLR', 'SPY']

# Fetch the 'price' field for every ticker from 2014-01-01 through 2015-01-01.
securities_panel = get_pricing(
    symbol_list,
    fields=['price'],
    start_date='2014-01-01',
    end_date='2015-01-01'
)

# Relabel the minor axis with each entry's `.symbol` so that columns can be
# addressed by plain ticker strings later on.
securities_panel.minor_axis = [security.symbol for security in securities_panel.minor_axis]
In [3]:
# Dates x tickers frame holding the 'price' field.
total_df = securities_panel['price']

# Percent variations: period-over-period change. The first row has no
# predecessor to compare against, so it is dropped.
daily_returns = total_df.pct_change().iloc[1:]

# Cumulative variations: running product of (1 + return).
return_index = daily_returns.add(1).cumprod()

total_df.head()
Out[3]:
ABGB ASTI CSUN DQ FSLR SPY
2014-01-02 00:00:00+00:00 14.4198 7.409 7.0400 38.00 57.43 182.95
2014-01-03 00:00:00+00:00 14.7546 7.250 7.0775 39.50 56.74 182.80
2014-01-06 00:00:00+00:00 15.3300 7.121 7.0000 40.05 51.26 182.40
2014-01-07 00:00:00+00:00 15.6300 7.299 6.9300 41.93 52.48 183.44
2014-01-08 00:00:00+00:00 15.3100 7.101 7.1600 42.49 51.68 183.53
In [4]:
def cointegration_finder(ticker_list, prices=None, significance=.05):
    '''
    For each ticker, list the other tickers it tests as cointegrated with.

    Builds a dictionary with each symbol of `ticker_list` as key and the list
    of symbols with which it is cointegrated as value, then converts it to a
    dataframe for printing.

    Parameters
    ----------
    ticker_list : iterable of column labels
        Tickers to test against each other, pairwise.
    prices : DataFrame, optional
        Frame with one price column per ticker. When omitted, the
        notebook-level `total_df` is used.
    significance : float, optional
        A pair is reported when the p-value returned by `coint` is strictly
        below this threshold. Defaults to .05.

    Returns
    -------
    DataFrame
        One row per ticker (index = ticker); the columns hold the cointegrated
        partners in `ticker_list` order. Rows with fewer partners than the
        longest row are padded with None.
    '''
    if prices is None:
        # Fall back to the price frame built earlier in the notebook.
        prices = total_df

    # Materialise once so that generators/iterators can be traversed twice.
    tickers = list(ticker_list)

    result = {}
    for ticker in tickers:
        candidates = [x for x in tickers if x != ticker]
        # coint(a, b) and coint(b, a) can give different p-values, so the
        # argument order (candidate first, current ticker second) matters
        # and is deliberately kept fixed here.
        result[ticker] = [
            x for x in candidates
            if coint(prices[x], prices[ticker])[1] < significance
        ]

    return pd.DataFrame.from_dict(result, orient='index')
In [5]:
# Run the pairwise cointegration scan over the full ticker universe.
df = cointegration_finder(symbol_list)
# Parenthesised form prints the same text on Python 2 and is valid on Python 3.
print(df)
         0
SPY   None
FSLR  ABGB
ASTI  CSUN
ABGB  FSLR
CSUN  None
DQ    None
In [18]:
# Check whether the cointegration p-value changes when the two series are
# passed in the opposite order.
pvalue_csun_asti = coint(total_df['CSUN'], total_df['ASTI'])[1]
pvalue_asti_csun = coint(total_df['ASTI'], total_df['CSUN'])[1]
pvalue_csun_asti == pvalue_asti_csun
Out[18]:
False
In [6]:
# Cumulative return paths of ABGB and CSUN drawn on the same axes.
return_index.loc[:, ['ABGB', 'CSUN']].plot()
plt.show()
In [7]:
def zscore(series):
    '''
    Standardise `series`: subtract its mean, then divide the result by the
    standard deviation of `series` as computed by `np.std`.
    '''
    centered = series - series.mean()
    scale = np.std(series)
    return centered / scale
In [8]:
def visualize_spread(x, y):
    '''
    Plot the z-score of the difference `x - y` with reference lines.

    Draws the z-scored spread, a black line at its mean, a red dashed line
    at +1 and a green dashed line at -1. The p-value returned by
    `coint(x, y)` is shown in the title.

    Parameters
    ----------
    x, y : Series
        The two series to compare; they are subtracted element-wise.

    Returns
    -------
    None
        The figure is displayed with `plt.show()`.
    '''
    _, pvalue, _ = coint(x, y)

    # Compute the standardised spread once and reuse it below.
    spread_z = zscore(x - y)

    # The figure must exist BEFORE anything is drawn: creating it afterwards
    # leaves the plot at the default size and opens a second, empty figure.
    fig, ax = plt.subplots(figsize=(15, 8))
    spread_z.plot(ax=ax)
    ax.axhline(spread_z.mean(), color='black')
    ax.axhline(1.0, color='red', linestyle='--')
    ax.axhline(-1.0, color='green', linestyle='--')
    ax.set_title('Z-score of spread x - y (coint p-value = %.4f)' % pvalue)
    ax.set_ylabel('z-score')
    plt.show()
In [9]:
# ABGB vs CSUN: a pair that the cointegration table printed earlier does not
# list in either row.
visualize_spread(x=total_df['ABGB'], y=total_df['CSUN'])
<matplotlib.figure.Figure at 0x7f5b185b53d0>
In [10]:
# ABGB vs FSLR: the pair that the cointegration table printed earlier lists
# in both the ABGB row and the FSLR row.
visualize_spread(x=total_df['ABGB'], y=total_df['FSLR'])
<matplotlib.figure.Figure at 0x7f5b185c8150>
In [11]:
def visualize_ratio(x, y):
    '''
    Plot the z-score of the ratio `x / y` with reference lines.

    Draws the z-scored ratio, a black line at its mean, a red dashed line
    at +1 and a green dashed line at -1. The p-value returned by
    `coint(x, y)` is shown in the title.

    Parameters
    ----------
    x, y : Series
        Numerator and denominator series; they are divided element-wise.

    Returns
    -------
    None
        The figure is displayed with `plt.show()`.
    '''
    _, pvalue, _ = coint(x, y)

    # Compute the standardised ratio once and reuse it below.
    ratio_z = zscore(x / y)

    # The figure must exist BEFORE anything is drawn: creating it afterwards
    # leaves the plot at the default size and opens a second, empty figure.
    fig, ax = plt.subplots(figsize=(15, 8))
    ratio_z.plot(ax=ax)
    ax.axhline(ratio_z.mean(), color='black')
    ax.axhline(1.0, color='red', linestyle='--')
    ax.axhline(-1.0, color='green', linestyle='--')
    ax.set_title('Z-score of ratio x / y (coint p-value = %.4f)' % pvalue)
    ax.set_ylabel('z-score')
    plt.show()
In [20]:
# Same ABGB / FSLR pair as above, this time looking at the price ratio
# instead of the price difference.
visualize_ratio(x=total_df['ABGB'], y=total_df['FSLR']);
<matplotlib.figure.Figure at 0x7f5b184ab2d0>