Notebook
In [2]:
import pandas as pd
from pandas import Timedelta as td
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import datetime as dt
from pykalman import KalmanFilter
1) identify the most recent 125 closes prices as a pattern 'cp'
2) look back in history and find all the 125-length patterns that looks like cp
3) drop the patterns with overlapped values, just take the ones with hgher correlation with cp
4) now consider waht the outcome was and if all the outcomes goes in the same direction, consider these patterns as valid
5) trade the current pattern in this outcome-direction hoping that history repeats
In [3]:
start = '2002-1-1'
end= '2015, 12, 1'

#p= pattern length
p = 125
#o = outcome length
o = 25
In [4]:
def ret_index(prices):
    '''
    return index of 1 dollar invested in that instrument
    '''
    rets = prices.pct_change()
    index = (rets+1).cumprod()
    return (index-1)*100
In [5]:
px = get_pricing('SPY', start, end, fields= 'price')
i = len(px.index)-1
#verify
current_close = px.iloc[i]
current_date = px.index[i]
print current_date, current_close
#historical data within to serach for similar patterns
h = px[:i-p]
cp = px.iloc[i-p:i]
2015-12-01 00:00:00+00:00 210.74

.. 1) done

In [6]:
def similar(x):
    '''
    condition: patterns have to be correlated with pearson correlation 
    '''  
    sp_corr_value, sp_corr_pvalue = stats.pearsonr(cp.values,x)

    if sp_corr_value < sp_corr_pvalue: # correlation value returned by pearnson has to be grather than p_value
        return np.NAN   
    else:
        return sp_corr_value
In [7]:
# apply the correlation function every day 
correlation = pd.rolling_apply(h,p,similar)# apply the similar function to each row of the historical dataframe
correlation.name = 'corr'
correlation.dropna(inplace=True)
correlation.plot(style='ro')
Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fc59be58890>

.. 2) done

In [8]:
# adjust the correlation dataframe in order to pick just the highest values of correlation BUT 
# without any overlapping dates.. se the sext graph with blacks dots
df = pd.DataFrame(correlation).dropna()
df['date']=df.index
df.columns = ['corr', 'date']
df['delta'] = (df['date']-df['date'].shift(1))/pd.Timedelta('1 days')
df['delta'].fillna(p+1, inplace=True)
df['eval'] = df.apply(lambda x: x['date'] if x['delta'] >7 else np.NAN, axis=1)
df.fillna(method = 'pad', inplace=True)
df[:5]
Out[8]:
corr date delta eval
2002-08-23 00:00:00+00:00 0.145151 2002-08-23 126 2002-08-23
2002-08-26 00:00:00+00:00 0.170073 2002-08-26 3 2002-08-23
2002-08-27 00:00:00+00:00 0.194336 2002-08-27 1 2002-08-23
2002-08-28 00:00:00+00:00 0.212647 2002-08-28 1 2002-08-23
2002-08-29 00:00:00+00:00 0.233133 2002-08-29 1 2002-08-23
In [9]:
pat = df['corr'].groupby(df['eval']).apply(lambda x: x.argmax()).values
pat # dates with max correlation values
Out[9]:
array([Timestamp('2002-12-10 00:00:00+0000', tz='UTC'),
       Timestamp('2003-05-08 00:00:00+0000', tz='UTC'),
       Timestamp('2004-07-21 00:00:00+0000', tz='UTC'),
       Timestamp('2004-10-13 00:00:00+0000', tz='UTC'),
       Timestamp('2005-06-27 00:00:00+0000', tz='UTC'),
       Timestamp('2005-12-29 00:00:00+0000', tz='UTC'),
       Timestamp('2006-09-18 00:00:00+0000', tz='UTC'),
       Timestamp('2007-06-05 00:00:00+0000', tz='UTC'),
       Timestamp('2007-11-01 00:00:00+0000', tz='UTC'),
       Timestamp('2008-05-14 00:00:00+0000', tz='UTC'),
       Timestamp('2008-09-26 00:00:00+0000', tz='UTC'),
       Timestamp('2009-01-16 00:00:00+0000', tz='UTC'),
       Timestamp('2009-05-27 00:00:00+0000', tz='UTC'),
       Timestamp('2010-05-04 00:00:00+0000', tz='UTC'),
       Timestamp('2010-08-30 00:00:00+0000', tz='UTC'),
       Timestamp('2011-11-14 00:00:00+0000', tz='UTC'),
       Timestamp('2012-08-23 00:00:00+0000', tz='UTC'),
       Timestamp('2013-02-06 00:00:00+0000', tz='UTC'),
       Timestamp('2014-12-19 00:00:00+0000', tz='UTC')], dtype=object)
In [10]:
max_corr = df['corr'].loc[[_ for _ in pat]]
max_corr.order(ascending=False, inplace=True)
max_corr = max_corr[:10]
pat_names = max_corr.index
In [11]:
# i want the black dots only..
correlation.plot(style='ro', alpha= .2)
max_corr.plot(style= 'ko')
Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fc59bdc8710>

.. 3) done

In [12]:
#
#just plot adjustments
#
pat_list = [px.iloc[(i+1)-p:i+1] for i, date in enumerate(px.index) if date in pat_names]
out_list = [px.iloc[(i+1):(i+1)+o] for i, date in enumerate(px.index) if date in pat_names]
df_pat = pd.concat(pat_list, axis= 1)
df_out = pd.concat(out_list, axis=1)
df_pat.columns= pat_names
df_out.columns= pat_names
In [13]:
ri= ret_index(cp).fillna(0).values
for d in pat_names:
    q_p = df_pat[d].dropna()
    q_o = df_out[d].dropna()
    q_p = q_p.reset_index(drop=True)
    q_o = q_o.reset_index(drop=True)
    r_p= ret_index(q_p).fillna(0)
    r_o = ret_index(q_o).fillna(0)
    r_p= r_p.values
    r_o = r_o.values
    plt.plot(range(p), r_p-r_p[-1])
    plt.plot([_+(p-1) for _ in range(o)], r_o-r_o[0])
plt.plot(ri-ri[-1], 'k')
plt.show()
df_pat.plot()
plt.show()
In [ ]: