import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pykalman import KalmanFilter
import statsmodels.api as sm
# <center>
# <center>
# <center> <center>
# The two tickers analysed as a pair.
secs = ['PEP', 'KO']

# Daily close prices for the in-sample window.  `get_pricing` and `symbols`
# are not defined in this file; presumably the hosting research environment
# supplies them -- TODO confirm.
data = get_pricing(
    symbols(secs),
    start_date='2006-1-1',
    end_date='2008-8-1',
    fields='close_price',
    frequency='daily',
)
# Relabel each column with the `.symbol` attribute of its current label.
data.columns = [col.symbol for col in data.columns]
data.index.name = 'Date'

# Compound the daily percentage changes into one growth curve per ticker.
cum_returns = (1 + data.pct_change()).cumprod()
cum_returns.plot();
plt.ylabel('Cumulative Return');
# Scatter one ticker's price against the other, shading every point by its
# row position so that earlier and later observations get different colours.
cm = plt.get_cmap('jet')
colors = np.linspace(0.1, 1, len(data))
sc = plt.scatter(
    data[secs[0]], data[secs[1]],
    s=30, c=colors, cmap=cm, edgecolor='k', alpha=0.7,
)
cb = plt.colorbar(sc)

# Colorbar labels: dates taken at every (len(data) // 9)-th row.
# NOTE(review): tick positions are not set explicitly here -- confirm the
# labels line up with the ticks Matplotlib chooses.
label_dates = data.index[::len(data) // 9]
cb.ax.set_yticklabels([str(stamp.date()) for stamp in label_dates])

plt.xlabel(secs[0])
plt.ylabel(secs[1]);
# Ordinary least squares regression of secs[1] on secs[0].  prepend=False
# places the constant column after the price column, so the fitted parameters
# are ordered (slope, intercept).
x = sm.add_constant(data[secs[0]], prepend=False)
ols = sm.OLS(data[secs[1]], x).fit()
beta = ols.params

# Fitted values at the column-wise minimum and maximum of the design matrix:
# the two end points used later to draw the regression line.
y_fit = [x.min().dot(beta), x.max().dot(beta)]

# Call form of print: valid on Python 2 (single argument) and on Python 3,
# where the original print statement is a SyntaxError.
print(ols.summary2())
# Same time-shaded price scatter as before, now with the OLS line on top.
cm = plt.get_cmap('jet')
colors = np.linspace(0.1, 1, len(data))
sc = plt.scatter(data[secs[0]], data[secs[1]], s=50, c=colors, cmap=cm,
                 edgecolor='k', alpha=0.7, label='Price Data')

# x.min() / x.max() are indexed by column label, so the first entry (the
# price column -- the constant is appended last) is taken by position with
# .iloc rather than with an integer key on a label index.
x_lo = x.min().iloc[0]
x_hi = x.max().iloc[0]
plt.plot([x_lo, x_hi], y_fit, '--b', linewidth=3, label='OLS Fit')
plt.legend()

cb = plt.colorbar(sc)
# Colorbar labels: dates taken at every (len(data) // 9)-th row.
cb.ax.set_yticklabels([str(p.date()) for p in data[::len(data)//9].index])
plt.xlabel(secs[0])
plt.ylabel(secs[1]);
# Spread = secs[1] price minus the value predicted from secs[0] by the fitted
# coefficients in `beta` (design matrix rebuilt with the constant last).
design = sm.add_constant(data[secs[0]], prepend=False)
residual = data[secs[1]] - np.dot(design, beta)

# One-column frame named after the pair.
spread = residual.to_frame('{}-{} Spread'.format(secs[0], secs[1]))
spread.plot(style=['g']);
# Check for cointegration: augmented Dickey-Fuller test on the spread, with
# the lag order capped at 1.
adf = sm.tsa.stattools.adfuller(spread['PEP-KO Spread'], maxlag=1)

# adf[0] is reported as the test statistic and adf[1] as the p-value.
# Call form of print works on both Python 2 and Python 3.
print('ADF test statistic: %.02f' % adf[0])
print('p-value: %.03f' % adf[1])
# Trading bands around the spread: its mean, and the mean plus / minus one
# standard deviation.  Both statistics are constants over the whole sample.
pair_spread = spread['PEP-KO Spread']
std = pair_spread.std()

spread['Middle'] = pair_spread.mean()
spread['Upper'] = spread['Middle'] + std
spread['Lower'] = spread['Middle'] - std

# Styles follow column order: spread, Middle, Upper, Lower.
spread.plot(style=['g', '--b', '--y', '--y']);
def _band_markers(series, entry_band, exit_band, enter_on_drop):
    """Return plot markers for opening and closing a band-crossing position.

    A position opens when ``series`` crosses ``entry_band`` and closes when it
    later crosses ``exit_band`` in the opposite direction.

    Parameters
    ----------
    series : pandas.Series
        The spread being traded.
    entry_band, exit_band : pandas.Series
        Band levels, indexed like ``series``.
    enter_on_drop : bool
        True when the position opens on a downward cross of ``entry_band``
        and closes on an upward cross of ``exit_band``; False for the
        mirrored case.

    Returns
    -------
    pandas.Series
        ``entry_band``'s value on the row before each opening, 0 on the row
        before each closing, and NaN everywhere else.
    """
    previous = series.shift(1)
    if enter_on_drop:
        opened = (previous > entry_band) & (series < entry_band)
        closed = (previous < exit_band) & (series > exit_band)
    else:
        opened = (previous < entry_band) & (series > entry_band)
        closed = (previous > exit_band) & (series < exit_band)

    # 1 from an opening cross onward, 0 from a closing cross onward, NaN
    # before the first signal.  Assignments go to a Series owned by this
    # function, so no chained indexing on a DataFrame column is involved.
    held = pd.Series(np.nan, index=series.index)
    held[opened] = 1
    held[closed] = 0
    held = held.ffill()

    # diff() gives +1 at an opening and -1 at a closing; shift(-1) moves each
    # mark onto the preceding row.
    change = held.diff().shift(-1)
    # Order matters: blank the "no change" rows first, then turn closings
    # into 0 so they survive as markers at level zero.
    change[change == 0] = np.nan
    change[change == -1] = 0
    return change * entry_band


# Marker series for plotting: 'Buy' trades the lower band, 'Sell' the upper
# band; both close at the middle band.
trades = pd.DataFrame(np.nan, index=spread.index, columns=['Buy', 'Sell'])
trades['Buy'] = _band_markers(spread['PEP-KO Spread'], spread['Lower'],
                              spread['Middle'], enter_on_drop=True)
trades['Sell'] = _band_markers(spread['PEP-KO Spread'], spread['Upper'],
                               spread['Middle'], enter_on_drop=False)
# Spread with its bands, overlaid with the trade markers
# (magenta up-triangles for 'Buy', cyan down-triangles for 'Sell').
spread.plot(style=['g', '--b', '--y', '--y'])
for side, marker in (('Buy', 'm^'), ('Sell', 'cv')):
    plt.plot(trades[side], marker, markersize=12, label=side)
plt.legend(loc=0);
secs = ['PEP', 'KO']

# Daily close prices for the out-of-sample window, fetched with the same
# fields and frequency as the in-sample data.
oos_request = dict(
    start_date='2008-8-1',
    end_date='2010-1-1',
    fields='close_price',
    frequency='daily',
)
data_oos = get_pricing(symbols(secs), **oos_request)

# Relabel each column with the `.symbol` attribute of its current label.
data_oos.columns = [col.symbol for col in data_oos.columns]
data_oos.index.name = 'Date'
# Extend the in-sample spread frame over the out-of-sample dates.  The two
# date indexes are merged with an explicit set union; `index + index` only
# meant "union" in old pandas releases and is an error in current ones.
spread_oos = spread.reindex(spread.index.union(data_oos.index))

# Out-of-sample spread, computed with the coefficients fitted in-sample
# (`beta` is reused, not re-estimated).
oos_design = sm.add_constant(data_oos[secs[0]], prepend=False)
spread_oos['PEP-KO Spread OOS'] = data_oos[secs[1]] - np.dot(oos_design, beta)

# Carry the in-sample bands forward across the newly added rows.
bands = ['Middle', 'Upper', 'Lower']
spread_oos[bands] = spread_oos[bands].ffill()

# Styles follow column order: in-sample spread, Middle, Upper, Lower, OOS spread.
spread_oos.plot(style=['g', '--b', '--y', '--y', 'r']);
# Stack the in-sample and out-of-sample prices.  pd.concat replaces
# DataFrame.append, which newer pandas releases no longer provide.
# NOTE(review): both price requests name 2008-8-1 as a boundary date; if both
# ranges are inclusive that row appears twice here -- confirm.
data_all = pd.concat([data, data_oos])

# Time-shaded price scatter over the full period, with the in-sample OLS line.
cm = plt.get_cmap('jet')
colors = np.linspace(0.1, 1, len(data_all))
sc = plt.scatter(data_all[secs[0]], data_all[secs[1]], s=50, c=colors, cmap=cm,
                 edgecolor='k', alpha=0.7, label='Price Data')

# x.min() / x.max() are indexed by column label, so the first entry (the
# price column -- the constant is appended last) is taken by position with
# .iloc rather than with an integer key on a label index.
x_lo = x.min().iloc[0]
x_hi = x.max().iloc[0]
plt.plot([x_lo, x_hi], y_fit, '--b', linewidth=3, label='OLS Fit')
plt.legend()

cb = plt.colorbar(sc)
# Colorbar labels: dates taken at every (len(data_all) // 9)-th row.
cb.ax.set_yticklabels([str(p.date()) for p in data_all[::len(data_all)//9].index])
plt.xlabel(secs[0])
plt.ylabel(secs[1]);
# One observation matrix per time step: the row [secs[0] price, 1].
# add_constant(..., prepend=False) yields an (n, 2) array; the extra axis
# makes it (n, 1, 2).
obs_mat = np.expand_dims(
    sm.add_constant(data_all[secs[0]].values, prepend=False), axis=1)

# Because the constant is the last column, state component 0 multiplies the
# price (slope) and component 1 is the intercept.
kf_settings = dict(
    n_dim_obs=1,                            # y is 1-dimensional
    n_dim_state=2,                          # state is (slope, intercept)
    initial_state_mean=np.ones(2),
    initial_state_covariance=np.ones((2, 2)),
    transition_matrices=np.eye(2),          # identity: state carried over between steps
    observation_matrices=obs_mat,
    observation_covariance=10**2,
    transition_covariance=0.01**2 * np.eye(2),
)
kf = KalmanFilter(**kf_settings)

# Filter the secs[1] price series to get the state estimate at each step.
state_means, state_covs = kf.filter(data_all[secs[1]])

# Filtered coefficients as a time-indexed frame, one subplot per coefficient.
beta_kf = pd.DataFrame({'Slope': state_means[:, 0], 'Intercept': state_means[:, 1]},
                       index=data_all.index)
beta_kf.plot(subplots=True);
# visualize the correlation between asset prices over time
# plt.get_cmap matches the other plotting cells in this file.
cm = plt.get_cmap('jet')
# Floor division keeps the slice step an integer on Python 3 as well.
# NOTE(review): `dates` is not used by the plot below; kept in case later
# code reads it.
dates = [str(p.date()) for p in data_all[::len(data_all) // 10].index]
colors = np.linspace(0.1, 1, len(data_all))
sc = plt.scatter(data_all[secs[0]], data_all[secs[1]],
                 s=50, c=colors, cmap=cm, edgecolor='k', alpha=0.7)
cb = plt.colorbar(sc)
cb.ax.set_yticklabels([str(p.date()) for p in data_all[::len(data_all)//9].index]);
plt.xlabel(secs[0])
plt.ylabel(secs[1])

# add regression lines: one line for every `step`-th filtered state, shaded
# on the same colormap so that later states get later colours
step = 25
xi = np.linspace(data_all[secs[0]].min(), data_all[secs[0]].max(), 2)
sampled_states = state_means[::step]
colors_l = np.linspace(0.1, 1, len(sampled_states))
for shade, (slope, intercept) in zip(colors_l, sampled_states):
    plt.plot(xi, slope * xi + intercept, alpha=.5, lw=2, c=cm(shade))
# Dynamic spread: secs[1] price minus the value predicted from secs[0] with
# the time-varying Kalman coefficients.  beta_kf's columns are 'Slope' and
# 'Intercept'; the previous 'beta' / 'alpha' keys do not exist in that frame
# and raised KeyError.
spread_kf = (data_all[secs[1]]
             - data_all[secs[0]] * beta_kf['Slope']
             - beta_kf['Intercept'])
spread_kf.plot();
# Static (OLS) spread with its bands, and the dynamic Kalman-filter spread
# drawn on the same axes for comparison.
ax = spread_oos.plot(style=['g', '--b', '--y', '--y', 'r']);
spread_kf.plot(ax=ax, label='Dynamic PEP-KO Spread')
ax.legend(loc=0)
# spread['PEP-KO Spread'].plot()
# spread_oos['PEP-KO Spread OOS'].plot()
# plt.ylim((-3, 3));
# <center>