Notebook
In [2]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from pykalman import KalmanFilter
from scipy import poly1d

import datetime as dt
import pytz
In [3]:
start = '2014-01-04'
end = '2016-11-01'
y = get_pricing('XLE', fields='price', start_date=start, end_date=end)
x = get_pricing('USO', fields='price', start_date=start, end_date=end)


delta = 1e-3
trans_cov = delta / (1 - delta) * np.eye(2) # How much random walk wiggles
obs_mat = np.expand_dims(np.vstack([[x], [np.ones(len(x))]]).T, axis=1)

kf = KalmanFilter(n_dim_obs=1, n_dim_state=2, # y is 1-dimensional, (alpha, beta) is 2-dimensional
                  initial_state_mean=[0,0],
                  initial_state_covariance=np.ones((2, 2)),
                  transition_matrices=np.eye(2),
                  observation_matrices=obs_mat,
                  observation_covariance=2,
                  transition_covariance=trans_cov)

state_means, state_covs = kf.filter(y.values)



cm = plt.get_cmap('jet')
colors = np.linspace(0.1, 1, len(x))

sc = plt.scatter(x, y, s=30, c=colors, cmap=cm, edgecolor='k', alpha=0.8)
cb = plt.colorbar(sc)
cb.ax.set_yticklabels([str(p.date()) for p in x[::len(x)//9].index])

# Plot every fifth line
step = 5
xi = np.linspace(x.min()-5, x.max()+5, 2)
colors_l = np.linspace(0.1, 1, len(state_means[::step]))
for i, beta in enumerate(state_means[::step]):
    plt.plot(xi, beta[0] * xi + beta[1], alpha=.1, lw=1, c=cm(colors_l[i]))
    
# Plot the OLS regression line
plt.plot(xi, poly1d(np.polyfit(x, y, 1))(xi), '0.4')

# Adjust axes for visibility
#plt.axis([70, 130, 30, 70])
Out[3]:
[<matplotlib.lines.Line2D at 0x7f21cca64c90>]
In [ ]: