Notebook
In [90]:
# Import a Kalman filter and other useful libraries
from pykalman import KalmanFilter
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import poly1d
import seaborn as sns

Effect of prior

In [106]:
# Load pricing data for the study window.
# NOTE(review): get_pricing is a Quantopian research-environment builtin, not
# imported above — this cell only runs on that platform. Both series are
# daily prices indexed by date over the same range.
start = '2012-01-01'
end = '2015-01-01'
spy = get_pricing('SPY', fields='price', start_date=start, end_date=end) 
amzn = get_pricing('AMZN', fields='price', start_date=start, end_date=end)
In [121]:
def do_plots(x_data, y_data, prior_mean, prior_covariance):
    """Kalman-filter linear regression of y_data on x_data, with diagnostics.

    Estimates a time-varying (slope, intercept) state with pykalman, then:
    plots the running slope and intercept estimates, shows a KDE joint plot
    of 5000 samples drawn from the prior, and prints the final fitted
    relationship y = slope * x + intercept.

    Parameters
    ----------
    x_data, y_data : pandas Series
        Observed regressor and response, aligned on the same index.
    prior_mean : length-2 sequence
        Prior mean for (slope, intercept).
    prior_covariance : 2x2 array-like
        Prior covariance for (slope, intercept).
    """
    delta = 1e-3
    trans_cov = delta / (1 - delta) * np.eye(2)  # How much the random walk wiggles
    # Observation matrix at time t is [x_t, 1], so y_t = slope * x_t + intercept.
    obs_mat = np.expand_dims(np.vstack([[x_data], [np.ones(len(x_data))]]).T, axis=1)

    kf = KalmanFilter(n_dim_obs=1, n_dim_state=2,  # y is 1-dimensional, (slope, intercept) is 2-dimensional
                      initial_state_mean=prior_mean,
                      initial_state_covariance=prior_covariance,
                      transition_matrices=np.eye(2),
                      observation_matrices=obs_mat,
                      observation_covariance=0,
                      transition_covariance=trans_cov)
    # Running (filtered) estimates of the state parameters given observations y.
    # Covariances are not used here, so they are discarded.
    state_means, _ = kf.filter(y_data.values)

    _, axarr = plt.subplots(2, sharex=True)
    # BUG FIX: the original plotted against the *global* `x`'s index; use the
    # x_data argument so the function does not depend on outside state.
    axarr[0].plot(x_data.index, state_means[:, 0], label='slope')
    axarr[0].legend()
    axarr[1].plot(x_data.index, state_means[:, 1], label='intercept')
    axarr[1].legend()
    plt.tight_layout();

    # Visualize the prior itself by sampling from it.
    xx, yy = np.random.multivariate_normal(prior_mean, prior_covariance, 5000).T
    sns.jointplot(xx, yy, kind="kde")

    # Parenthesized form works under both Python 2 (print statement with a
    # single parenthesized argument) and Python 3 (print function).
    print("final state %f = %f * %f + %f" % (y_data[-1], state_means[-1][0], x_data[-1], state_means[-1][1]))
    

Test from the original notebook. One thing I noticed is how close the intercept and slope values are.

In [129]:
# Zero prior mean with an all-ones covariance (fully correlated slope/intercept prior).
do_plots(spy,amzn, [0,0], np.ones((2,2)))
final state 310.360000 = 1.503603 * 205.520000 + 1.339497

Use a diagonal prior with larger variance (less confidence in the prior mean).

In [123]:
# Zero prior mean with a diagonal, large-variance covariance (weak prior).
do_plots(spy,amzn, [0,0], np.eye(2)*1e3)
final state 310.360000 = 1.723906 * 205.520000 + -43.937100

OLS regression on the last 30 values. The slope and intercept are close to test 2.

In [127]:
# OLS regression of AMZN on SPY over the last `window_size` observations,
# including an intercept (add_constant prepends a column of ones to X).
window_size = 30
import statsmodels.api as sm
X = sm.add_constant(spy.values[-window_size:])

# FIX: the original passed formula="A ~ B + C" to sm.OLS; the array interface
# does not take a formula (use statsmodels.formula.api for that), so the
# kwarg was meaningless and is removed.
model = sm.OLS(amzn[-window_size:].values, X)
results = model.fit()
results.params  # array of [intercept, slope]
Out[127]:
array([-52.62103509,   1.78589291])

OLS regression over the last 30 values, with no intercept. The slope is close to test 1.

In [128]:
# OLS regression of AMZN on SPY over the last `window_size` observations,
# WITHOUT an intercept term (regression through the origin).
window_size = 30
import statsmodels.api as sm

# FIX: dropped the meaningless formula="A ~ B + C" kwarg (sm.OLS is the
# array interface and does not accept formulas).
model = sm.OLS(amzn[-window_size:].values, spy.values[-window_size:])
results = model.fit()
results.params  # array of [slope]
Out[128]:
array([ 1.5303261])

Initial test with y=x and covariance matrix of $\begin{pmatrix} 1 & 1 \\ 1 & 1 \end{pmatrix}$

In [93]:
# y = x: true slope 1, true intercept 0; all-ones prior covariance.
do_plots(spy,spy, [0,0], np.ones((2,2)))

Test with y=x and covariance matrix of $\begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix}$

In [94]:
# y = x with an identity prior covariance.
do_plots(spy,spy, [0,0], np.eye(2))

Less confidence in prior mean

In [95]:
# y = x with a very wide prior (little confidence in the prior mean).
do_plots(spy,spy, [0,0], np.eye(2) * 1e5)

Test with non zero intercept

In [96]:
# x = spy - 30, y = spy, i.e. y = x + 30 (nonzero intercept); all-ones prior covariance.
do_plots(spy-30,spy, [0,0],np.ones((2,2)))
In [97]:
# y = x + 30 with a very wide prior.
do_plots(spy-30,spy, [0,0], np.eye(2) * 1e5)

Tests with large slope

In [98]:
# y = 25x (large slope, zero intercept); all-ones prior covariance.
do_plots(spy,spy*25, [0,0], np.ones((2,2)))
In [99]:
# y = 25x with a wide diagonal prior.
do_plots(spy,spy*25, [0,0], np.eye(2) * 1e3)

Test with slope and intercept

In [100]:
# y = 25x - 200 (slope and intercept); wide diagonal prior.
do_plots(spy,spy*25-200, [0,0], np.eye(2) * 1e3)
In [101]:
# y = 25x - 200 with an all-ones prior covariance.
do_plots(spy,spy*25-200, [0,0], np.ones((2,2)))
In [102]:
# y = 25x - 200 with an extremely wide prior (variance 1e8).
do_plots(spy,spy*25-200, [0,0], np.eye(2) * 1e8)
In [103]:
# Zero prior covariance: total confidence in the (wrong) prior mean [0, 0].
do_plots(spy,spy*25-200, [0,0], np.zeros((2,2)))
In [104]:
# Zero prior covariance pins the state to the prior mean [20, 190].
do_plots(spy,spy*25-200, [20,190], np.zeros((2,2)))
In [105]:
# Prior mean near the true values (true slope 25, intercept -200), moderate-variance diagonal prior.
do_plots(spy,spy*25-200, [20,-198], np.eye(2)*1000)
In [ ]:
 
In [ ]: