Notebook
In [90]:
# Import a Kalman filter and other useful libraries
from pykalman import KalmanFilter
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import poly1d
import seaborn as sns

Effect of prior

In [106]:
# Load pricing data for the study window.
# NOTE(review): get_pricing is a Quantopian research-environment builtin, not
# imported above — this cell only runs on that platform. Both series are
# daily prices indexed by date over the same range.
start = '2012-01-01'
end = '2015-01-01'
spy = get_pricing('SPY', fields='price', start_date=start, end_date=end) 
amzn = get_pricing('AMZN', fields='price', start_date=start, end_date=end)
In [121]:
def do_plots(x_data, y_data, prior_mean, prior_covariance):
    """Kalman-filter linear regression of y_data on x_data, with diagnostics.

    Estimates a time-varying (slope, intercept) state with pykalman, then:
    plots the running slope and intercept estimates, shows a KDE joint plot
    of 5000 samples drawn from the prior, and prints the final fitted
    relationship y = slope * x + intercept.

    Parameters
    ----------
    x_data, y_data : pandas Series
        Observed regressor and response, aligned on the same index.
    prior_mean : length-2 sequence
        Prior mean for (slope, intercept).
    prior_covariance : 2x2 array-like
        Prior covariance for (slope, intercept).
    """
    delta = 1e-3
    trans_cov = delta / (1 - delta) * np.eye(2)  # How much the random walk wiggles
    # Observation matrix at time t is [x_t, 1], so y_t = slope * x_t + intercept.
    obs_mat = np.expand_dims(np.vstack([[x_data], [np.ones(len(x_data))]]).T, axis=1)

    kf = KalmanFilter(n_dim_obs=1, n_dim_state=2,  # y is 1-dimensional, (slope, intercept) is 2-dimensional
                      initial_state_mean=prior_mean,
                      initial_state_covariance=prior_covariance,
                      transition_matrices=np.eye(2),
                      observation_matrices=obs_mat,
                      observation_covariance=0,
                      transition_covariance=trans_cov)
    # Running (filtered) estimates of the state parameters given observations y.
    # Covariances are not used here, so they are discarded.
    state_means, _ = kf.filter(y_data.values)

    _, axarr = plt.subplots(2, sharex=True)
    # BUG FIX: the original plotted against the *global* `x`'s index; use the
    # x_data argument so the function does not depend on outside state.
    axarr[0].plot(x_data.index, state_means[:, 0], label='slope')
    axarr[0].legend()
    axarr[1].plot(x_data.index, state_means[:, 1], label='intercept')
    axarr[1].legend()
    plt.tight_layout();

    # Visualize the prior itself by sampling from it.
    xx, yy = np.random.multivariate_normal(prior_mean, prior_covariance, 5000).T
    sns.jointplot(xx, yy, kind="kde")

    # Parenthesized form works under both Python 2 (print statement with a
    # single parenthesized argument) and Python 3 (print function).
    print("final state %f = %f * %f + %f" % (y_data[-1], state_means[-1][0], x_data[-1], state_means[-1][1]))
    

Test from the original notebook. One thing I noticed is how close the intercept and slope values are.

In [129]:
# Zero prior mean with an all-ones covariance (fully correlated slope/intercept prior).
do_plots(spy,amzn, [0,0], np.ones((2,2)))
final state 310.360000 = 1.503603 * 205.520000 + 1.339497

Use a diagonal prior with larger variance (less confidence in the prior mean).

In [123]:
# Zero prior mean with a diagonal, large-variance covariance (weak prior).
do_plots(spy,amzn, [0,0], np.eye(2)*1e3)
final state 310.360000 = 1.723906 * 205.520000 + -43.937100

OLS regression on the last 30 values. The slope and intercept are close to test 2.

In [127]:
# OLS regression of AMZN on SPY over the last `window_size` observations,
# including an intercept (add_constant prepends a column of ones to X).
window_size = 30
import statsmodels.api as sm
X = sm.add_constant(spy.values[-window_size:])

# FIX: the original passed formula="A ~ B + C" to sm.OLS; the array interface
# does not take a formula (use statsmodels.formula.api for that), so the
# kwarg was meaningless and is removed.
model = sm.OLS(amzn[-window_size:].values, X)
results = model.fit()
results.params  # array of [intercept, slope]
Out[127]:
array([-52.62103509,   1.78589291])

OLS regression over the last 30 values, with no intercept. The slope is close to test 1.

In [128]:
# OLS regression of AMZN on SPY over the last `window_size` observations,
# WITHOUT an intercept term (regression through the origin).
window_size = 30
import statsmodels.api as sm

# FIX: dropped the meaningless formula="A ~ B + C" kwarg (sm.OLS is the
# array interface and does not accept formulas).
model = sm.OLS(amzn[-window_size:].values, spy.values[-window_size:])
results = model.fit()
results.params  # array of [slope]
Out[128]:
array([ 1.5303261])

Initial test with y=x and covariance matrix of $\begin{pmatrix} 1 & 1 \\ 1 & 1 \end{pmatrix}$

In [93]:
# y = x: true slope 1, true intercept 0; all-ones prior covariance.
do_plots(spy,spy, [0,0], np.ones((2,2)))

Test with y=x and covariance matrix of $\begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix}$

In [94]:
# y = x with an identity prior covariance.
do_plots(spy,spy, [0,0], np.eye(2))

Less confidence in prior mean

In [95]:
# y = x with a very wide prior (little confidence in the prior mean).
do_plots(spy,spy, [0,0], np.eye(2) * 1e5)

Test with non zero intercept

In [96]:
# x = spy - 30, y = spy, i.e. y = x + 30 (nonzero intercept); all-ones prior covariance.
do_plots(spy-30,spy, [0,0],np.ones((2,2)))
In [97]:
# y = x + 30 with a very wide prior.
do_plots(spy-30,spy, [0,0], np.eye(2) * 1e5)

Tests with large slope

In [98]:
# y = 25x (large slope, zero intercept); all-ones prior covariance.
do_plots(spy,spy*25, [0,0], np.ones((2,2)))
In [99]:
# y = 25x with a wide diagonal prior.
do_plots(spy,spy*25, [0,0], np.eye(2) * 1e3)

Test with slope and intercept

In [100]:
# y = 25x - 200 (slope and intercept); wide diagonal prior.
do_plots(spy,spy*25-200, [0,0], np.eye(2) * 1e3)
In [101]:
# y = 25x - 200 with an all-ones prior covariance.
do_plots(spy,spy*25-200, [0,0], np.ones((2,2)))
In [102]:
# y = 25x - 200 with an extremely wide prior (variance 1e8).
do_plots(spy,spy*25-200, [0,0], np.eye(2) * 1e8)
In [103]:
# Zero prior covariance: total confidence in the (wrong) prior mean [0, 0].
do_plots(spy,spy*25-200, [0,0], np.zeros((2,2)))
In [104]:
# Zero prior covariance pins the state to the prior mean [20, 190].
do_plots(spy,spy*25-200, [20,190], np.zeros((2,2)))
In [105]:
# Prior mean near the true values (true slope 25, intercept -200), moderate-variance diagonal prior.
do_plots(spy,spy*25-200, [20,-198], np.eye(2)*1000)
In [ ]:
 
In [ ]: