# Here are three methods I use to calculate beta. If you want to do it in before_market, you can only do so from bar 2 onward, once you have stored a copy of the data in context.
import numpy as np
from pytz import timezone
import pandas as pd
from scipy import stats
import operator
from functools import partial
from scipy import polyfit, polyval
def estimateBeta(priceY, priceX, algo='standard'):
    """Estimate the beta of price series Y relative to price series X.

    Parameters
    ----------
    priceY : array-like or pandas.Series
        Prices of the instrument whose beta is estimated.
    priceX : array-like or pandas.Series
        Prices of the reference series (treated as the benchmark).
    algo : str, optional
        Estimation method:

        * ``'standard'`` (default) -- covariance of the log returns of X
          and Y divided by the variance of the log returns of X.
        * ``'quantopian'`` or ``'q'`` -- same ratio computed on simple
          returns with ``np.cov(..., ddof=1)``.
        * ``'returns'`` -- slope of a linear fit of Y's simple returns on
          X's simple returns. Only observations whose X return lies
          strictly between the 20th and 80th percentile are used, and
          points whose residual exceeds three standard deviations are
          dropped before refitting (at most 9 fits).

    Returns
    -------
    beta : float
        Estimated beta of Y with respect to X.

    Raises
    ------
    TypeError
        If ``algo`` is not one of the supported names.
    """
    X = pd.DataFrame({'x': priceX, 'y': priceY})

    if algo == 'returns':
        # Simple period-over-period returns; rows with a NaN in either
        # column are dropped so x and y stay aligned.
        ret = (X / X.shift(1) - 1).dropna()
        x = ret['x'].values
        y = ret['y'].values

        # Discard the tails of the X-return distribution before fitting.
        low = np.percentile(x, 20)
        high = np.percentile(x, 80)
        iValid = (x > low) & (x < high)
        x = x[iValid]
        y = y[iValid]

        # Refit until no 3-sigma residual outliers remain or the iteration
        # budget is exhausted. nrOutliers starts at 1 so the loop body runs
        # at least once and beta is always assigned.
        iteration = 1
        nrOutliers = 1
        while iteration < 10 and nrOutliers > 0:
            # NumPy's polyfit/polyval are called directly; the names that
            # scipy re-exports at its top level are deprecated aliases.
            a, b = np.polyfit(x, y, 1)
            err = np.polyval([a, b], x) - y
            idxOutlier = np.abs(err) > 3 * np.std(err)
            nrOutliers = np.count_nonzero(idxOutlier)
            beta = a
            x = x[~idxOutlier]
            y = y[~idxOutlier]
            iteration += 1

    elif algo == 'quantopian' or algo == 'q':
        ret = (X / X.shift(1) - 1).dropna()
        x = ret['x'].values
        y = ret['y'].values
        # Row 0 is Y, row 1 is X: C[0][1] is cov(Y, X), C[1][1] is var(X).
        C = np.cov(np.vstack([y, x]), ddof=1)
        algorithm_covariance = C[0][1]
        benchmark_variance = C[1][1]
        beta = algorithm_covariance / benchmark_variance

    elif algo == 'standard':
        ret = np.log(X).diff().dropna()
        beta = ret['x'].cov(ret['y']) / ret['x'].var()

    else:
        raise TypeError("unknown Beta algorithm type, use 'standard', 'q' or 'returns'")

    return beta