I found some beta calculation code. Which method do you think is best for developing low-beta algos?
import scipy.stats as stats
from scipy import polyfit, polyval
import datetime
import pytz
import pandas as pd
import numpy as np
import re
from pandas import DataFrame,Series
from zipline.utils.tradingcalendar import get_early_closes
from zipline.utils import tradingcalendar
from datetime import timedelta
import operator
from functools import partial
def estimateBeta(priceY, priceX, algo='standard'):
    """Estimate the beta of price series Y relative to price series X.

    Parameters
    ----------
    priceY : price series of y (the series whose beta is estimated)
    priceX : price series of x (usually the market)
    algo : {'standard', 'log', 'returns'}, default 'standard'
        'standard'
            Covariance of the log returns of x and y divided by the
            variance of the log returns of x.
        'log'
            Slope of a first-degree least-squares fit of log(y) against
            log(x). Rows where either price is missing are ignored.
        'returns'
            Slope of an iterative first-degree fit on simple returns.
            Only observations whose x return lies strictly between the
            20th and 80th percentile of x returns are used; after each
            fit, points whose residual exceeds 3 standard deviations of
            the residuals are removed and the fit is repeated (at most
            9 passes, stopping as soon as a pass finds no outliers).

    Returns
    -------
    beta : beta of Y relative to X

    Raises
    ------
    TypeError
        If ``algo`` is not 'standard', 'log' or 'returns'.
    """
    # Building a frame aligns both series on a common index; rows present in
    # only one of them show up as NaN and are dropped in each branch below.
    X = pd.DataFrame({'x': priceX, 'y': priceY})

    if algo == 'returns':
        ret = (X / X.shift(1) - 1).dropna()
        # Select by column name rather than by position so the result does
        # not depend on the frame's column ordering.
        x = ret['x'].values
        y = ret['y'].values

        # Keep only the observations whose x return lies strictly inside
        # the 20th..80th percentile band.
        low = np.percentile(x, 20)
        high = np.percentile(x, 80)
        iValid = (x > low) & (x < high)
        x = x[iValid]
        y = y[iValid]

        iteration = 1
        nrOutliers = 1  # non-zero so that the first pass always runs
        while iteration < 10 and nrOutliers > 0:
            # np.polyfit/np.polyval are used directly instead of the NumPy
            # aliases from the scipy root namespace, which SciPy deprecated.
            (a, b) = np.polyfit(x, y, 1)
            yf = np.polyval([a, b], x)
            err = yf - y
            idxOutlier = np.abs(err) > 3 * np.std(err)
            nrOutliers = np.count_nonzero(idxOutlier)
            beta = a
            # Drop the outliers before the next pass.
            x = x[~idxOutlier]
            y = y[~idxOutlier]
            iteration += 1

    elif algo == 'log':
        # Drop incomplete rows first: a single NaN would otherwise poison
        # the least-squares fit.
        logs = np.log(X.dropna())
        (a, b) = np.polyfit(logs['x'].values, logs['y'].values, 1)
        beta = a

    elif algo == 'standard':
        ret = np.log(X).diff().dropna()
        beta = ret['x'].cov(ret['y']) / ret['x'].var()

    else:
        raise TypeError("unknown Beta algorithm type, use 'standard', 'log' or 'returns'")

    return beta