from __future__ import print_function
import numpy as np
from quantopian.pipeline import Pipeline
from quantopian.pipeline.experimental import QTradableStocksUS
from quantopian.research import returns, run_pipeline
def tradeable_for(day):
    """Return the index entries that pass the QTradableStocksUS screen on `day`.

    A screen-only pipeline is run with `day` as both the start and the end
    date, and level 1 of the resulting index is returned.
    """
    # NOTE(review): level 1 of the pipeline index is presumably the asset
    # level (level 0 being the date) -- confirm against run_pipeline's output.
    screen_only = Pipeline(screen=QTradableStocksUS())
    screened = run_pipeline(screen_only, day, day)
    return screened.index.get_level_values(1)
# Universe: whatever passed the QTradableStocksUS screen for '2011-01'.
universe = tradeable_for('2011-01')
# Returns for that universe between '2009' and '2011'. Any column containing
# at least one missing value is dropped, so the remaining data has no gaps.
# NOTE(review): presumably one column per asset and one row per day -- confirm.
rets = returns(universe, '2009', '2011').dropna(how='any', axis=1)
rets.head()  # bare expression: only has an effect as notebook cell output
# Raw arrays handed to the beta implementations below.
all_returns = rets.values
spy_returns = returns('SPY', '2009', '2011').values
def original_vectorized_beta(spy, assets):
    """Compute the regression beta of every asset column against SPY.

    Beta for each column is ``cov(asset, spy) / var(spy)``, evaluated as the
    ratio of summed residual products to summed squared SPY residuals.

    Parameters
    ----------
    spy : array of benchmark returns, either a column of shape (T, 1) or a
        1-D array of length T.
    assets : 2-D array of asset returns with one column per asset and the
        same number of rows as `spy`.

    Returns
    -------
    1-D array holding one beta per column of `assets`.
    """
    # A 1-D benchmark would broadcast against the *asset* axis: an error when
    # T != N and a silently wrong answer when T == N. Promote it to a column
    # so it always lines up with the rows (time axis) of `assets`.
    if np.ndim(spy) == 1:
        spy = np.asarray(spy)[:, None]
    asset_residuals = assets - assets.mean(axis=0)
    spy_residuals = spy - spy.mean()
    covariances = (asset_residuals * spy_residuals).sum(axis=0)
    spy_variance = (spy_residuals ** 2).sum()
    return covariances / spy_variance
# Baseline betas. `[:, None]` appends a trailing axis to the SPY returns so
# they multiply row-by-row against every column of `all_returns`.
original = original_vectorized_beta(spy_returns[:, None], all_returns)
original  # bare expression: only has an effect as notebook cell output
from scipy.stats import linregress
# Cross-check one asset: the slope of a scipy linear regression of AAPL on
# SPY should agree with the AAPL entry of the vectorized result.
aapl_returns = returns('AAPL', '2009', '2011').values
scipy_aapl_beta = linregress(spy_returns, aapl_returns).slope
# NOTE(review): `symbols` is not imported anywhere in the visible file;
# presumably it is supplied by the notebook environment -- confirm.
vectorized_aapl_beta = original[rets.columns.get_loc(symbols('AAPL'))]
print("Scipy: ", scipy_aapl_beta)
print("Vectorized:", vectorized_aapl_beta)
def fastest_vectorized_beta_I_can_muster_v2(spy, assets):
    """Compute the regression beta of every asset column against SPY.

    Uses a single vector-matrix product instead of materialising a full
    matrix of asset residuals.

    Parameters
    ----------
    spy : 1-D array of benchmark returns of length T. It is not modified.
    assets : 2-D array of asset returns, T rows and one column per asset.

    Returns
    -------
    1-D array holding one beta per column of `assets`.
    """
    # We only need to de-mean one of these arrays, and SPY is a lot less work:
    # the residuals sum to zero, so the asset means drop out of the product.
    # The subtraction is done out-of-place so the caller's array is left
    # untouched; this also lets integer input work.
    spy_residuals = spy - spy.mean()
    # Let `dot` allocate the result so its dtype follows the inputs; the
    # division then reuses that buffer instead of allocating a second one.
    buf = spy_residuals.dot(assets)
    np.divide(buf, spy_residuals.dot(spy_residuals), out=buf)
    return buf
# Betas from the optimized implementation (SPY is passed 1-D here).
new = fastest_vectorized_beta_I_can_muster_v2(spy_returns, all_returns)
new  # bare expression: only has an effect as notebook cell output
# Largest absolute disagreement between the two implementations.
np.abs((new - original)).max()
import time
def timeit(f, repeats=5000, trim=10):
    """Wrap `f` in a function that benchmarks it over repeated calls.

    The returned function forwards its arguments to `f`, calling it `repeats`
    times, and returns ``(last_result, average_runtime_in_seconds)``. The
    `trim` fastest and `trim` slowest calls are left out of the average,
    which gives a more reliable estimate for fast functions.

    Parameters
    ----------
    f : callable to benchmark.
    repeats : number of times `f` is called per measurement (default 5000).
    trim : number of samples discarded from each end of the sorted timings
        (default 10).

    Raises
    ------
    ValueError : if `trim` is negative or trimming would leave no samples.
    """
    if trim < 0:
        raise ValueError("trim must be non-negative")
    if repeats <= 2 * trim:
        raise ValueError("repeats must be greater than 2 * trim")

    # Prefer the high-resolution monotonic clock where it exists; fall back to
    # time.time on interpreters that predate perf_counter.
    _time = getattr(time, "perf_counter", time.time)

    def timed_f(*args, **kwargs):
        times = []
        for _ in range(repeats):
            start = _time()
            result = f(*args, **kwargs)
            end = _time()
            times.append(end - start)
        # Average only the middle of the distribution to smooth out variance
        # from cache effects. The end index is explicit because a plain
        # `[trim:-trim]` slice would be empty when trim == 0.
        kept = sorted(times)[trim:repeats - trim]
        average_time = sum(kept) / len(kept)
        return result, average_time

    return timed_f
# Benchmark both implementations on the same data. Only the average runtime
# is kept; the computed betas are discarded.
_, duration_old = timeit(original_vectorized_beta)(spy_returns[:, None], all_returns)
print("It took {} seconds to calculate betas the old way.".format(duration_old))
_, duration_new = timeit(fastest_vectorized_beta_I_can_muster_v2)(spy_returns, all_returns)
print("It took {} seconds to calculate betas the new way.".format(duration_new))
# Speed-up factor: ratio of the old average runtime to the new one.
print("The new version is {}x faster than the original notebook version.".format(duration_old / duration_new))