In [1]:
from __future__ import print_function
import numpy as np
In [2]:
from quantopian.pipeline import Pipeline
from quantopian.pipeline.experimental import QTradableStocksUS
from quantopian.research import returns, run_pipeline, symbols
In [3]:
def tradeable_for(day):
    """Return the assets in the QTradableStocksUS universe on `day`."""
    pipe = Pipeline(screen=QTradableStocksUS())
    # run_pipeline returns a frame indexed by (date, asset); level 1 holds the assets.
    return run_pipeline(pipe, day, day).index.get_level_values(1)
In [4]:
universe = tradeable_for('2011-01')
In [5]:
rets = returns(universe, '2009', '2011').dropna(how='any', axis=1)
rets.head()
Out[5]:
Equity(2 [ARNC]) Equity(24 [AAPL]) Equity(41 [ARCB]) Equity(52 [ABM]) Equity(62 [ABT]) Equity(64 [ABX]) Equity(67 [ADSK]) Equity(76 [TAP]) Equity(88 [ACI]) Equity(107 [ACV]) ... Equity(36371 [OCSL]) Equity(36372 [SNI]) Equity(36448 [LPS]) Equity(36628 [GTAT]) Equity(36714 [RAX]) Equity(36733 [HSNI]) Equity(36763 [WPRT]) Equity(36930 [DISC_A]) Equity(37686 [LOPE]) Equity(37775 [CLW])
2009-01-02 00:00:00+00:00 0.075573 0.062573 0.017290 -0.013132 0.001320 -0.021220 0.049771 0.010627 0.139399 0.032727 ... -0.001280 0.072630 -0.011282 0.253472 0.104869 0.035813 0.058824 0.039007 0.010684 0.136527
2009-01-05 00:00:00+00:00 -0.018124 0.043229 -0.029752 0.003200 -0.015700 -0.045570 0.010643 -0.019419 0.039508 -0.032459 ... 0.002564 0.016928 0.017967 0.102493 0.020339 -0.057181 0.111111 0.010239 -0.020613 0.160169
2009-01-06 00:00:00+00:00 0.020208 -0.016913 0.001665 0.009067 -0.033626 -0.020963 -0.001915 -0.056773 0.051035 0.000419 ... -0.023973 0.012072 -0.046197 0.062814 0.069767 0.019746 0.055000 0.012838 -0.028602 0.112625
2009-01-07 00:00:00+00:00 -0.099897 -0.021505 -0.039722 -0.020633 -0.004914 -0.071970 -0.038849 -0.013798 -0.111928 -0.009838 ... 0.000000 -0.023018 -0.053398 -0.052009 -0.049689 -0.093361 -0.069510 -0.029353 -0.053333 -0.151020
2009-01-08 00:00:00+00:00 0.042203 0.019780 -0.024884 0.031319 0.010868 0.066342 -0.041916 -0.021963 0.059774 0.005793 ... -0.001310 -0.013879 0.025178 0.022444 -0.042484 -0.089245 0.022071 0.038488 -0.002347 -0.026923

5 rows × 1732 columns

In [6]:
all_returns = rets.values
spy_returns = returns('SPY', '2009', '2011').values
In [7]:
def original_vectorized_beta(spy, assets):
    # De-mean each asset's returns and SPY's returns.
    asset_residuals = assets - assets.mean(axis=0)
    spy_residuals = spy - spy.mean()
    # Covariance of each asset with SPY, divided by SPY's variance.
    # (The 1/(T-1) normalizers cancel, so plain sums suffice.)
    covariances = (asset_residuals * spy_residuals).sum(axis=0)
    spy_variance = (spy_residuals ** 2).sum()
    return covariances / spy_variance
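
For reference, each entry of the result is the standard OLS beta of an asset against SPY: the sample covariance of the asset's returns with SPY's, divided by the sample variance of SPY's returns:

$$\beta_i = \frac{\sum_t (r_{i,t} - \bar{r}_i)(s_t - \bar{s})}{\sum_t (s_t - \bar{s})^2}$$

where $s_t$ is SPY's return on day $t$.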
In [8]:
original = original_vectorized_beta(spy_returns[:, None], all_returns)
original
Out[8]:
array([ 1.83116649,  0.95349333,  1.40241192, ...,  0.93017304,
        0.4938906 ,  1.48053737])

Sanity Check that Values Still Match

In [9]:
from scipy.stats import linregress

aapl_returns = returns('AAPL', '2009', '2011').values
scipy_aapl_beta = linregress(spy_returns, aapl_returns).slope
vectorized_aapl_beta = original[rets.columns.get_loc(symbols('AAPL'))]

print("Scipy:     ", scipy_aapl_beta)
print("Vectorized:", vectorized_aapl_beta)
Scipy:      0.953493328272
Vectorized: 0.953493328272
In [10]:
def fastest_vectorized_beta_I_can_muster_v2(spy, assets):
    buf = np.empty(assets.shape[1])
    # We only need to de-mean one of these arrays, and SPY is a lot less work...
    # NOTE: this mutates `spy` in place; pass a copy if the caller needs it intact.
    spy -= spy.mean()
    spy.dot(assets, out=buf)
    np.divide(buf, spy.dot(spy), out=buf)
    return buf
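
The de-meaning shortcut is exact, not an approximation: because the SPY residuals sum to zero, the asset means drop out of the covariance, i.e. $\sum_t (a_t - \bar{a})(s_t - \bar{s}) = \sum_t a_t (s_t - \bar{s})$. As a quick illustration (not part of the original notebook), the identity can be checked on synthetic data:

# Check the shortcut on random data: de-meaning only one side of the
# product leaves the covariance sums unchanged.
rng = np.random.RandomState(0)
s = rng.randn(500)          # stand-in for SPY returns
a = rng.randn(500, 100)     # stand-in for asset returns

s_resid = s - s.mean()
full = ((a - a.mean(axis=0)) * s_resid[:, None]).sum(axis=0)
shortcut = s_resid.dot(a)   # skips de-meaning the assets entirely

print(np.allclose(full, shortcut))  # True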
In [11]:
new = fastest_vectorized_beta_I_can_muster_v2(spy_returns, all_returns)
new
Out[11]:
array([ 1.83116649,  0.95349333,  1.40241192, ...,  0.93017304,
        0.4938906 ,  1.48053737])

New Vectorized Beta Matches Old to Within ~$10^{-15}$

In [12]:
np.abs(new - original).max()
Out[12]:
2.6645352591003757e-15
In [13]:
import time

def timeit(f):
    """Take a function f and return a new function that calls f 5000 times,
    returning the last result and the average runtime over those calls,
    ignoring the ten slowest and ten fastest. This gives a more reliable
    estimate for fast functions.
    """
    _time = time.time
    def timed_f(*args, **kwargs):
        times = []
        for _ in range(5000):
            start = _time()
            result = f(*args, **kwargs)
            end = _time()
            times.append(end - start)
        # Average the middle 4980 samples to smooth out variance from cache
        # effects and scheduler noise.
        average_time = sum(sorted(times)[10:-10]) / (len(times) - 20)
        return result, average_time
    return timed_f
In [14]:
_, duration_old = timeit(original_vectorized_beta)(spy_returns[:, None], all_returns)
print("It took {} seconds to calculate betas the old way.".format(duration_old))
It took 0.00288552261261 seconds to calculate betas the old way.
In [15]:
_, duration_new = timeit(fastest_vectorized_beta_I_can_muster_v2)(spy_returns, all_returns)
print("It took {} seconds to calculate betas the new way.".format(duration_new))
print("The new version is {}x faster than the original notebook version.".format(duration_old / duration_new))
It took 0.000153615867278 seconds to calculate betas the new way.
The new version is 18.7840140719x faster than the original notebook version.