
Hurst Exponent Approximation Factor

See https://www.quantopian.com/posts/hurst-exponent for background on the Hurst exponent and the original (slower) implementation.

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import statsmodels.api as sm
import scipy as sp
from scipy.stats import norm, linregress
# http://blaze.readthedocs.io/en/latest/index.html
import blaze as bz

from zipline.utils.tradingcalendar import get_trading_days
from time import time
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
from quantopian.pipeline import CustomFactor
from quantopian.pipeline.factors import SimpleMovingAverage
from quantopian.pipeline.filters.morningstar import Q1500US, Q500US

from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import AverageDollarVolume
In [2]:
import seaborn
In [3]:
#Hurst exponent helps test whether the time series is:
#(1) A Random Walk (H ~ 0.5)
#(2) Trending (H > 0.5)
#(3) Mean reverting (H < 0.5)

class HurstExp(CustomFactor):  
    inputs = [USEquityPricing.open]  
    window_length = int(252*0.5)
    
    def Hurst(self, ts):   #Fast
        # Create a range of lag values  
        lags=np.arange(2,20)  
        # Calculate tau: the square root of the standard deviation of the lagged differences
        tau = [np.sqrt(np.std(np.subtract(ts[lag:], ts[:-lag]))) for lag in lags]

        # Slow: Use a linear fit to estimate  
        #poly = np.polyfit(np.log(lags), np.log(tau), 1)[0]
        
        #Fast
        #From: Derek M. Tishler - dmtishler@gmail.com
        # 1st degree polynomial approximation for speed  
        # source: http://stackoverflow.com/questions/28237428/fully-vectorise-numpy-polyfit  
        n = len(lags)  
        x = np.log(lags)  
        y = np.log(tau)  
        poly = (n*(x*y).sum() - x.sum()*y.sum()) / (n*(x*x).sum() - x.sum()*x.sum())

        # The fitted slope is H/2 (we regressed log(sqrt(std)) on log(lag)),
        # so double it to recover the Hurst exponent
        hurst_exp = poly*2.0
        return hurst_exp

    def compute(self, today, assets, out, OPEN):
        # Per @Villa's adjustment: use log prices so that subtracting lagged values gives log returns
        SERIES = np.log(np.nan_to_num(OPEN))
        hurst_exp_per_asset = [self.Hurst(SERIES[:, col_id]) for col_id in range(SERIES.shape[1])]
        #print 'Hurst Exp:\n', 'len=', len(hurst_exp_per_asset)
        #print "HurstData=", hurst_exp_per_asset
        out[:] = np.nan_to_num(hurst_exp_per_asset)
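
As a quick standalone sanity check (not part of the original notebook), the same lag/tau regression can be run on synthetic series whose behaviour is known: a random walk should come out near H = 0.5, a strongly trending series well above 0.5 (near 1), and a mean-reverting (white-noise) series near 0. It also confirms that the vectorized slope agrees with np.polyfit.

import numpy as np

def hurst_demo(ts):
    lags = np.arange(2, 20)
    tau  = [np.sqrt(np.std(np.subtract(ts[lag:], ts[:-lag]))) for lag in lags]
    x, y, n = np.log(lags), np.log(tau), len(lags)
    fast_slope = (n*(x*y).sum() - x.sum()*y.sum()) / (n*(x*x).sum() - x.sum()*x.sum())
    slow_slope = np.polyfit(x, y, 1)[0]   # reference fit; should match fast_slope
    return 2.0*fast_slope, 2.0*slow_slope

np.random.seed(0)
gbm = np.log(np.cumsum(np.random.randn(100000)) + 1000.)       # random walk    -> H near 0.5
tr  = np.log(np.cumsum(np.random.randn(100000) + 1.) + 1000.)  # trending       -> H near 1
mr  = np.log(np.random.randn(100000) + 1000.)                  # mean reverting -> H near 0
for name, series in [('random walk', gbm), ('trending', tr), ('mean reverting', mr)]:
    print('Hurst(%s): fast=%.3f, polyfit=%.3f' % ((name,) + hurst_demo(series)))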
        
In [4]:
def make_pipeline():
    # Base universe: the top 15 stocks by 30-day average dollar volume within the Q500US
    dollar_volume = AverageDollarVolume(window_length=30).top(15)
    universe = (Q500US() 
                & dollar_volume
                )
    
    # Filter out undesirably priced assets for our capital base
    latest_price  = USEquityPricing.open.latest
    price_mask    = (latest_price > 10.) & (latest_price < 1000000.) #Reduces top 15 down to 8 or 9
    
    # Measure the degree of mean reversion / random walk / trending in each asset
    hurst_factor = HurstExp(mask=(universe & price_mask))  #.top(100)

    pipe = Pipeline(
        columns={
            'hurst': hurst_factor,
        },
        screen=universe
    )
    
    return pipe
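
Outside of research, the same pipeline would typically be wired into an algorithm with Quantopian's algorithm API. A minimal sketch (the pipeline name 'hurst_pipe' is just illustrative):

from quantopian.algorithm import attach_pipeline, pipeline_output

def initialize(context):
    # Register the pipeline so it is computed before each trading day
    attach_pipeline(make_pipeline(), 'hurst_pipe')

def before_trading_start(context, data):
    # DataFrame indexed by asset, with a 'hurst' column for today's screened assets
    context.hurst_output = pipeline_output('hurst_pipe')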
In [5]:
result = run_pipeline(make_pipeline(), start_date='2017-03-01', end_date='2018-01-08')
In [6]:
result
Out[6]:
hurst
2017-03-01 00:00:00+00:00 Equity(24 [AAPL]) 0.507521
Equity(700 [BAC]) 0.584885
Equity(1335 [C]) 0.579652
Equity(5061 [MSFT]) 0.182185
Equity(16841 [AMZN]) 0.468925
Equity(19725 [NVDA]) 0.382621
Equity(25006 [JPM]) 0.532008
Equity(39840 [TSLA]) 0.456210
Equity(42950 [FB]) 0.490782
2017-03-02 00:00:00+00:00 Equity(24 [AAPL]) 0.508193
Equity(700 [BAC]) 0.579273
Equity(1335 [C]) 0.578211
Equity(5061 [MSFT]) 0.177688
Equity(16841 [AMZN]) 0.469413
Equity(19725 [NVDA]) 0.390256
Equity(25006 [JPM]) 0.529061
Equity(39840 [TSLA]) 0.456357
Equity(42950 [FB]) 0.491306
2017-03-03 00:00:00+00:00 Equity(24 [AAPL]) 0.507268
Equity(700 [BAC]) 0.570570
Equity(1335 [C]) 0.574210
Equity(5061 [MSFT]) 0.174331
Equity(16841 [AMZN]) 0.470147
Equity(19725 [NVDA]) 0.400299
Equity(25006 [JPM]) 0.520129
Equity(39840 [TSLA]) 0.460270
Equity(42950 [FB]) 0.491839
2017-03-06 00:00:00+00:00 Equity(24 [AAPL]) 0.507182
Equity(700 [BAC]) 0.565966
Equity(1335 [C]) 0.571460
... ... ...
2018-01-03 00:00:00+00:00 Equity(5061 [MSFT]) 0.402769
Equity(5121 [MU]) 0.469260
Equity(16841 [AMZN]) 0.440811
Equity(19725 [NVDA]) 0.399652
Equity(39840 [TSLA]) 0.349845
Equity(42950 [FB]) 0.335136
2018-01-04 00:00:00+00:00 Equity(24 [AAPL]) 0.471569
Equity(700 [BAC]) 0.457474
Equity(5061 [MSFT]) 0.399082
Equity(5121 [MU]) 0.466353
Equity(16841 [AMZN]) 0.439662
Equity(19725 [NVDA]) 0.397149
Equity(39840 [TSLA]) 0.365745
Equity(42950 [FB]) 0.307590
2018-01-05 00:00:00+00:00 Equity(24 [AAPL]) 0.469122
Equity(700 [BAC]) 0.454850
Equity(5061 [MSFT]) 0.398775
Equity(5121 [MU]) 0.455801
Equity(16841 [AMZN]) 0.438394
Equity(19725 [NVDA]) 0.391525
Equity(39840 [TSLA]) 0.366392
Equity(42950 [FB]) 0.272892
2018-01-08 00:00:00+00:00 Equity(24 [AAPL]) 0.466696
Equity(700 [BAC]) 0.452925
Equity(5061 [MSFT]) 0.399888
Equity(5121 [MU]) 0.448963
Equity(16841 [AMZN]) 0.437138
Equity(19725 [NVDA]) 0.393242
Equity(39840 [TSLA]) 0.366753
Equity(42950 [FB]) 0.239896

1844 rows × 1 columns
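
result is a DataFrame indexed by (date, equity) with a single hurst column, so one asset's Hurst series can be pulled out with a cross-section. An illustrative one-liner (assuming the research symbols() lookup):

aapl_hurst = result['hurst'].xs(symbols('AAPL'), level=1)
aapl_hurst.plot(title='AAPL Hurst exponent')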

In [7]:
def plot_equities(data, column, equities=None, title=""):
    """Plot one column of a (date, equity)-indexed frame; set equities to None for all equities"""
    if equities is None:
        equities = data.index.get_level_values(1).unique()
    df = pd.DataFrame(data.loc[:, column].xs(equities[0], level=1))
    df = df.rename(columns={column: equities[0].symbol})
    for eq in equities[1:]:
        df1 = pd.DataFrame(data.loc[:, column].xs(eq, level=1))
        df1 = df1.rename(columns={column: eq.symbol})
        df = df.join(df1)
    #df = df / df.iloc[0, :]  # Normalize all values to start at 1.0
    ax = df.plot(title=title, fontsize=12)
    ax.set_xlabel("date")
    ax.set_ylabel(column)
    plt.show()
    return df
In [8]:
_ = plot_equities(result, 'hurst', result.index.get_level_values(1)[0:9], 'Hurst Factors')
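
The same wide frame (all equities, not just the first nine) can also be built directly with unstack instead of the join loop, assuming symbols are wanted as column labels; assets that drop out of the screen simply show up as NaN:

wide = result['hurst'].unstack(level=1)
wide.columns = [eq.symbol for eq in wide.columns]
ax = wide.plot(title='Hurst Factors', fontsize=12)
ax.set_xlabel('date')
ax.set_ylabel('hurst')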