See https://www.quantopian.com/posts/hurst-exponent for Hurst #Slow
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import statsmodels.api as sm
import scipy as sp
from scipy.stats import norm, linregress
# http://blaze.readthedocs.io/en/latest/index.html
import blaze as bz
from zipline.utils.tradingcalendar import get_trading_days
from time import time
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
from quantopian.pipeline import CustomFactor
from quantopian.pipeline.factors import SimpleMovingAverage
from quantopian.pipeline.filters.morningstar import Q1500US, Q500US
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import AverageDollarVolume
import seaborn
#Hurst exponent helps test whether the time series is:
#(1) A Random Walk (H ~ 0.5)
#(2) Trending (H > 0.5)
#(3) Mean reverting (H < 0.5)
class HurstExp(CustomFactor):
inputs = [USEquityPricing.open]
window_length = int(252*0.5)
def Hurst(self, ts): #Fast
# Create a range of lag values
lags=np.arange(2,20)
# Calculate variance array of the lagged differences
tau = [np.sqrt(np.std(np.subtract(ts[lag:], ts[:-lag]))) for lag in lags]
# Slow: Use a linear fit to estimate
#poly = np.polyfit(np.log(lags), np.log(tau), 1)[0]
#Fast
#From: Derek M. Tishler - dmtishler@gmail.com
# 1st degree polynomial approximation for speed
# source: http://stackoverflow.com/questions/28237428/fully-vectorise-numpy-polyfit
n = len(lags)
x = np.log(lags)
y = np.log(tau)
poly = (n*(x*y).sum() - x.sum()*y.sum()) / (n*(x*x).sum() - x.sum()*x.sum())
# Return the Hurst exponent
hurst_exp = poly*2.0
return hurst_exp
def compute(self, today, assets, out, OPEN):
SERIES = np.log(np.nan_to_num(OPEN)) #Adjustmet of @Villa "use log prices so that when you subtract lag differences you get log returns."
hurst_exp_per_asset = map(self.Hurst, [SERIES[:,col_id].flatten() for col_id in np.arange(SERIES.shape[1])])
#print 'Hurst Exp:\n','len=',len(hurst_exp_per_asset)
#print "HurstData=",hurst_exp_per_asset
out[:] = np.nan_to_num(hurst_exp_per_asset)
def make_pipeline():
#Base Universe is top 15 Volume stocks from Q500US
dollar_volume = AverageDollarVolume(window_length=30).top(15)
universe = (Q500US()
& dollar_volume
)
# Filter out undesierable priced assets for our capital base
latest_price = USEquityPricing.open.latest
price_mask = (latest_price > 10.) & (latest_price < 1000000.) #Reduces top 15 down to 8 or 9
# Measure the degree of mean reversion/randomwalk/trending in each asset
hurst_factor = HurstExp(mask = (universe & price_mask))#.top(100)
pipe = Pipeline(
columns={
'hurst': hurst_factor,
},
screen=universe
)
return pipe
result = run_pipeline(make_pipeline(), start_date='2017-03-01', end_date='2018-01-08')
result
def plot_equities(data, column, equities=None, title=""):
"""Set equities to None for all equities"""
if equities is None:
equities = data.index.get_level_values(1)
df = pd.DataFrame(data.ix[:, column].xs(equities[0], level=1))
df = df.rename(columns={column: equities[0].symbol})
for eq in equities[1:]:
df1 = pd.DataFrame(data.ix[:, column].xs(eq, level=1))
df1 = df1.rename(columns={column: eq.symbol})
df = df.join(df1)
df = df
#df = df / df.ix[0, :] #Normalize all values to start at 1.0
ax = df.plot(title=title, fontsize=12)
ax.set_xlabel("date")
ax.set_ylabel(column)
plt.show()
return df
_ = plot_equities(result, 'hurst', result.index.get_level_values(1)[0:9], 'Hurst Factors')