Notebook
In [1]:
from quantopian.pipeline.factors import Returns
from quantopian.pipeline import Pipeline, CustomFactor
from quantopian.research import run_pipeline, symbols
from quantopian.pipeline.data.builtin import EquityPricing
from alphalens.tears import create_full_tear_sheet, create_returns_tear_sheet
from alphalens.performance import mean_information_coefficient
from quantopian.pipeline.domain import (
    AT_EQUITIES, # Austria
    AU_EQUITIES, # Australia
    BE_EQUITIES, # Belgium
    BR_EQUITIES, # Brazil
    CA_EQUITIES, # Canada
    CH_EQUITIES, # Switzerland
    CN_EQUITIES, # China
    DE_EQUITIES, # Germany
    DK_EQUITIES, # Denmark
    ES_EQUITIES, # Spain
    FI_EQUITIES, # Finland
    FR_EQUITIES, # France
    GB_EQUITIES, # Great Britain
    HK_EQUITIES, # Hong Kong
    IE_EQUITIES, # Ireland
    IN_EQUITIES, # India
    IT_EQUITIES, # Italy
    JP_EQUITIES, # Japan
    KR_EQUITIES, # South Korea
    NL_EQUITIES, # Netherlands
    NO_EQUITIES, # Norway
    NZ_EQUITIES, # New Zealand
    PT_EQUITIES, # Portugal
    SE_EQUITIES, # Sweden
    SG_EQUITIES, # Singapore
    US_EQUITIES, # United States
)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math
import time
In [2]:
###########################params##################################
start_date = '2005-01-01'
end_date = '2019-01-01'
domain = DE_EQUITIES

#-------------------------Volume Restriction for stocks------------
top_vol_perc = 30
vol_window = 20 #4 weeks
#here we look for stocks whose minimum daily volume over the last 20 business days was in the top 30% of all stocks

#-------------------------MA Crossover params----------------------
window_length_MA1 = 50
window_length_MA2 = 200 #this should be the longer of the two windows

#weight functions for the weighted averages; normalization happens later in the code. A constant 1 yields a plain SMA.
weightfunct_MA1 = lambda t : 1 / math.sqrt(-t + window_length_MA1 +1)
weightfunct_MA2 = lambda t : 1

#-------------------------forward returns we want to know----------
forwardDays = [1,2,3,4,5]

The weight function of MA1 looks as follows. The idea is that the average becomes more sensitive to the most recent data, since recent prices should be more representative of the current mean than prices from long ago.

In [3]:
pd.DataFrame(np.fromfunction(np.vectorize(lambda t : weightfunct_MA1(t)), [window_length_MA1])).plot()
Out[3]:
[plot of the MA1 weight function over the 50-day window, increasing toward the most recent day]
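
As a quick sanity check (a hypothetical snippet, not part of the original run), here are the unnormalized weights at the two ends of the 50-day window:

In [ ]:
#hypothetical check: t = 0 is the oldest row of the pipeline window, t = 49 the most recent one
print(weightfunct_MA1(0))    #1/sqrt(51), roughly 0.14 (oldest day)
print(weightfunct_MA1(49))   #1/sqrt(2), roughly 0.71 (most recent day)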
In [4]:
#let's convert the dates to sessions of the domain's trading calendar right away
start_date = domain.calendar.minute_to_session_label(pd.Timestamp(start_date,tz='UTC'))
end_date = domain.calendar.minute_to_session_label(pd.Timestamp(end_date,tz='UTC'))

Now we screen for the highest-volume stocks to avoid most liquidity problems:

In [5]:
#Factor that returns the minimum daily volume over the last vol_window days
class MinVolume(CustomFactor):
    window_length = vol_window
    inputs = [EquityPricing.volume]
    
    def compute(self, today ,assets, out, vol):
        out[:] = np.min(vol, axis=0)
        
#our volume filter; the mask makes sure we never pick up stocks that don't trade on a given day
min_vol = MinVolume()
top_vol = min_vol.percentile_between(100 - top_vol_perc, 100, mask = (min_vol >0))
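
As an optional check (a hypothetical snippet with an arbitrary date range, not part of the original run), one can count how many assets pass the volume screen per day:

In [ ]:
#hypothetical check: number of assets passing the volume screen each day
count_pipe = Pipeline(columns = {'passes' : top_vol}, screen = top_vol, domain = domain)
counts = run_pipeline(count_pipe, '2018-01-02', '2018-01-31')
print(counts.groupby(level = 0).size().head())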

Here is our custom weighted mean factor. Since we can't modify __init__(), here is a small workaround: a factory function that builds the CustomFactor class with the desired parameters:

In [6]:
"""
weightFunct: a weight function of the form lambda t : <computation> used to weight the elements inside the window
volume_weighted: boolean, whether to additionally weight by volume
window_len: the lookback window length of the factor
"""
def makeWeightedMean(weightFunct, volume_weighted, window_len):
    if volume_weighted:
        class WeightedMean(CustomFactor):
            inputs = [EquityPricing.close, EquityPricing.volume]
            window_length = window_len
            mask = top_vol
            #evaluate the weight function into a (window_length x 1) array; normalization happens later in compute()
            weights = np.fromfunction(np.vectorize(lambda i,j : weightFunct(i)), (window_length,1))
                              
            def compute(self, today, asset_ids, out, close, volume):
                nanMask = ~np.isnan(close) & ~np.isnan(volume)
                weights = np.multiply(self.weights, volume) #weights based on our function, scaled by volume
                weights = np.where(nanMask, weights, 0) #zero the weights where close or volume is NaN so the normalization stays correct
                weights = weights/weights.sum(axis = 0) #normalize our weights
                weighted_close = np.multiply(weights, close)
                out[:] = np.nansum(weighted_close,axis=0) #get the weighted mean for each stock
                
        return WeightedMean()
    
    else:
        class WeightedMean(CustomFactor):
            inputs = [EquityPricing.close]
            window_length = window_len 
            mask = top_vol
            #evaluate the weight function into a (1 x window_length) array; normalization happens later in compute()
            weights = np.fromfunction(np.vectorize(lambda i,j : weightFunct(j)), (1, window_length))
        
            def compute(self, today, asset_ids, out, close):
                nanMask = ~np.isnan(close)
                weights = np.multiply(self.weights.transpose(), nanMask) #zero the weights where close is NaN
                weights = weights/weights.sum(axis = 0) #normalize the weights
                out[:] = np.nansum(np.multiply(weights, close),axis= 0) #get the weighted mean for each stock
            
        return WeightedMean()
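
A short illustration of the workaround (hypothetical parameter choices, not used anywhere below): each call to makeWeightedMean builds a fresh CustomFactor subclass with its own window length and weight scheme.

In [ ]:
#hypothetical examples of the factory; neither factor is used in this notebook
sma_10 = makeWeightedMean(lambda t : 1, False, 10)      #a plain 10-day SMA
vwma_20 = makeWeightedMean(lambda t : t + 1, True, 20)  #a linearly weighted, volume-weighted 20-day MA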
In [7]:
def makePipeline():
    ma1 = makeWeightedMean(weightfunct_MA1,True,window_length_MA1)
    ma2 = makeWeightedMean(weightfunct_MA2,False, window_length_MA2)
    return Pipeline(screen = top_vol, columns = {'diff' : ma1-ma2}, domain = domain)
In [8]:
result = run_pipeline(makePipeline(), start_date=start_date - domain.calendar.day, end_date=end_date)

Pipeline Execution Time: 13.48 Seconds

Since the moving averages aren't window safe, we calculate the crossover outside of the pipeline:

In [83]:
result_unst = result.unstack() #moves the asset level of the MultiIndex into the columns

cross_down = (result_unst.values[1:] >= 0) & (result_unst.values[:-1] < 0) #yesterday the diff MA1-MA2 was negative, today it is non-negative
cross_up = (result_unst.values[1:] < 0) & (result_unst.values[:-1] >= 0) #the other way around

cross_down = pd.DataFrame(cross_down, index = result_unst.index[1:], columns=result_unst.columns) #add back the index and make a DF of it
cross_up = pd.DataFrame(cross_up, index = result_unst.index[1:], columns=result_unst.columns)

#and now we stack the index back again
cross_down = cross_down.stack(dropna = False)
cross_up = cross_up.stack(dropna = False)

cross_up.columns = ["factor"]
cross_down.columns = ["factor"]

cross_combined = cross_up.astype(float) - (cross_down.astype(float))
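
A toy check of the slicing logic above (hypothetical numbers, not taken from the pipeline):

In [ ]:
#toy diff series: the flags mark the day on which the sign of MA1-MA2 changes
toy_diff = np.array([-1.0, -0.5, 0.2, 0.3, -0.1])
print((toy_diff[1:] >= 0) & (toy_diff[:-1] < 0))   #[False  True False False]
print((toy_diff[1:] < 0) & (toy_diff[:-1] >= 0))   #[False False False  True]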

I don't know whether there is a built-in function to merge the data with the forward returns for boolean factors, so we do the merging by hand. First we generate the forward returns:

In [84]:
#set up our dict of Returns factors and the column names
columns = []
returns_factors = {}
for days in forwardDays:
    columns.append(str(days) + "D")
    returns_factors[str(days) + 'D'] = Returns(window_length = days + 1, mask= top_vol)
    
returns_pipeline = Pipeline(returns_factors, domain = domain, screen=top_vol)

returns_start = start_date 
returns_end = end_date + domain.calendar.day * max(forwardDays) #extend the end date by max(forwardDays) business days

returns = run_pipeline(returns_pipeline, returns_start,returns_end)
returns = returns.reindex(columns= columns) #Pipeline does not preserve the column order (even with an ordered dict), so restore it here

Pipeline Execution Time: 8.63 Seconds

Now we shift the returns so that on date x we get the future returns. Again, to avoid the inconvenience of the MultiIndex, we use unstack().

In [85]:
dateIndex = cross_up.index.levels[0]
returns_shifted = pd.DataFrame(index = dateIndex,
                              columns = pd.MultiIndex.from_product([columns, returns.index.levels[1]]))
returns_unst = returns.unstack()

for days in forwardDays:
    cutoff = days - max(forwardDays)
    if cutoff == 0: #a cutoff of 0 would make the slice [days:0] empty, so use None instead
        cutoff = None
        
    current = returns_unst[str(days) + 'D'].iloc[days : cutoff] #take the trailing returns computed `days` rows later; re-indexing below turns them into forward returns
    current.index = dateIndex
    returns_shifted[str(days) + "D"] = current

returns_shifted = returns_shifted.stack(dropna = False)
returns_shifted = returns_shifted.reindex(columns= columns) #fix the messed up order of the columns
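
To make the alignment explicit, here is a toy equivalent in plain pandas (hypothetical data, not from the pipeline): Returns(window_length = N + 1) reports the trailing N-day return on each date, and shifting it back by N rows turns it into the forward return used above.

In [ ]:
#toy equivalent of the shift above: the 2-day trailing return computed on day t
#is the 2-day forward return as seen from day t-2
close = pd.Series([100.0, 101.0, 103.0, 102.0, 105.0])
trailing_2d = close.pct_change(periods = 2)  #what Returns(window_length = 3) would report each day
forward_2d = trailing_2d.shift(-2)           #align it to the day the signal is observed
print(forward_2d)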
In [86]:
cross_combined_quantiles =  (cross_combined + 1).astype(int)
cross_combined_quantiles.columns = ["factor_quantile"]
merged = pd.concat([cross_combined, cross_combined_quantiles, returns_shifted], axis = 1).dropna()
merged.index.set_names(['date', 'asset'], inplace=True)
merged.index.levels[0].freq = pd.tseries.offsets.BDay()
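
Before handing the frame to alphalens, a quick structural check can help (hypothetical snippet): the tear sheet functions expect a (date, asset) MultiIndex together with a 'factor' column, a 'factor_quantile' column, and one column per forward-return period.

In [ ]:
#hypothetical sanity check of the merged frame
print(merged.index.names)        #['date', 'asset']
print(merged.columns.tolist())   #['factor', 'factor_quantile', '1D', '2D', '3D', '4D', '5D']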
In [87]:
create_full_tear_sheet(merged)
Quantiles Statistics

factor_quantile     min    max   mean   std   count    count %
0.0                -1.0   -1.0   -1.0   0.0    2709   0.305261
1.0                 0.0    0.0    0.0   0.0  881827  99.367954
2.0                 1.0    1.0    1.0   0.0    2900   0.326784

Returns Analysis

                                                   1D      2D      3D      4D      5D
Ann. alpha                                        NaN     NaN     NaN     NaN     NaN
beta                                              NaN     NaN     NaN     NaN     NaN
Mean Period Wise Return Top Quantile (bps)     12.825   9.297   5.775   1.346  -0.316
Mean Period Wise Return Bottom Quantile (bps)  -2.435  -6.628  -5.345  -4.172  -3.614
Mean Period Wise Spread (bps)                  17.788  16.632   9.534   1.812  -0.278
Information Analysis

                      1D     2D     3D     4D     5D
IC Mean            0.003  0.003  0.002  0.002  0.001
IC Std.            0.066  0.067  0.067  0.066  0.065
Risk-Adjusted IC   0.038  0.041  0.032  0.029  0.014
t-stat(IC)           NaN    NaN    NaN    NaN    NaN
p-value(IC)          NaN    NaN    NaN    NaN    NaN
IC Skew              NaN    NaN    NaN    NaN    NaN
IC Kurtosis          NaN    NaN    NaN    NaN    NaN

Turnover Analysis

                               1D     2D     3D     4D     5D
Quantile 0.0 Mean Turnover  0.982  0.976  0.982  0.986  0.984
Quantile 1.0 Mean Turnover  0.015  0.024  0.033  0.041  0.048
Quantile 2.0 Mean Turnover  0.983  0.980  0.981  0.988  0.988

                                       1D      2D      3D      4D      5D
Mean Factor Rank Autocorrelation   -0.074  -0.023  -0.017  -0.009  -0.011

To get rid of the unequally sized quantiles, one could drop the middle one. Strangely, this also alters the results of the remaining quantiles in alphalens:
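
A tiny illustration of the remapping in the next cell (hypothetical values): after dropping the zero rows, the -1/+1 signal is mapped to the quantile labels 0 and 1.

In [ ]:
#toy example of the remapping: -1 -> 0 and +1 -> 1
toy_factor = pd.DataFrame({'factor': [-1.0, 1.0, 1.0, -1.0]})
print(((toy_factor + 1) / 2).astype(int))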

In [88]:
cross_combined_quantiles =  cross_combined[cross_combined['factor'] != 0]
cross_combined_quantiles += 1
cross_combined_quantiles /=2
cross_combined_quantiles = cross_combined_quantiles.astype(int)
cross_combined = cross_combined.reindex(cross_combined_quantiles.index)
returns_shifted = returns_shifted.reindex(cross_combined_quantiles.index)
cross_combined_quantiles.columns = ["factor_quantile"]
merged = pd.concat([cross_combined, cross_combined_quantiles, returns_shifted], axis = 1).dropna()
merged.index.set_names(['date', 'asset'], inplace=True)
merged.index.levels[0].freq = pd.tseries.offsets.BDay()
In [89]:
create_full_tear_sheet(merged)
Quantiles Statistics

factor_quantile    min   max  mean  std  count    count %
0                 -1.0  -1.0  -1.0  0.0   2709  48.297379
1                  1.0   1.0   1.0  0.0   2900  51.702621

Returns Analysis

                                                  1D      2D      3D      4D      5D
Ann. alpha                                       NaN     NaN     NaN     NaN     NaN
beta                                             NaN     NaN     NaN     NaN     NaN
Mean Period Wise Return Top Quantile (bps)     2.486   2.374   1.408  -0.051  -0.380
Mean Period Wise Return Bottom Quantile (bps) -5.108  -4.926  -2.965  -0.843  -0.277
Mean Period Wise Spread (bps)                 17.788  17.416  10.467   2.095  -0.129
Information Analysis

                      1D     2D     3D      4D      5D
IC Mean            0.070  0.032  0.013  -0.006  -0.008
IC Std.            0.809  0.814  0.817   0.828   0.825
Risk-Adjusted IC   0.087  0.040  0.016  -0.007  -0.009
t-stat(IC)           NaN    NaN    NaN     NaN     NaN
p-value(IC)          NaN    NaN    NaN     NaN     NaN
IC Skew              NaN    NaN    NaN     NaN     NaN
IC Kurtosis          NaN    NaN    NaN     NaN     NaN

Turnover Analysis

                             1D     2D     3D     4D     5D
Quantile 0 Mean Turnover  0.982  0.976  0.982  0.986  0.984
Quantile 1 Mean Turnover  0.983  0.980  0.981  0.988  0.988

                                      1D     2D     3D     4D     5D
Mean Factor Rank Autocorrelation    -1.0    1.0   -1.0    NaN   -1.0
In [ ]: