Notebook

Alphalens + Quantopian | How To

In [8]:
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline import factors, filters, classifiers
from quantopian.pipeline.factors import CustomFactor, Returns, AverageDollarVolume, SimpleMovingAverage,VWAP
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline.filters.morningstar import IsPrimaryShare
from quantopian.pipeline.data import morningstar
from quantopian.pipeline.data.builtin import USEquityPricing

import math
import datetime
import numpy as np
import pandas as pd
import scipy as sp
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm
import seaborn as sns
import scipy.stats as stats
In [9]:
# Lookup table: integer Morningstar sector code -> sector display name.
MORNINGSTAR_SECTOR_CODES = {
    -1: 'Misc',
    101: 'Basic Materials',
    102: 'Consumer Cyclical',
    103: 'Financial Services',
    104: 'Real Estate',
    205: 'Consumer Defensive',
    206: 'Healthcare',
    207: 'Utilities',
    308: 'Communication Services',
    309: 'Energy',
    310: 'Industrials',
    311: 'Technology',
}

Helper functions

In [10]:
def high_volume_universe(top_liquid, min_price = None, min_volume = None):  
    """
    Computes a security universe of liquid stocks, optionally filtering out
    hard to trade ones (low price / low volume).

    The base universe comes from `filters.make_us_equity_universe`, ranked by
    200-day average dollar volume, restricted to
    `filters.default_us_equity_universe_mask()`, grouped by Morningstar
    sector with `max_group_weight=0.3` and smoothed by downsampling to
    'month_start'.

    Parameters
    ----------
    top_liquid : int
        Target size of the universe (passed as `target_size`).
    min_price : number, optional
        If given and > 0, keep only assets whose 21-day simple moving average
        of the close price is >= min_price. None or <= 0 disables the check.
    min_volume : number, optional
        If given and > 0, keep only assets whose 21-day simple moving average
        of the volume is >= min_volume. None or <= 0 disables the check.

    Returns
    -------
    high_volume_tradable - zipline.pipeline.filter
    """
    
    full_filter = filters.make_us_equity_universe(
        target_size=top_liquid,
        rankby=factors.AverageDollarVolume(window_length=200),
        mask=filters.default_us_equity_universe_mask(),
        groupby=classifiers.morningstar.Sector(),
        max_group_weight=0.3,
        smoothing_func=lambda f: f.downsample('month_start'),
    )
    
    # Test for None explicitly: `None > 0` only evaluates (to False) because of
    # Python 2's arbitrary cross-type ordering and raises TypeError on Python 3.
    if min_price is not None and min_price > 0:
        price = SimpleMovingAverage(inputs=[USEquityPricing.close],
                                    window_length=21, mask=full_filter)
        full_filter &= (price >= min_price)
        
    if min_volume is not None and min_volume > 0:
        volume = SimpleMovingAverage(inputs=[USEquityPricing.volume],
                                     window_length=21, mask=full_filter)
        full_filter &= (volume >= min_volume)
        
    return full_filter

def run_pipeline_chunks(pipe, start_date, end_date, chunks_len = None):
    """
    Drop-in replacement for run_pipeline.
    run_pipeline fails over a very long period of time (memory usage),
    so we need to split the pipeline run in chunks and concatenate the results.

    Parameters
    ----------
    pipe : Pipeline
        The pipeline passed through to `run_pipeline` for every chunk.
    start_date, end_date : anything accepted by `pd.Timestamp`
        Inclusive bounds of the whole period.
    chunks_len : pd.Timedelta, optional
        Length of a single chunk; defaults to 26 weeks.

    Returns
    -------
    The concatenation (`pd.concat`) of all non-empty chunk results. If every
    chunk came back empty, the first (empty) result is returned unchanged.

    Raises
    ------
    ValueError
        Raised by `pd.concat` when no chunk was run at all
        (i.e. start_date is after end_date).
    """
    one_day = pd.Timedelta(days=1)
    current = pd.Timestamp(start_date)
    end     = pd.Timestamp(end_date)
    step    = pd.Timedelta(weeks=26) if chunks_len is None else chunks_len

    chunks       = []
    empty_result = None

    while current <= end:

        # never run past the requested end date
        current_end = min(current + step, end)

        # Single pre-formatted argument: prints the same text on Python 2 and 3
        print('Running pipeline: %s  -  %s' % (current, current_end))
        results = run_pipeline(pipe, current.strftime("%Y-%m-%d"), current_end.strftime("%Y-%m-%d"))

        if len(results.index) == 0:
            # Nothing came back for this window (e.g. no trading day in it):
            # there is no last date to read, so just step past the window.
            if empty_result is None:
                empty_result = results
            current = current_end + one_day
            continue

        chunks.append(results)

        # pipeline returns more days than requested (if no trading day), so get last date from the results
        last_date = results.index.get_level_values(0)[-1].tz_localize(None)
        current = last_date + one_day

    if not chunks and empty_result is not None:
        return empty_result

    return pd.concat(chunks)
       
def construct_factor_history(factor_cls, start_date='2015-10-1', end_date='2016-2-1', 
                             factor_name='factor', top_liquid=500,
                             sector_column=None, filter_universe=True):
    """
    Creates a DataFrame containing daily factor values (and, when
    `sector_column` is given, sector codes) for a liquidity constrained
    universe. The returned DataFrame can be used in the factor tear sheet.

    `factor_cls` is called as `factor_cls(mask=universe)` and must return the
    pipeline term to add under `factor_name`. Rows containing NaN are dropped
    from the result.
    """
    # The filtered universe is very slow; the alternative is simply the
    # `top_liquid` assets by 20-day average dollar volume.
    universe = (high_volume_universe(top_liquid) if filter_universe
                else AverageDollarVolume(window_length=20).top(top_liquid))

    factor_term = factor_cls(mask=universe)
    sector_term = Sector(mask=universe)

    pipeline = Pipeline()
    pipeline.add(factor_term, factor_name)
    # loading the sector column is very slow too, so it is opt-in
    if sector_column is not None:
        pipeline.add(sector_term, sector_column)
    pipeline.set_screen(universe)

    history = run_pipeline_chunks(pipeline, start_date=start_date, end_date=end_date)

    return history.dropna()

def get_daily_price(sid_universe, start_date, end_date, extra_days_before=0, extra_days_after=0):
    """
    Fetches the 'open_price' field from `get_pricing` for `sid_universe` over
    a widened date window.

    `start_date` / `end_date` are "%Y-%m-%d" strings. The window is extended
    by `extra_days_before` / `extra_days_after` trading days, converted to
    calendar days with a 365/252 ratio and padded with 3 spare days.
    """
    date_format = "%Y-%m-%d"

    def calendar_days(trading_days):
        # trading days -> calendar days, plus 3 days just to be sure
        return math.ceil(trading_days * 365.0/252.0) + 3

    pad_before = calendar_days(extra_days_before)
    window_start = datetime.datetime.strptime(start_date, date_format) - datetime.timedelta(days=pad_before)

    pad_after = calendar_days(extra_days_after)
    window_end = datetime.datetime.strptime(end_date, date_format) + datetime.timedelta(days=pad_after)

    return get_pricing(sid_universe,
                       start_date=window_start.strftime(date_format),
                       end_date=window_end.strftime(date_format),
                       fields='open_price')

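`run_tear_sheet` glues all the helper functions together: it builds the factor history, fetches the pricing data and renders the Alphalens tear sheet.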

In [11]:
import alphalens
import alphalens.performance as perf 
import alphalens.utils as utils

def run_tear_sheet( factor,
                    factor_name     = 'factor',
                    sector_names    = None,
                    start_date      = '2015-06-30',
                    end_date        = '2016-05-30',
                    top_liquid      = 500,
                    filter_universe = False,
                    avgretplot      = (10, 50),
                    periods         = (1, 5, 10),
                    show_sector_plots = True,
                    quantiles       = 5,
                    filter_zscore   = 10):
    """
    Runs the pipeline for `factor`, fetches pricing and renders the Alphalens
    factor tear sheet.

    Parameters
    ----------
    factor : callable
        Called as `factor(mask=...)` by `construct_factor_history`; must
        return the pipeline term to analyse.
    factor_name : str
        Column name of the factor in the pipeline output.
    sector_names : dict, optional
        Passed to Alphalens as `groupby_labels`.
    start_date, end_date : str
        "%Y-%m-%d" bounds of the analysis.
    top_liquid, filter_universe
        Forwarded to `construct_factor_history`.
    avgretplot : (int, int) or None
        (days before, days after) forwarded to Alphalens; also determines how
        far the pricing window is extended around the analysis period.
        None means no extra days are requested for that plot.
    periods : sequence of int
        Forward-return periods forwarded to Alphalens.
    show_sector_plots : bool
        When true, a 'sector_code' column is loaded and used as `groupby`.
    quantiles, filter_zscore
        Forwarded to Alphalens.
    """
    sector_column = 'sector_code' if show_sector_plots else None

    # Size the extra pricing window from the arguments rather than from
    # notebook globals defined in a later cell: using the globals makes the
    # function fail with NameError (or silently use stale values) depending
    # on cell execution order.
    if avgretplot is None:
        days_before, days_after = 0, 0
    else:
        days_before, days_after = avgretplot
    if periods:
        # prices must reach at least as far ahead as the longest period
        days_after = max(days_after, max(periods))

    ## Run the Pipeline
    print('construct factor history')
    factor_data = construct_factor_history(factor, start_date=start_date, end_date=end_date, 
                                           factor_name=factor_name, top_liquid=top_liquid,
                                           sector_column=sector_column, filter_universe=filter_universe)

    ## Get pricing
    sid_universe = list( factor_data.index.levels[1].unique() )
    print('Get pricing, universe %d entries' % len(sid_universe))
    pricing = get_daily_price(sid_universe, start_date=start_date, end_date=end_date, 
                              extra_days_before=days_before, extra_days_after=days_after)

    ## Use Alphalens to create a factor tear sheet
    print('alphalens')
    sectors_series = factor_data[sector_column] if show_sector_plots else None
    alphalens.tears.create_factor_tear_sheet(factor=factor_data[factor_name],
                                             prices=pricing,
                                             groupby=sectors_series,
                                             show_groupby_plots=show_sector_plots,
                                             periods=periods,
                                             quantiles=quantiles,
                                             filter_zscore=filter_zscore,
                                             groupby_labels=sector_names,
                                             long_short=True,
                                             avgretplot=avgretplot,
                                             turnover_for_all_periods=True)

Define our factor

In [12]:
class ROE(CustomFactor):
    """Factor whose value is the latest Morningstar `operation_ratios.roe`."""
    window_length = 1
    inputs = [morningstar.operation_ratios.roe]

    def compute(self, today, assets, out, close):
        # NOTE: despite its name, `close` is the data window for the single
        # declared input (ROE), not closing prices.
        latest = close[-1]
        out[:] = latest

class Alpha41(CustomFactor):
    """Factor whose value is the product of the day's low and high prices."""
    window_length = 1
    inputs = [USEquityPricing.low, USEquityPricing.high]

    def compute(self, today, assets, out, low, high):
        # window_length is 1, so row 0 is the only row of each input window
        out[:] = low[0] * high[0]

Define settings

In [13]:
def factor(mask):
    """
    Build the factor to analyse: sqrt(high * low) - VWAP, where the
    high * low product comes from `Alpha41` and VWAP uses a 1-day window.
    Both terms are computed over `mask`.
    """
    high_low_product = Alpha41(mask=mask)
    daily_vwap = VWAP(window_length=1, mask=mask)
    return high_low_product**.5 - daily_vwap

# Column name used for the factor in the pipeline output
factor_name = 'Alpha41'

# Analysis period and universe
start_date, end_date = '2005-04-30', '2016-05-30'
top_liquid = 500
filter_universe = False  # very slow, filter out untradable stocks

# Cumulative average returns plot by quantile
days_before, days_after = 10, 50
avgretplot = (days_before, days_after)  # None to avoid plotting

# alphalens specific
periods = (1, 5, 10)
show_sector_plots = False  # very slow to load the sector column in pipeline
quantiles = 5
filter_zscore = 10

Run the tear sheet

In [14]:
# Render the tear sheet with the settings defined in the cell above.
run_tear_sheet(
    factor=factor,
    factor_name=factor_name,
    sector_names=MORNINGSTAR_SECTOR_CODES,
    start_date=start_date,
    end_date=end_date,
    top_liquid=top_liquid,
    filter_universe=filter_universe,
    avgretplot=avgretplot,
    periods=periods,
    show_sector_plots=show_sector_plots,
    quantiles=quantiles,
    filter_zscore=filter_zscore,
)
construct factor history
Running pipeline: 2005-04-30 00:00:00  -  2005-10-29 00:00:00
Running pipeline: 2005-11-01 00:00:00  -  2006-05-02 00:00:00
Running pipeline: 2006-05-03 00:00:00  -  2006-11-01 00:00:00
Running pipeline: 2006-11-02 00:00:00  -  2007-05-03 00:00:00
Running pipeline: 2007-05-04 00:00:00  -  2007-11-02 00:00:00
Running pipeline: 2007-11-03 00:00:00  -  2008-05-03 00:00:00
Running pipeline: 2008-05-06 00:00:00  -  2008-11-04 00:00:00
Running pipeline: 2008-11-05 00:00:00  -  2009-05-06 00:00:00
Running pipeline: 2009-05-07 00:00:00  -  2009-11-05 00:00:00
Running pipeline: 2009-11-06 00:00:00  -  2010-05-07 00:00:00
Running pipeline: 2010-05-08 00:00:00  -  2010-11-06 00:00:00
Running pipeline: 2010-11-09 00:00:00  -  2011-05-10 00:00:00
Running pipeline: 2011-05-11 00:00:00  -  2011-11-09 00:00:00
Running pipeline: 2011-11-10 00:00:00  -  2012-05-10 00:00:00
Running pipeline: 2012-05-11 00:00:00  -  2012-11-09 00:00:00
Running pipeline: 2012-11-10 00:00:00  -  2013-05-11 00:00:00
Running pipeline: 2013-05-14 00:00:00  -  2013-11-12 00:00:00
Running pipeline: 2013-11-13 00:00:00  -  2014-05-14 00:00:00
Running pipeline: 2014-05-15 00:00:00  -  2014-11-13 00:00:00
Running pipeline: 2014-11-14 00:00:00  -  2015-05-15 00:00:00
Running pipeline: 2015-05-16 00:00:00  -  2015-11-14 00:00:00
Running pipeline: 2015-11-17 00:00:00  -  2016-05-17 00:00:00
Running pipeline: 2016-05-18 00:00:00  -  2016-05-30 00:00:00
Get pricing, universe 1951 entries
alphalens
Returns Analysis
1 5 10
Ann. alpha 0.101 0.030 0.014
beta 0.042 0.024 0.038
Mean Period Wise Return Top Quantile (bps) 4.551 1.017 -0.073
Mean Period Wise Return Bottom Quantile (bps) -5.584 -2.796 -1.777
Mean Period Wise Spread (bps) 10.152 3.819 1.708
Information Analysis
1 5 10
IC Mean 0.012 0.011 0.008
IC Std. 0.148 0.143 0.137
t-stat(IC) 4.139 4.033 2.909
p-value(IC) 0.000 0.000 0.004
IC Skew 0.023 0.013 0.130
IC Kurtosis 0.239 0.334 0.330
Ann. IR 1.244 1.212 0.874
Turnover Analysis
1 5 10
Quantile 1 Mean Turnover 0.736 0.737 0.740
Quantile 2 Mean Turnover 0.781 0.786 0.789
Quantile 3 Mean Turnover 0.738 0.751 0.759
Quantile 4 Mean Turnover 0.778 0.784 0.790
Quantile 5 Mean Turnover 0.742 0.744 0.747
1 5 10
Mean Factor Rank Autocorrelation -0.015 0.007 0.017
/usr/local/lib/python2.7/dist-packages/matplotlib/axes/_axes.py:2790: MatplotlibDeprecationWarning: Use of None object as fmt keyword argument to suppress plotting of data values is deprecated since 1.4; use the string "none" instead.
  warnings.warn(msg, mplDeprecation, stacklevel=1)
<matplotlib.figure.Figure at 0x7fefecdd2710>