from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline import factors, filters, classifiers
from quantopian.pipeline.filters import StaticAssets
from quantopian.pipeline.factors import CustomFactor, Returns, Latest, AverageDollarVolume, SimpleMovingAverage
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline.filters.morningstar import IsPrimaryShare
from quantopian.pipeline.data import morningstar
from quantopian.pipeline.data.builtin import USEquityPricing
import math
import datetime
import numpy as np
import pandas as pd
MORNINGSTAR_SECTOR_CODES = {
    -1: 'Misc',
    101: 'Basic Materials',
    102: 'Consumer Cyclical',
    103: 'Financial Services',
    104: 'Real Estate',
    205: 'Consumer Defensive',
    206: 'Healthcare',
    207: 'Utilities',
    308: 'Communication Services',
    309: 'Energy',
    310: 'Industrials',
    311: 'Technology',
}
def high_volume_universe(top_liquid, min_price=None, min_volume=None):
    """
    Computes a universe of liquid stocks, filtering out names that are
    hard to trade.

    Returns
    -------
    high_volume_tradable : zipline.pipeline.Filter
    """
    full_filter = filters.make_us_equity_universe(
        target_size=top_liquid,
        rankby=factors.AverageDollarVolume(window_length=200),
        mask=filters.default_us_equity_universe_mask(),
        groupby=classifiers.morningstar.Sector(),
        max_group_weight=0.3,
        smoothing_func=lambda f: f.downsample('month_start'),
    )
    if min_price is not None:
        price = SimpleMovingAverage(inputs=[USEquityPricing.close],
                                    window_length=21, mask=full_filter)
        full_filter &= (price >= min_price)
    if min_volume is not None:
        volume = SimpleMovingAverage(inputs=[USEquityPricing.volume],
                                     window_length=21, mask=full_filter)
        full_filter &= (volume >= min_volume)
    return full_filter
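# Sketch of intended usage (hypothetical parameters): this only builds a
# lazily evaluated Filter; nothing is computed until it is attached to a
# Pipeline and run.
# universe = high_volume_universe(500, min_price=5.0, min_volume=100000)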
def run_pipeline_chunks(pipe, start_date, end_date, chunks_len=None):
    """
    Drop-in replacement for run_pipeline.

    run_pipeline can fail over a very long period of time (memory usage),
    so we split the date range into chunks, run the pipeline on each chunk,
    and concatenate the results. chunks_len, if given, must be a
    pd.Timedelta; it defaults to 50 weeks.
    """
    chunks = []
    current = pd.Timestamp(start_date)
    end = pd.Timestamp(end_date)
    step = pd.Timedelta(weeks=50) if chunks_len is None else chunks_len
    while current <= end:
        current_end = current + step
        if current_end > end:
            current_end = end
        print('Running pipeline: %s - %s' % (current, current_end))
        results = run_pipeline(pipe, current.strftime("%Y-%m-%d"),
                               current_end.strftime("%Y-%m-%d"))
        chunks.append(results)
        # The pipeline may return more days than requested (when a chunk
        # boundary is not a trading day), so take the last date actually
        # returned and resume from the following day.
        current_end = results.index.get_level_values(0)[-1].tz_localize(None)
        current = current_end + pd.Timedelta(days=1)
    return pd.concat(chunks)
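# Sketch of intended usage (hypothetical pipeline object): the chunk length
# can be overridden by passing a pd.Timedelta, e.g. 26-week chunks instead
# of the 50-week default. Commented out since it would kick off a real query.
# results = run_pipeline_chunks(my_pipe, '2014-01-01', '2016-01-01',
#                               chunks_len=pd.Timedelta(weeks=26))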
def construct_factor_history(factor_cls, start_date='2015-10-1', end_date='2016-2-1',
                             factor_name='factor', top_liquid=500,
                             sector_column=None, filter_universe=True):
    """
    Creates a DataFrame containing daily factor values and sector codes for a
    liquidity-constrained universe. The returned DataFrame can be used in the
    factor tear sheet.
    """
    if filter_universe:  # this is very slow!
        ok_universe = high_volume_universe(top_liquid)
    else:
        ok_universe = AverageDollarVolume(window_length=20).top(top_liquid)
    factor = factor_cls(mask=ok_universe)
    sector = Sector(mask=ok_universe)
    pipe = Pipeline()
    pipe.add(factor, factor_name)
    if sector_column is not None:  # this is very slow too
        pipe.add(sector, sector_column)
    pipe.set_screen(ok_universe)
    daily_factor = run_pipeline_chunks(pipe, start_date=start_date, end_date=end_date)
    # daily_factor = run_pipeline(pipe, start_date=start_date, end_date=end_date)  # non-chunked alternative
    return daily_factor.dropna()
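# Sketch of intended usage (hypothetical factor): one month of trailing
# 21-day returns over the 500 most liquid names, skipping the slow universe
# filter. Commented out since it would kick off a real query.
# momentum = construct_factor_history(
#     lambda mask: Returns(window_length=21, mask=mask),
#     start_date='2016-01-01', end_date='2016-02-01',
#     factor_name='momentum', top_liquid=500,
#     sector_column=None, filter_universe=False)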
def get_daily_price(sid_universe, start_date, end_date, extra_days_before=0, extra_days_after=0):
    """
    Fetches daily prices for the given securities, padding the date range
    with enough extra calendar days to cover the requested number of extra
    trading days on each side.
    """
    # convert trading days to calendar days, with a small safety margin
    extra_days = int(math.ceil(extra_days_before * 365.0 / 252.0)) + 3
    start_date = datetime.datetime.strptime(start_date, "%Y-%m-%d") - datetime.timedelta(days=extra_days)
    start_date = start_date.strftime("%Y-%m-%d")
    extra_days = int(math.ceil(extra_days_after * 365.0 / 252.0)) + 3
    end_date = datetime.datetime.strptime(end_date, "%Y-%m-%d") + datetime.timedelta(days=extra_days)
    end_date = end_date.strftime("%Y-%m-%d")
    pricing = get_pricing(sid_universe, start_date=start_date, end_date=end_date, fields='open_price')
    return pricing
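# Sketch of intended usage (hypothetical sids): pad the window by 5 trading
# days on each side, as the average-returns plots require. Commented out
# since it would fetch real pricing data.
# prices = get_daily_price([24, 5061], '2016-01-01', '2016-02-01',
#                          extra_days_before=5, extra_days_after=5)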
import alphalens
import alphalens.tears
import alphalens.utils
def run_tear_sheet(factor,
                   factor_name,
                   start_date,
                   end_date,
                   top_liquid,
                   filter_universe,
                   show_sector_plots,
                   avgretplot,
                   periods,
                   quantiles,
                   bins,
                   filter_zscore,
                   long_short,
                   prices_cache=None):

    sector_column = 'sector_code' if show_sector_plots else None
    days_before, days_after = (0, 0)

    if avgretplot is not None:
        days_before, days_after = avgretplot
        days_after = max(days_after, max(periods) + 1)

    #
    ## Run the Pipeline
    #
    print('construct factor history')
    factor = construct_factor_history(factor, start_date=start_date, end_date=end_date,
                                      factor_name=factor_name, top_liquid=top_liquid,
                                      sector_column=sector_column, filter_universe=filter_universe)
    #
    ## Get prices
    #
    sid_universe = set(factor.index.levels[1].unique())
    if prices_cache is not None:
        cached_sids = set(prices_cache.columns)
        sid_universe -= cached_sids
    print('Get pricing for %d entries' % len(sid_universe))
    if sid_universe:
        prices = get_daily_price(sid_universe, start_date=start_date, end_date=end_date,
                                 extra_days_before=days_before, extra_days_after=days_after)
        if prices_cache is not None:
            prices = pd.concat([prices, prices_cache], axis=1)
    else:
        prices = prices_cache
    #
    ## Use Alphalens to create a factor tear sheet
    #
    print('alphalens')
    sectors_series = factor[sector_column] if show_sector_plots else None
    factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor=factor[factor_name],
                                                                       prices=prices,
                                                                       groupby=sectors_series,
                                                                       by_group=False,
                                                                       quantiles=quantiles,
                                                                       bins=bins,
                                                                       periods=periods,
                                                                       filter_zscore=filter_zscore,
                                                                       groupby_labels=MORNINGSTAR_SECTOR_CODES)
    if avgretplot:
        alphalens.tears.create_event_returns_tear_sheet(factor_data=factor_data,
                                                        prices=prices,
                                                        avgretplot=avgretplot,
                                                        long_short=long_short,
                                                        by_group=show_sector_plots)
    alphalens.tears.create_full_tear_sheet(factor_data=factor_data,
                                           long_short=long_short,
                                           group_adjust=False,
                                           by_group=show_sector_plots)
    return prices
from quantopian.pipeline.data.alpha_vertex import precog_top_100, precog_top_500

factor_name = 'factor'
start_date = '2010-01-01'
end_date = '2017-02-15'
top_liquid = 800
filter_universe = True     # very slow, filters out untradable stocks
show_sector_plots = False  # very slow to load the sector column in the pipeline

# alphalens specific
periods = (1, 3, 5, 10)
avgretplot = (2, 7)   # (days_before, days_after), or None to skip the plot
filter_zscore = None
long_short = True
prices_cache = None   # saves lots of time when running the tear sheet multiple times

quantiles = None
bins = [-100, -0.03, -0.01, 0., 0.01, 0.03, 100]  # fixed edges in predicted log-return units
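# Sanity check on the fixed bin edges (hypothetical factor values): seven
# edges give alphalens six groups, mirroring the quantiles=6 runs below.
print(pd.cut(pd.Series([-0.05, -0.02, -0.005, 0.005, 0.02, 0.05]),
             bins=bins).cat.codes.tolist())  # -> [0, 1, 2, 3, 4, 5]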
def factor(mask):
    return Latest(inputs=[precog_top_100.predicted_five_day_log_return], mask=mask)

prices_cache = run_tear_sheet(factor=factor,
                              factor_name=factor_name,
                              start_date=start_date,
                              end_date=end_date,
                              top_liquid=top_liquid,
                              filter_universe=filter_universe,
                              show_sector_plots=show_sector_plots,
                              avgretplot=avgretplot,
                              periods=periods,
                              quantiles=quantiles,
                              bins=bins,
                              filter_zscore=filter_zscore,
                              long_short=long_short,
                              prices_cache=prices_cache)
def factor(mask):
    return Latest(inputs=[precog_top_500.predicted_five_day_log_return], mask=mask)

prices_cache = run_tear_sheet(factor=factor,
                              factor_name=factor_name,
                              start_date=start_date,
                              end_date=end_date,
                              top_liquid=top_liquid,
                              filter_universe=filter_universe,
                              show_sector_plots=show_sector_plots,
                              avgretplot=avgretplot,
                              periods=periods,
                              quantiles=quantiles,
                              bins=bins,
                              filter_zscore=filter_zscore,
                              long_short=long_short,
                              prices_cache=prices_cache)
quantiles = 6   # switch from fixed bins to six equal-size quantile buckets
bins = None
def factor(mask):
    return Latest(inputs=[precog_top_100.predicted_five_day_log_return], mask=mask)

prices_cache = run_tear_sheet(factor=factor,
                              factor_name=factor_name,
                              start_date=start_date,
                              end_date=end_date,
                              top_liquid=top_liquid,
                              filter_universe=filter_universe,
                              show_sector_plots=show_sector_plots,
                              avgretplot=avgretplot,
                              periods=periods,
                              quantiles=quantiles,
                              bins=bins,
                              filter_zscore=filter_zscore,
                              long_short=long_short,
                              prices_cache=prices_cache)
def factor(mask):
    return Latest(inputs=[precog_top_500.predicted_five_day_log_return], mask=mask)

prices_cache = run_tear_sheet(factor=factor,
                              factor_name=factor_name,
                              start_date=start_date,
                              end_date=end_date,
                              top_liquid=top_liquid,
                              filter_universe=filter_universe,
                              show_sector_plots=show_sector_plots,
                              avgretplot=avgretplot,
                              periods=periods,
                              quantiles=quantiles,
                              bins=bins,
                              filter_zscore=filter_zscore,
                              long_short=long_short,
                              prices_cache=prices_cache)
long_short = False   # don't demean returns; evaluate the factor as long-only
def factor(mask):
    return Latest(inputs=[precog_top_100.predicted_five_day_log_return], mask=mask)

prices_cache = run_tear_sheet(factor=factor,
                              factor_name=factor_name,
                              start_date=start_date,
                              end_date=end_date,
                              top_liquid=top_liquid,
                              filter_universe=filter_universe,
                              show_sector_plots=show_sector_plots,
                              avgretplot=avgretplot,
                              periods=periods,
                              quantiles=quantiles,
                              bins=bins,
                              filter_zscore=filter_zscore,
                              long_short=long_short,
                              prices_cache=prices_cache)
def factor(mask):
    return Latest(inputs=[precog_top_500.predicted_five_day_log_return], mask=mask)

prices_cache = run_tear_sheet(factor=factor,
                              factor_name=factor_name,
                              start_date=start_date,
                              end_date=end_date,
                              top_liquid=top_liquid,
                              filter_universe=filter_universe,
                              show_sector_plots=show_sector_plots,
                              avgretplot=avgretplot,
                              periods=periods,
                              quantiles=quantiles,
                              bins=bins,
                              filter_zscore=filter_zscore,
                              long_short=long_short,
                              prices_cache=prices_cache)