Notebook

Alphalens boilerplate

In [1]:
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline import factors, filters, classifiers
from quantopian.pipeline.filters import  StaticAssets
from quantopian.pipeline.factors import CustomFactor, Returns, AverageDollarVolume, SimpleMovingAverage
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.classifiers.morningstar import Sector  
from quantopian.pipeline.filters.morningstar import Q500US, Q1500US, Q3000US

import math
import datetime
import numpy as np
import pandas as pd

# Maps integer sector codes to human-readable sector names.
# run_tear_sheet passes this dict to alphalens as `groupby_labels`.
# The -1 key labels anything reported as 'Misc'
# (presumably securities without a sector classification -- TODO confirm).
MORNINGSTAR_SECTOR_CODES = {
     -1: 'Misc',
    101: 'Basic Materials',
    102: 'Consumer Cyclical',
    103: 'Financial Services',
    104: 'Real Estate',
    205: 'Consumer Defensive',
    206: 'Healthcare',
    207: 'Utilities',
    308: 'Communication Services',
    309: 'Energy',
    310: 'Industrials',
    311: 'Technology' ,    
}

## Helper functions

def high_volume_universe(top_liquid, min_price=None, min_volume=None):
    """
    Build a pipeline filter selecting a universe of liquid stocks,
    optionally excluding low-priced or thinly traded ones.

    Parameters
    ----------
    top_liquid - requested universe size. 500, 1500 and 3000 select the
        built-in Q500US, Q1500US and Q3000US filters; any other value is
        passed as `target_size` to filters.make_us_equity_universe.
    min_price - optional lower bound on the 21-day simple moving average
        of the close price (None disables the check).
    min_volume - optional lower bound on the 21-day simple moving average
        of the volume (None disables the check).

    Returns
    -------
    high_volume_tradable - zipline.pipeline.filter
    """
    builtin_universes = ((500, Q500US), (1500, Q1500US), (3000, Q3000US))

    for size, make_universe in builtin_universes:
        if top_liquid == size:
            tradable = make_universe()
            break
    else:
        # No built-in universe of that size: rank by 200-day average dollar
        # volume, capping each sector at 30% of the universe.
        tradable = filters.make_us_equity_universe(
            target_size=top_liquid,
            rankby=factors.AverageDollarVolume(window_length=200),
            mask=filters.default_us_equity_universe_mask(),
            groupby=Sector(),
            max_group_weight=0.3,
            smoothing_func=lambda f: f.downsample('month_start'),
        )

    # Price is applied before volume, so the volume average is masked by the
    # already price-filtered universe.
    thresholds = ((min_price, USEquityPricing.close),
                  (min_volume, USEquityPricing.volume))
    for lower_bound, column in thresholds:
        if lower_bound is None:
            continue
        average = SimpleMovingAverage(inputs=[column],
                                      window_length=21, mask=tradable)
        tradable &= (average >= lower_bound)

    return tradable
     
def construct_factor_history(factor_cls, start_date='2015-10-1', end_date='2016-2-1',
                             factor_name='factor', top_liquid=500,
                             sector_column=None):
    """
    Run a pipeline that computes a factor (and optionally the sector code)
    over a liquidity constrained universe.

    Parameters
    ----------
    factor_cls - callable invoked as factor_cls(mask=universe); its result
        is added to the pipeline as the factor column.
    start_date, end_date - date range forwarded to run_pipeline.
    factor_name - name of the factor column in the output.
    top_liquid - universe size forwarded to high_volume_universe.
    sector_column - name of the sector column, or None to leave the sector
        out of the pipeline.

    Returns
    -------
    The run_pipeline result with every row containing a NaN removed. The
    returned DataFrame can be used in the factor tear sheet.
    """
    universe = high_volume_universe(top_liquid)

    factor_term = factor_cls(mask=universe)
    sector_term = Sector(mask=universe)

    pipeline_columns = [(factor_name, factor_term)]
    if sector_column is not None:  # this is very slow too
        pipeline_columns.append((sector_column, sector_term))

    pipeline = Pipeline()
    for column_name, term in pipeline_columns:
        pipeline.add(term, column_name)
    pipeline.set_screen(universe)

    history = run_pipeline(pipeline, start_date=start_date, end_date=end_date,
                           chunksize=250)
    return history.dropna()

def get_daily_price(sid_universe, start_date, end_date, extra_days_before=0, extra_days_after=0):
    """
    Fetch daily open prices for `sid_universe` over a padded date range.

    `start_date` and `end_date` are 'YYYY-MM-DD' strings. The range is
    widened by `extra_days_before` / `extra_days_after` trading days, which
    are converted to calendar days (365/252 ratio, rounded up) plus three
    days of slack. Returns whatever get_pricing returns for the
    'open_price' field.
    """
    date_format = "%Y-%m-%d"

    def calendar_padding(trading_days):
        # Trading days -> calendar days, plus 3 just to be sure.
        return math.ceil(trading_days * 365.0/252.0) + 3

    padding_before = calendar_padding(extra_days_before)
    first_day = (datetime.datetime.strptime(start_date, date_format)
                 - datetime.timedelta(days=padding_before))

    padding_after = calendar_padding(extra_days_after)
    last_day = (datetime.datetime.strptime(end_date, date_format)
                + datetime.timedelta(days=padding_after))

    return get_pricing(sid_universe,
                       start_date=first_day.strftime(date_format),
                       end_date=last_day.strftime(date_format),
                       fields='open_price')

#
# 'run_tear_sheet' glues all the functions together to make it easier to run the tear sheet on a pipeline factor
#

import alphalens
import alphalens.performance as perf 
import alphalens.utils as utils

def run_tear_sheet(factor,
                   factor_name,
                   start_date,
                   end_date,
                   top_liquid,
                   show_sector_plots,
                   avgretplot,
                   periods,
                   quantiles,
                   bins,
                   filter_zscore,
                   long_short,
                   prices_cache=None):
    """
    Run a pipeline factor, fetch the matching prices and show the Alphalens
    tear sheets for it.

    Parameters
    ----------
    factor - callable accepting a `mask` keyword and returning the pipeline
        factor to analyse (forwarded to construct_factor_history).
    factor_name - name of the factor column in the pipeline output.
    start_date, end_date - 'YYYY-MM-DD' strings delimiting the analysis.
    top_liquid - universe size (see high_volume_universe).
    show_sector_plots - when true, a 'sector_code' column is computed and
        the tear sheets are asked for per-group plots.
    avgretplot - None to skip the event returns tear sheet, otherwise a
        (days_before, days_after) tuple.
    periods - forward-return periods handed to Alphalens.
    quantiles, bins, filter_zscore - passed through to
        alphalens.utils.get_clean_factor_and_forward_returns.
    long_short - passed through to the Alphalens tear sheets.
    prices_cache - prices returned by a previous call (columns are assets),
        or None. Assets already present in the cache are not fetched again.

    Returns
    -------
    The prices used for the analysis (newly fetched columns plus the
    cache), suitable for passing back as `prices_cache`.
    """
    sector_column = 'sector_code' if show_sector_plots else None

    # Forward returns need prices up to max(periods) days past end_date,
    # whether or not the event returns plot is requested.
    days_before = 0
    days_after = max(periods) + 1
    if avgretplot is not None:
        days_before, plot_days_after = avgretplot
        days_after = max(days_after, plot_days_after)

    #
    ## Run the Pipeline
    #
    print('construct factor history')
    factor_df = construct_factor_history(factor, start_date=start_date, end_date=end_date,
                                         factor_name=factor_name, top_liquid=top_liquid,
                                         sector_column=sector_column)
    #
    ## Get prices
    #
    # get_level_values only yields assets that still have rows, whereas
    # index.levels may keep entries whose rows were all dropped.
    sid_universe = set(factor_df.index.get_level_values(1).unique())
    if prices_cache is not None:
        cached_sids = set(prices_cache.columns)
        sid_universe -= cached_sids

    print('Get pricing for %d entries' % len(sid_universe))
    if sid_universe:
        prices = get_daily_price(sid_universe, start_date=start_date, end_date=end_date,
                                 extra_days_before=days_before, extra_days_after=days_after)
        if prices_cache is not None:
            prices = pd.concat([prices, prices_cache], axis=1)
    else:
        prices = prices_cache

    #
    ## Use Alphalens to create a factor tear sheet
    #
    print('Alphalens')

    # Only touch the factor when it actually contains infinite values.
    factor_values = factor_df[factor_name]
    if np.isinf(factor_values).any():
        print('Dropping inf or -inf values from factor')
        factor_df[factor_name] = factor_values.replace([np.inf, -np.inf], np.nan)

    sectors_series = factor_df[sector_column] if show_sector_plots else None
    factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor=factor_df[factor_name],
                                                                       prices=prices,
                                                                       groupby=sectors_series,
                                                                       by_group=False,
                                                                       quantiles=quantiles,
                                                                       bins=bins,
                                                                       periods=periods,
                                                                       filter_zscore=filter_zscore,
                                                                       groupby_labels=MORNINGSTAR_SECTOR_CODES)

    if avgretplot is not None:
        alphalens.tears.create_event_returns_tear_sheet(factor_data=factor_data,
                                                        prices=prices,
                                                        avgretplot=avgretplot,
                                                        long_short=long_short,
                                                        by_group=show_sector_plots)

    # Alternative: the full tear sheet instead of the two calls below.
    #alphalens.tears.create_full_tear_sheet(factor_data=factor_data,
    #                                       long_short=long_short,
    #                                       group_adjust=False,
    #                                       by_group=show_sector_plots)
    alphalens.plotting.plot_quantile_statistics_table(factor_data)
    alphalens.tears.create_returns_tear_sheet(factor_data=factor_data,
                                              long_short=long_short,
                                              by_group=show_sector_plots)

    return prices

Define our factor

In [2]:
#
# Many technical indicators are already defined in Pipeline but we can build our own CustomFactor too
#
from quantopian.pipeline.factors import RSI, BollingerBands, Aroon, FastStochasticOscillator
from quantopian.pipeline.factors import IchimokuKinkoHyo, RateOfChangePercentage, TrueRange
from quantopian.pipeline.factors import MovingAverageConvergenceDivergenceSignal, AnnualizedVolatility

Define settings

In [3]:
# Name of the factor column in the pipeline output
factor_name = 'factor'

# Date range of the analysis ('YYYY-MM-DD' strings)
start_date  = '2009-01-01'
end_date    = '2014-05-01'
# Universe size: 500, 1500 and 3000 select Q500US, Q1500US and Q3000US
top_liquid  = 500
# When True a 'sector_code' column is computed and per-group plots are requested
show_sector_plots = False

# alphalens specific
# periods, quantiles, bins and filter_zscore are passed through to
# alphalens.utils.get_clean_factor_and_forward_returns
periods = (1, 3)
quantiles = 5
bins      = None
avgretplot  = (1, 10)  # use None to avoid plotting or (days_before, days_after)
filter_zscore = None
long_short  = True

# NOTE: re-running this cell resets the cache, so prices are fetched again
prices_cache = None # this saves lots of time when running tear sheet multiple times

Run the tear sheet

In [4]:
def factor(mask):
    """Return the negated 15-day RSI computed over `mask`."""
    rsi = RSI(mask=mask, window_length=15)
    return -rsi

# Keep the returned prices so that later runs can reuse them.
prices_cache = run_tear_sheet(
    factor=factor,
    factor_name=factor_name,
    start_date=start_date,
    end_date=end_date,
    top_liquid=top_liquid,
    show_sector_plots=show_sector_plots,
    avgretplot=avgretplot,
    periods=periods,
    quantiles=quantiles,
    bins=bins,
    filter_zscore=filter_zscore,
    long_short=long_short,
    prices_cache=prices_cache,
)
construct factor history
Get pricing for 808 entries
Alphalens
Dropping inf or -inf values from factor
/usr/local/lib/python2.7/dist-packages/matplotlib/axes/_axes.py:2790: MatplotlibDeprecationWarning: Use of None object as fmt keyword argument to suppress plotting of data values is deprecated since 1.4; use the string "none" instead.
  warnings.warn(msg, mplDeprecation, stacklevel=1)
Quantiles Statistics
min max mean std count count %
factor_quantile
1 -100.000000 -24.947146 -71.592999 11.141301 134098 20.032118
2 -88.196721 -19.391635 -60.723944 10.464185 133783 19.985062
3 -82.019462 -15.953386 -53.691138 10.355026 133668 19.967882
4 -75.111111 -10.913706 -46.427975 10.022605 133782 19.984912
5 -67.120000 -0.000000 -34.241842 10.810839 134084 20.030026
Returns Analysis
1 3
Ann. alpha 0.017 0.013
beta 0.069 0.093
Mean Period Wise Return Top Quantile (bps) 0.510 2.241
Mean Period Wise Return Bottom Quantile (bps) -2.012 -4.991
Mean Period Wise Spread (bps) 2.523 2.407
/usr/local/lib/python2.7/dist-packages/alphalens/plotting.py:727: FutureWarning: pd.rolling_apply is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,min_periods=1,window=3).apply(args=<tuple>,func=<function>,kwargs=<dict>)
  min_periods=1, args=(period,))
/usr/local/lib/python2.7/dist-packages/alphalens/plotting.py:767: FutureWarning: pd.rolling_apply is deprecated for DataFrame and will be removed in a future version, replace with 
	DataFrame.rolling(center=False,min_periods=1,window=3).apply(args=<tuple>,func=<function>,kwargs=<dict>)
  min_periods=1, args=(period,))
/usr/local/lib/python2.7/dist-packages/alphalens/plotting.py:519: FutureWarning: pd.rolling_mean is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(window=22,center=False).mean()
  pd.rolling_mean(mean_returns_spread_bps, 22).plot(color='orangered',
<matplotlib.figure.Figure at 0x7f7e3af22810>