
Alphalens + Quantopian | How To¶

In [1]:
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline import factors, filters, classifiers
from quantopian.pipeline.filters import StaticAssets
from quantopian.pipeline.factors import CustomFactor, Returns, AverageDollarVolume, SimpleMovingAverage
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline.filters.morningstar import IsPrimaryShare
from quantopian.pipeline.data import morningstar
from quantopian.pipeline.data.builtin import USEquityPricing

import math
import datetime
import numpy as np
import pandas as pd
In [2]:
MORNINGSTAR_SECTOR_CODES = {
     -1: 'Misc',
    101: 'Basic Materials',
    102: 'Consumer Cyclical',
    103: 'Financial Services',
    104: 'Real Estate',
    205: 'Consumer Defensive',
    206: 'Healthcare',
    207: 'Utilities',
    308: 'Communication Services',
    309: 'Energy',
    310: 'Industrials',
    311: 'Technology',
}
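
This mapping can be used to turn the numeric sector codes returned by Pipeline into readable labels, e.g. (a minimal sketch using a hypothetical DataFrame):

# Hypothetical example: label numeric Morningstar sector codes
df = pd.DataFrame({'sector_code': [101, 311, -1]})
df['sector_name'] = df['sector_code'].map(MORNINGSTAR_SECTOR_CODES)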

Helper functions¶

In [3]:
def high_volume_universe(top_liquid, min_price=None, min_volume=None):  
    """
    Computes a universe of liquid stocks, filtering out
    hard-to-trade names
    Returns
    -------
    high_volume_tradable - zipline.pipeline.Filter
    """
    
    full_filter = filters.make_us_equity_universe(
        target_size=top_liquid,
        rankby=factors.AverageDollarVolume(window_length=200),
        mask=filters.default_us_equity_universe_mask(),
        groupby=classifiers.morningstar.Sector(),
        max_group_weight=0.3,
        smoothing_func=lambda f: f.downsample('month_start'),
    )
    
    if min_price is not None:
        price = SimpleMovingAverage(inputs=[USEquityPricing.close],
                                    window_length=21, mask=full_filter)
        full_filter &= (price >= min_price)
        
    if min_volume is not None:
        volume = SimpleMovingAverage(inputs=[USEquityPricing.volume],
                                     window_length=21, mask=full_filter)
        full_filter &= (volume >= min_volume)
        
    return full_filter
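
For example, the resulting filter can be used directly as a Pipeline screen (a minimal sketch; the date is arbitrary):

# Hypothetical usage: screen a pipeline with the liquidity filter
universe = high_volume_universe(top_liquid=500, min_price=5.0)
pipe = Pipeline(screen=universe)
df = run_pipeline(pipe, '2016-01-04', '2016-01-04')
print 'Securities passing the filter:', len(df)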

def run_pipeline_chunks(pipe, start_date, end_date, chunks_len = None):
    """
    Drop-in replacement for run_pipeline.
    run_pipeline fails over a very long period of time (memery usage),
    so we need to split in chunks the pipeline and concatenate the results
    """
    chunks  = []
    current = pd.Timestamp(start_date)
    end     = pd.Timestamp(end_date)
    step    = pd.Timedelta(weeks=50) if chunks_len is None else chunks_len
    
    while current <= end:
        
        current_end = current + step
        if current_end > end:
            current_end = end
        
        print 'Running pipeline:', current, ' - ', current_end
        results = run_pipeline(pipe, current.strftime("%Y-%m-%d"), current_end.strftime("%Y-%m-%d"))
        chunks.append(results)
        
        # pipeline returns more days than requested (if no trading day), so get last date from the results
        current_end = results.index.get_level_values(0)[-1].tz_localize(None)
        current = current_end + pd.Timedelta(days=1)

    return pd.concat(chunks)
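
For example, a multi-year pipeline can be run in 26-week chunks (a sketch; the dates and chunk length are arbitrary):

# Hypothetical usage: run a long pipeline in 26-week chunks
pipe = Pipeline(columns={'adv': AverageDollarVolume(window_length=20)})
results = run_pipeline_chunks(pipe, '2014-01-01', '2017-01-01',
                              chunks_len=pd.Timedelta(weeks=26))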
       
def construct_factor_history(factor_cls, start_date='2015-10-1', end_date='2016-2-1', 
                             factor_name='factor', top_liquid=500,
                             sector_column=None, filter_universe=True):
    """
    Creates a DataFrame containing daily factor values and sector codes for a liquidity 
    constrained universe. The returned DataFrame is can be used in the factor tear sheet.
    """
    if filter_universe: # this is very slow!
        ok_universe = high_volume_universe(top_liquid)
    else:
        ok_universe = AverageDollarVolume(window_length=20).top(top_liquid)
       
    factor = factor_cls(mask=ok_universe)
    sector = Sector(mask=ok_universe)    
       
    pipe = Pipeline()
    pipe.add(factor, factor_name)
    if sector_column is not None: # this is very slow too
        pipe.add(sector, sector_column)  
    pipe.set_screen(ok_universe)

    daily_factor = run_pipeline_chunks(pipe, start_date=start_date, end_date=end_date)
    #daily_factor = run_pipeline(pipe, start_date=start_date, end_date=end_date)
       
    return daily_factor.dropna()
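
As an illustration, a simple momentum factor history could be built like this (a sketch; the parameters are illustrative):

# Hypothetical usage: build a 20-day momentum factor history
momentum = lambda mask: Returns(window_length=20, mask=mask)
history = construct_factor_history(momentum, start_date='2016-01-01',
                                   end_date='2016-06-01', top_liquid=500,
                                   factor_name='factor', filter_universe=False)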

def get_daily_price(sid_universe, start_date, end_date, extra_days_before=0, extra_days_after=0):
    """
    Creates a DataFrame containing daily percentage returns and price
    """   
    extra_days = math.ceil(extra_days_before * 365.0/252.0) + 3 # just to be sure
    start_date = datetime.datetime.strptime(start_date, "%Y-%m-%d") - datetime.timedelta(days=extra_days)
    start_date = start_date.strftime("%Y-%m-%d")
    
    extra_days = math.ceil(extra_days_after * 365.0/252.0) + 3 # just to be sure
    end_date = datetime.datetime.strptime(end_date, "%Y-%m-%d") + datetime.timedelta(days=extra_days)
    end_date = end_date.strftime("%Y-%m-%d")
    
    pricing = get_pricing(sid_universe, start_date=start_date, end_date=end_date, fields='open_price')
    
    return pricing
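
For example (a sketch; the symbols and dates are illustrative):

# Hypothetical usage: fetch prices with 5 extra days on each side
prices = get_daily_price(['AAPL', 'MSFT'], '2016-01-01', '2016-06-01',
                         extra_days_before=5, extra_days_after=5)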

run_tear_sheet glues all the functions together, making it easier to run the tear sheet on a pipeline factor¶

In [4]:
import alphalens
import alphalens.performance as perf 
import alphalens.utils as utils

def run_tear_sheet(factor,
                   factor_name,
                   start_date,
                   end_date,
                   top_liquid,
                   filter_universe,
                   show_sector_plots,
                   avgretplot,
                   periods,
                   quantiles,
                   bins,
                   filter_zscore,
                   long_short,
                   prices_cache = None):
     
    sector_column = 'sector_code' if show_sector_plots else None
    days_before, days_after = (0,0)

    if avgretplot is not None:   
        days_before, days_after = avgretplot
        days_after = max(days_after, max(periods) + 1)
    
    #
    ## Run the Pipeline
    #
    print 'construct factor history'
    factor = construct_factor_history(factor, start_date=start_date, end_date=end_date, 
                                      factor_name=factor_name, top_liquid=top_liquid,
                                      sector_column=sector_column, filter_universe=filter_universe)
    #
    ## Get prices
    #
    sid_universe = set( factor.index.levels[1].unique() )
    if prices_cache is not None:
        cached_sids = set(prices_cache.columns)
        sid_universe -= cached_sids
        
    print 'Get pricing for %d entries' % len(sid_universe)
    if sid_universe:
        prices = get_daily_price(sid_universe, start_date=start_date, end_date=end_date, 
                                 extra_days_before=days_before, extra_days_after=days_after)
        if prices_cache is not None:
            prices = pd.concat([prices, prices_cache], axis=1)
    else:
        prices = prices_cache

    #
    ## Use Alphalens to create a factor tear sheet
    #
    print 'Alphalens'
    
    if np.isinf(factor[factor_name]).any():
        print 'Dropping inf or -inf values from factor'
        factor[factor_name] = factor[factor_name].replace([np.inf, -np.inf], np.nan)
    
    sectors_series = factor[sector_column] if show_sector_plots else None
    factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor=factor[factor_name],
                                                                       prices=prices,
                                                                       groupby=sectors_series,
                                                                       by_group=False,
                                                                       quantiles=quantiles,
                                                                       bins=bins,
                                                                       periods=periods,
                                                                       filter_zscore=filter_zscore,
                                                                       groupby_labels=MORNINGSTAR_SECTOR_CODES)

    if avgretplot:
        alphalens.tears.create_event_returns_tear_sheet(factor_data=factor_data,
                                                        prices=prices,
                                                        avgretplot=avgretplot,
                                                        long_short=long_short,
                                                        by_group=show_sector_plots)

    alphalens.tears.create_full_tear_sheet(factor_data=factor_data,
                                           long_short=long_short,
                                           group_adjust=False,
                                           by_group=show_sector_plots)

    
    return prices
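
Because run_tear_sheet returns the prices it fetched, that result can be passed back as prices_cache on the next call, so only prices for sids not already cached are downloaded. A sketch of the pattern (hypothetical factors and settings):

# Hypothetical usage: reuse the price cache across two tear sheets
fac_short = lambda mask: Returns(window_length=20, mask=mask)
fac_long  = lambda mask: Returns(window_length=60, mask=mask)
cache = None
for fac in (fac_short, fac_long):
    cache = run_tear_sheet(factor=fac, factor_name='factor',
                           start_date='2016-01-01', end_date='2016-06-01',
                           top_liquid=500, filter_universe=False,
                           show_sector_plots=False, avgretplot=None,
                           periods=(1, 5, 10), quantiles=5, bins=None,
                           filter_zscore=None, long_short=True,
                           prices_cache=cache)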

Define settings¶

In [5]:
factor_name = 'factor'

start_date  = '2016-01-01'
end_date    = '2017-01-01'
top_liquid  = 1500
filter_universe = True     # use Quantopian universe filtering
show_sector_plots = True

# alphalens specific
periods = (1, 10, 50, 90)
quantiles = 5
bins      = None
avgretplot  = (5, 20)  # use None to avoid plotting or (days_before, days_after) e.g. (5, 20)
filter_zscore = None
long_short  = True

prices_cache = None # this saves lots of time when running tear sheet multiple times

Run the tear sheet¶

In [6]:
window = 91

ni = SimpleMovingAverage(inputs=[morningstar.income_statement.net_income],window_length=window)
ebitda = SimpleMovingAverage(inputs=[morningstar.income_statement.ebitda],window_length=window)
ebit = SimpleMovingAverage(inputs=[morningstar.income_statement.ebit],window_length=window)
assets = SimpleMovingAverage(inputs=[morningstar.balance_sheet.total_assets],window_length=window)
equity = SimpleMovingAverage(inputs=[morningstar.balance_sheet.total_equity],window_length=window)
revenue = SimpleMovingAverage(inputs=[morningstar.income_statement.total_revenue],window_length=window)
revenue_growth = SimpleMovingAverage(inputs=[morningstar.operation_ratios.revenue_growth],window_length=window)
fcf = SimpleMovingAverage(inputs=[morningstar.cash_flow_statement.free_cash_flow],window_length=window)
mkt_cap = morningstar.valuation.market_cap.latest

# The .rank() transform on each factor is optional; the commented-out calls below can be re-enabled

my_factors = {
        #Operation Factors
        'EBITDA/REVENUE': lambda mask : (ebitda/revenue), #.rank(ascending=False, mask=mask),
#        'EBIT/REVENUE': lambda mask : (ebit/revenue), #.rank(ascending=False, mask=mask),
#        'NI/REVENUE': lambda mask : (ni/revenue), #.rank(ascending=False, mask=mask),
#        'FCF/REVENUE': lambda mask : (fcf/revenue), #.rank(ascending=False, mask=mask),
        
        #Book Factors
#        'EBITDA/ASSETS': lambda mask : (ebitda/assets), #.rank(ascending=False, mask=mask),
#        'EBIT/ASSETS': lambda mask : (ebit/assets), #.rank(ascending=False, mask=mask),
#        'NI/ASSETS': lambda mask : (ni/assets), #.rank(ascending=False, mask=mask),
#        'FCF/ASSETS': lambda mask : (fcf/assets), #.rank(ascending=False, mask=mask),
#        'EBITDA/EQUITY': lambda mask : (ebitda/equity), #.rank(ascending=False, mask=mask),
#        'EBIT/EQUITY': lambda mask : (ebit/equity), #.rank(ascending=False, mask=mask),
#        'NI/EQUITY': lambda mask: (ni/equity), #.rank(ascending=False, mask=mask),
#        'FCF/EQUITY': lambda mask: (fcf/equity), #.rank(ascending=False, mask=mask),
        
        #Momentum Factors
#        'MOMENTUM': lambda mask : Returns(window_length=window), #.rank(ascending=False, mask=mask),
        
        #Growth Factors
#        'REVENUE GROWTH': lambda mask : revenue_growth.rank(ascending=False, mask=mask),
        
        #Value Factors
#        'P/REVENUE': lambda mask : (mkt_cap/revenue), #.rank(ascending=True, mask=mask),
#        'P/EBITDA': lambda mask : (mkt_cap/ebitda), #.rank(ascending=True, mask=mask),
#        'P/EBIT': lambda mask : (mkt_cap/ebit), #.rank(ascending=True, mask=mask),
#        'P/NI': lambda mask : (mkt_cap/ni), #.rank(ascending=True, mask=mask),
#        'P/ASSETS': lambda mask : (mkt_cap/assets), #.rank(ascending=True, mask=mask),
#        'P/EQUITY': lambda mask : (mkt_cap/equity), #.rank(ascending=True, mask=mask),
#        'P/FCF': lambda mask : (mkt_cap/fcf), #.rank(ascending=True, mask=mask),
                  
        #Target
#        'RETURNS': lambda mask : Returns(window_length=window), #.rank(ascending=False, mask=mask)
    }
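
To test more factors, uncomment the entries above or add new ones; each entry is a callable taking the universe mask. For instance, a ranked value factor could be added like this (hypothetical key name):

# Hypothetical example: add a ranked price-to-EBITDA factor
my_factors['P/EBITDA RANKED'] = lambda mask: (mkt_cap/ebitda).rank(ascending=True, mask=mask)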
In [7]:
for name, factor in my_factors.items():
    print("Processing ", name)
    prices_cache = \
    run_tear_sheet( factor       = factor,
                    factor_name  = factor_name,
                    start_date   = start_date,
                    end_date     = end_date,
                    top_liquid   = top_liquid,
                    filter_universe = filter_universe,
                    show_sector_plots = show_sector_plots,
                    avgretplot   = avgretplot,               
                    periods      = periods,
                    quantiles    = quantiles,
                    bins         = bins,
                    filter_zscore = filter_zscore,
                    long_short   = long_short,
                    prices_cache = prices_cache)
Processing EBITDA/REVENUE
construct factor history
Running pipeline: 2016-01-01 00:00:00  -  2016-12-16 00:00:00
Running pipeline: 2016-12-17 00:00:00  -  2017-01-01 00:00:00
Get pricing for 1700 entries
Alphalens
Dropping inf or -inf values from factor
Quantiles Statistics

                            min         max        mean          std  count    count %
factor_quantile
1               -288351.823529    0.082101  -39.665808  1381.942147  73131  20.027057
2                     0.058076    0.153581    0.110678     0.021790  72982  19.986253
3                     0.135011    0.234913    0.182246     0.024284  72985  19.987074
4                     0.216088    0.370235    0.285728     0.039049  72982  19.986253
5                     0.340614  337.399626    0.621769     2.628090  73081  20.013364
Returns Analysis

                                                    1      10      50       90
Ann. alpha                                      0.303   0.146   0.110    0.573
beta                                           -0.562  -0.582  -0.565   -2.014
Mean Period Wise Return Top Quantile (bps)     -0.469  -4.496  -3.679    5.890
Mean Period Wise Return Bottom Quantile (bps)  -0.554   7.978  75.122  126.021
Mean Period Wise Spread (bps)                   0.093  -1.148  -1.530   -1.269
Information Analysis

                  1      10      50      90
IC Mean       0.007   0.008   0.000   0.006
IC Std.       0.131   0.128   0.072   0.060
t-stat(IC)    0.863   1.057   0.003   1.482
p-value(IC)   0.389   0.292   0.997   0.140
IC Skew      -0.030   0.081   0.190   0.383
IC Kurtosis  -0.505  -0.597  -0.681  -0.167
Ann. IR       0.861   1.055   0.003   1.479
Turnover Analysis

                              1      10      50      90
Quantile 1 Mean Turnover  0.007   0.054   0.219   0.323
Quantile 2 Mean Turnover  0.011   0.086   0.299   0.407
Quantile 3 Mean Turnover  0.011   0.085   0.308   0.424
Quantile 4 Mean Turnover  0.009   0.067   0.251   0.354
Quantile 5 Mean Turnover  0.005   0.034   0.137   0.202

                                    1     10     50     90
Mean Factor Rank Autocorrelation  1.0  0.991  0.912  0.838