Notebook

International Factor Research - Alphalens Example

In [1]:
from quantopian.pipeline import Pipeline, CustomFactor
from quantopian.pipeline.data import EquityPricing, factset
from quantopian.pipeline.factors import Returns, SimpleMovingAverage
from quantopian.pipeline.domain import (
    AT_EQUITIES, # Austria
    AU_EQUITIES, # Australia
    BE_EQUITIES, # Belgium
    BR_EQUITIES, # Brazil
    CA_EQUITIES, # Canada
    CH_EQUITIES, # Switzerland
    CN_EQUITIES, # China
    DE_EQUITIES, # Germany
    DK_EQUITIES, # Denmark
    ES_EQUITIES, # Spain
    FI_EQUITIES, # Finland
    FR_EQUITIES, # France
    GB_EQUITIES, # Great Britain
    HK_EQUITIES, # Hong Kong
    IE_EQUITIES, # Ireland
    IN_EQUITIES, # India
    IT_EQUITIES, # Italy
    JP_EQUITIES, # Japan
    KR_EQUITIES, # South Korea
    NL_EQUITIES, # Netherlands
    NO_EQUITIES, # Norway
    NZ_EQUITIES, # New Zealand
    PT_EQUITIES, # Portugal
    SE_EQUITIES, # Sweden
    SG_EQUITIES, # Singapore
    US_EQUITIES, # United States
)
from quantopian.research import run_pipeline

import pandas as pd
import numpy as np

import time

The helper function below makes it easier to get Alphalens-formatted factor and forward-returns data, given a pipeline factor, a domain, and date bounds.

In [5]:
def evaluate_factor(factor,
                    domain,
                    start_date,
                    end_date,
                    factor_screen=None,
                    quantiles=5,
                    returns_lengths=(1, 5, 10)):
    """Build Alphalens-formatted factor and forward-returns data for a Pipeline Factor.

    Two pipelines are run: one producing the factor values and their daily
    quantile, and one producing backward-looking returns over a date range
    extended past ``end_date``. The returns are then shifted back so that
    each row pairs a factor value with the returns that followed it.

    Parameters
    ----------
    factor : quantopian.pipeline.factors.Factor
        Factor producing scores to be evaluated.
    domain : quantopian.pipeline.domain.Domain
        Domain on which the factor should be evaluated.
    start_date : str or pd.Timestamp
        Start date for evaluation period.
    end_date : str or pd.Timestamp
        End date for evaluation period.
    factor_screen : quantopian.pipeline.filters.Filter, optional
        Filter defining which assets ``factor`` should be evaluated on.
        Default is ``factor.notnull()``.
    quantiles : int, optional
        Number of buckets to use for quantile groups. Default is 5.
    returns_lengths : iterable[int], optional
        Forward-returns horizons to use when evaluating ``factor``.
        Default is 1-day, 5-day, and 10-day returns. Must be non-empty.

    Returns
    -------
    factor_data : pd.DataFrame
        A (date, asset)-indexed DataFrame with the following columns:
            'factor': float64
                Values produced by ``factor``.
            'factor_quantile': int64
                Daily quantile label for each asset, computed over the
                assets passing ``factor_screen``.
            '<N>D'
                One column per entry of ``returns_lengths`` (e.g. '1D',
                '5D', '10D'), holding the N-day returns from the returns
                pipeline shifted back N sessions to align with the factor.
        Rows with a missing value in any column are dropped.

    Raises
    ------
    ValueError
        If ``returns_lengths`` is empty.
    """
    # Materialize once: the horizons are iterated several times below, which
    # would silently exhaust a one-shot iterable such as a generator.
    returns_lengths = tuple(returns_lengths)
    if not returns_lengths:
        # Fail before running any pipeline instead of inside max() afterwards.
        raise ValueError('returns_lengths must contain at least one horizon.')

    calendar = domain.calendar
    # Roll input dates to the next trading session.
    start_date = calendar.minute_to_session_label(pd.Timestamp(start_date, tz='UTC'))
    end_date = calendar.minute_to_session_label(pd.Timestamp(end_date, tz='UTC'))

    if factor_screen is None:
        factor_screen = factor.notnull()

    # Run pipeline to get factor values and quantiles.
    display('Getting factor values...')
    factor_pipe = Pipeline(
        {'factor': factor,
         'factor_quantile': factor.quantiles(quantiles, mask=factor_screen)},
        screen=factor_screen,
        domain=domain,
    )
    factor_results = run_pipeline(factor_pipe, start_date, end_date, chunksize=250)

    # One returns column per horizon, named '1D', '5D', ...
    column_order = ['{}D'.format(length) for length in returns_lengths]
    returns_cols = {
        # Add 1 because "1-day" returns needs 2 price observations.
        colname: Returns(window_length=length + 1)
        for colname, length in zip(column_order, returns_lengths)
    }
    returns_pipe = Pipeline(returns_cols, domain=domain)

    # Compute returns for the period after the factor pipeline, then
    # shift the results back to align with our factor values.
    display('Getting forward returns values...')
    returns_start_date = start_date
    # Extend the end by the longest horizon so the final factor dates still
    # have returns to be paired with after the backshift.
    returns_end_date = end_date + calendar.day * max(returns_lengths)
    raw_returns = run_pipeline(returns_pipe, returns_start_date, returns_end_date, chunksize=500)

    # Shift 1-day returns back by a day, 5-day returns back by 5 days, etc.
    shifted_returns = {
        colname: backshift_returns_series(raw_returns[colname], length)
        for colname, length in zip(column_order, returns_lengths)
    }

    # Merge backshifted returns into a single frame indexed like our desired output.
    display('Merging factor values with forward returns...')
    merged_returns = pd.DataFrame(
        data=shifted_returns,
        index=factor_results.index,
        columns=column_order,
    )

    # Concat factor results and forward returns column-wise.
    merged = pd.concat([factor_results, merged_returns], axis=1)
    merged.index = merged.index.set_names(['date', 'asset'])

    # Drop NaNs
    merged = merged.dropna(how='any')

    # Add a Business Day Offset to the DateTimeIndex
    # NOTE(review): presumably so Alphalens can read the trading frequency
    # from the date level - confirm against the Alphalens version in use.
    merged.index.levels[0].freq = pd.tseries.offsets.BDay()

    display('Complete')

    return merged

def backshift_returns_series(series, N):
    """Shift a multi-indexed series backwards by N observations in the first level.

    This can be used to convert backward-looking returns into a forward-returns series.

    Parameters
    ----------
    series : pd.Series
        Series indexed by a two-level MultiIndex. Rows must be sorted by the
        first level, because the cutoff is located with a binary search over
        the first level's integer codes.
    N : int
        Non-negative number of first-level positions to shift back by.

    Returns
    -------
    shifted : pd.Series
        The values of ``series`` from the (N+1)-th first-level entry onward,
        each relabelled with the first-level entry N positions earlier. The
        first level of the result holds all but the last N original entries.
        ``N == 0`` returns the data unchanged; an ``N`` at least as large as
        the number of first-level entries returns an empty series.

    Raises
    ------
    ValueError
        If ``N`` is negative.
    """
    if N < 0:
        raise ValueError('N must be non-negative, got {}.'.format(N))

    ix = series.index
    dates, sids = ix.levels

    # pandas 0.24 renamed MultiIndex ``labels`` to ``codes``; support both so
    # the helper works on old and new pandas alike.
    codes_name = 'codes' if hasattr(ix, 'codes') else 'labels'
    # Widen to int64 so subtracting N cannot overflow a narrow code dtype.
    date_codes, sid_codes = (
        np.array(level_codes, dtype='int64')
        for level_codes in getattr(ix, codes_name)
    )

    # Output date level contains all but the last N dates. An explicit stop
    # is used because ``dates[:-N]`` would be empty for N == 0.
    new_dates = dates[:max(len(dates) - N, 0)]

    # Output data drops every leading row whose date is one of the first N
    # dates; ``cutoff`` is the position of the first row to keep.
    cutoff = date_codes.searchsorted(N)
    new_date_codes = date_codes[cutoff:] - N
    new_sid_codes = sid_codes[cutoff:]
    new_values = series.values[cutoff:]

    # Sanity check: the first surviving row must map onto the first date.
    # Skipped when nothing survives (N spans every date).
    if len(new_date_codes):
        assert new_date_codes[0] == 0, 'first retained row does not map to the first date'

    new_index = pd.MultiIndex(
        levels=[new_dates, sids],
        sortorder=1,
        names=ix.names,
        **{codes_name: [new_date_codes, new_sid_codes]}
    )

    return pd.Series(data=new_values, index=new_index)

Enter your Alpha factor, universe filter, domain, and date range below

Create the factor you want to test and its associated universe filter, then specify the domain and date range over which to run Alphalens.

In [6]:
# Our alpha factor.
from quantopian.pipeline.data.factset import Fundamentals

class Momentum(CustomFactor):
    """Ratio of the last close to the first close in the lookback window."""

    # Input used when none is supplied at construction time.
    inputs = [EquityPricing.close]

    def compute(self, today, assets, out, close):
        # ``close[0]`` is the first row of the window and ``close[-1]`` the
        # last; the output is their element-wise ratio.
        window_start = close[0]
        window_end = close[-1]
        out[:] = window_end / window_start
        
# --- Momentum candidates ----------------------------------------------------
# Price ratios over 22-session and 132-session windows, plus their quotient.
momentum_1m = Momentum(window_length=22)
momentum_6m = Momentum(window_length=132)
momentum_2_6m = momentum_6m / momentum_1m

# --- Fundamental inputs -----------------------------------------------------
earningyield = Fundamentals.earn_yld_af.latest
price_book = Fundamentals.pbk_af.latest
roic = Fundamentals.roic_af.latest
market_cap = Fundamentals.mkt_val_public.latest

# --- Pricing inputs ---------------------------------------------------------
volume = EquityPricing.volume.latest

# --- Percentile filters built from the inputs above --------------------------
high_ey = earningyield.percentile_between(50, 100)
low_pb = price_book.percentile_between(0, 50)
high_roic = roic.percentile_between(50, 100)
small_cap = market_cap.percentile_between(0, 50)

# Combined screen: upper-half earnings yield and ROIC, lower-half price/book.
erp5 = high_ey & low_pb & high_roic

# --- Inputs handed to Alphalens ----------------------------------------------
# The factor to analyze. It must be a Factor that yields a numeric score
# proportional to expected alpha for each security; a Filter will not work.
my_factor = earningyield

# The universe filter applied when evaluating the factor.
my_filter = (volume > 50000) & small_cap

# Date range over which to run Alphalens.
start_date = '2007-1-1'
end_date = '2019-1-1'

# Domain (market) whose data is used.
my_domain = JP_EQUITIES
In [7]:
# Build the Alphalens-formatted frame (factor values, quantiles, and forward
# returns) for the factor, universe filter, domain, and dates chosen above.
al_data = evaluate_factor(
    factor=my_factor,
    domain=my_domain,
    start_date=start_date,
    end_date=end_date,
    factor_screen=my_filter,
)
'Getting factor values...'

Pipeline Execution Time: 5.18 Seconds
'Getting forward returns values...'

Pipeline Execution Time: 4.58 Seconds
'Merging factor values with forward returns...'
'Complete'
In [8]:
# Import Alphalens and render its full tear sheet from the factor data
# prepared by evaluate_factor (tables and figures appear as cell output).
from alphalens.tears import create_full_tear_sheet

create_full_tear_sheet(al_data)
Quantiles Statistics
min max mean std count count %
factor_quantile
0 -4216.60000 0.313653 -74.598365 183.280643 184258 20.114360
1 -123.91500 3.970870 -6.178155 16.330174 182652 19.939043
2 -40.45190 8.010640 2.131513 5.256618 182636 19.937296
3 -12.57530 15.738700 6.896749 2.146551 182671 19.941117
4 4.10714 1111.710000 18.585270 35.651435 183835 20.068184
Returns Analysis
1D 5D 10D
Ann. alpha -0.036 -0.019 0.006
beta -0.094 -0.140 -0.147
Mean Period Wise Return Top Quantile (bps) -1.552 -0.077 0.436
Mean Period Wise Return Bottom Quantile (bps) 4.619 1.112 0.264
Mean Period Wise Spread (bps) -6.171 -0.984 0.399
<matplotlib.figure.Figure at 0x7f08c25b9dd8>
Information Analysis
1D 5D 10D
IC Mean 0.027 0.037 0.046
IC Std. 0.082 0.095 0.100
Risk-Adjusted IC 0.328 0.388 0.462
t-stat(IC) 17.803 21.071 25.041
p-value(IC) 0.000 0.000 0.000
IC Skew -0.039 0.059 0.080
IC Kurtosis 0.246 0.046 0.011
/venvs/py35/lib/python3.5/site-packages/statsmodels/nonparametric/kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j
Turnover Analysis
10D 1D 5D
Quantile 1 Mean Turnover 0.317 0.212 0.282
Quantile 2 Mean Turnover 0.364 0.244 0.325
Quantile 3 Mean Turnover 0.399 0.272 0.359
Quantile 4 Mean Turnover 0.371 0.254 0.337
1D 5D 10D
Mean Factor Rank Autocorrelation 0.997 0.989 0.978
In [ ]: