Notebook

Define Helper Functions

In [1]:
#https://www.quantopian.com/posts/new-video-learn-from-the-experts-ep-2-fast-iterative-factor-development-with-kyle
from quantopian.research import run_pipeline

import quantopian.optimize as opt
from quantopian.pipeline import Pipeline
from quantopian.pipeline.factors import CustomFactor
from quantopian.pipeline.data.builtin import EquityPricing
from quantopian.pipeline.data.factset import RBICSFocus
from quantopian.pipeline.data.factset.estimates import Actuals, PeriodicConsensus
from quantopian.pipeline.data.factset import Fundamentals as ff
from quantopian.pipeline.data.morningstar import Fundamentals as mf

from quantopian.pipeline.filters import QTradableStocksUS
from zipline.utils.tradingcalendar import trading_day

import numpy as np
import pandas as pd

# Pipeline parameters
USE_SECTORS = True
PIPE_NORMALIZE = False
In [2]:
def clip(data, threshold=0.025, drop=False):
    """Winsorize a series at the given quantile threshold, or drop the tails entirely."""
    data = pd.Series(data)
    data_notnull = data[data.notnull()]
    if data_notnull.shape[0] > 0:
        low_cutoff = data_notnull.quantile(threshold)
        high_cutoff = data_notnull.quantile(1 - threshold)
        if not drop:
            # Winsorize: cap values at the quantile cutoffs
            data = data.clip(lower=low_cutoff, upper=high_cutoff).values
        else:
            # Drop outliers, keeping only values inside the cutoffs
            data = data[(data >= low_cutoff) & (data <= high_cutoff)]
    
    return data
 
    
def standardize(data, winsorize=True, sectors=None, threshold=0.025):
    data = pd.Series(data)
    if winsorize:
        data = clip(data, threshold=threshold)    
    
    # Prepare the data
    dfData = pd.DataFrame({'data': data})
    if USE_SECTORS and sectors is not None:
        dfData['sector'] = sectors
    else:
        dfData['sector'] = ''
    
    # Standardize the data
    zscore = lambda x: (x - x.mean()) / (x.std() if x.std() != 0 else 1)
    data = dfData.groupby(['sector'])['data'].transform(zscore)
    
    return data


def normalize(data, demean=False):
    data = pd.Series(data)
    if demean:
        data = data - data.mean()
        
    denom = data.abs().sum()
    if denom == 0:
        denom = 1
    
    return data / denom
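
A quick sanity check of the three helpers on a toy Series (hypothetical values, not part of the original run):

In [ ]:
# Minimal sketch: winsorize, sector-standardize, then L1-normalize a toy factor
demo = pd.Series([0.5, 1.0, 1.5, 2.0, 50.0])                   # 50.0 is an outlier
demo_sectors = pd.Series(['Tech', 'Tech', 'Energy', 'Energy', 'Tech'])

clipped = clip(demo, threshold=0.1)          # caps 50.0 at the 90th-percentile cutoff
z = standardize(demo, sectors=demo_sectors)  # z-scores within each sector group
weights = normalize(z, demean=True)          # demeans, then scales so |weights| sum to 1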

Define Factors

In [6]:
class Current(CustomFactor):
    factor = mf.current_ratio
        
    inputs = [factor, RBICSFocus().l2_name]
    window_length = 1
    window_safe = True

    def compute(self, today, assets, out, factor, sectors):
        # Most recent current ratio for each asset (copy to avoid mutating the input buffer)
        match = factor[-1, :].copy()

        # Treat extreme ratios as outliers
        match[match > 5] = np.nan
        match = match - 1  # a current ratio above 1 is desirable

        # Replace inf w/ NaN
        match[np.isinf(match)] = np.nan

        # Standardize the data
        match = standardize(match, sectors=sectors.as_string_array()[-1, :])

        # Normalize the data (NOTE: only include if looking at factor individually)
        if PIPE_NORMALIZE:
            match = normalize(match)

        out[:] = match
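
To make the transform concrete, here is what compute does to a toy cross-section of current ratios (hypothetical numbers, purely illustrative):

In [ ]:
# Hypothetical cross-section of current ratios
toy = np.array([0.8, 1.5, 3.0, 12.0])    # 12.0 exceeds the outlier cap of 5
m = toy.copy()
m[m > 5] = np.nan                        # -> [ 0.8,  1.5,  3.0,  nan]
m = m - 1                                # -> [-0.2,  0.5,  2.0,  nan]
# standardize() then z-scores the result within each RBICS sector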

Define Pipelines

In [7]:
def make_factors():
    factors = {}

    factors['CurrentGrowth'] = Current

    return factors

# Define the universe
universe = QTradableStocksUS()

def factor_pipeline(universe):
    all_factors = make_factors()
    
    factors = {a: all_factors[a]() for a in all_factors}
    
    pipe = Pipeline(columns=factors, screen=universe)
    
    return pipe

Run Pipelines

In [8]:
start_date = '2014-01-04'
end_date = '2016-08-29'

factor_pipe = run_pipeline(factor_pipeline(universe), start_date=start_date, end_date=end_date).dropna(how='all')

Pipeline Execution Time: 32.20 Seconds
In [9]:
factor_pipe.head()
Out[9]:
                                               CurrentGrowth
2014-01-06 00:00:00+00:00 Equity(2 [HWM])          -1.040209
                          Equity(24 [AAPL])        -0.449510
                          Equity(39 [DDC])          1.891932
                          Equity(41 [ARCB])        -0.392664
                          Equity(52 [ABM])         -0.114905
In [10]:
factor_pipe.hist();
In [11]:
factor_pipe.fillna(0).corr()  # Filling NaNs with 0 assumes empty values are the mean (z-score/rank of 0)
# factor_pipe.corr()  # Drops NaNs; results are not much different
Out[11]:
CurrentGrowth
CurrentGrowth 1.0

Get Alpha Factors

In [12]:
# Get the alphas
alphas = factor_pipe.copy()

# Replace infs and NaNs
alphas[np.isinf(alphas)] = np.nan
alphas.fillna(0, inplace=True)

Get Prices

In [13]:
from datetime import datetime
from dateutil.relativedelta import relativedelta
In [14]:
# Get pricing data (extends 6 months to minimize dropping in Alphalens)
new_start_date = (datetime.strptime(start_date, '%Y-%m-%d') - relativedelta(months=6)).strftime('%Y-%m-%d')
new_end_date = (datetime.strptime(end_date, '%Y-%m-%d') + relativedelta(months=6)).strftime('%Y-%m-%d')
assets = factor_pipe.reset_index()['level_1'].unique()
dates = factor_pipe.reset_index()['level_0'].unique()
prices = get_pricing(assets, start_date=new_start_date, end_date=new_end_date, fields='close_price')

Analyze Alpha via Alphalens

In [15]:
import alphalens as al
from scipy import stats
In [16]:
def get_ic_table(ic_data):
    ic_summary_table = pd.DataFrame()
    ic_summary_table["IC Mean"] = ic_data.mean()
    ic_summary_table["IC Std."] = ic_data.std()
    ic_summary_table["Risk-Adjusted IC"] = \
        ic_data.mean() / ic_data.std()
    t_stat, p_value = stats.ttest_1samp(ic_data, 0)
    ic_summary_table["t-stat(IC)"] = t_stat
    ic_summary_table["p-value(IC)"] = p_value
    ic_summary_table["IC Skew"] = stats.skew(ic_data)
    ic_summary_table["IC Kurtosis"] = stats.kurtosis(ic_data)

    return ic_summary_table.apply(lambda x: x.round(3)).T
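
As a sanity check, get_ic_table can be exercised on synthetic ICs (hypothetical data, purely illustrative):

In [ ]:
# Smoke test on synthetic daily ICs: mean 0.01, std 0.05, ~one year of trading days
np.random.seed(0)
ic_demo = pd.DataFrame({'1D': np.random.normal(0.01, 0.05, 252)})
get_ic_table(ic_demo)  # expect IC Mean near 0.01 and Risk-Adjusted IC near 0.2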
In [17]:
results = None
for i, col in enumerate(sorted(alphas.columns)):
    if i > 0:
        print('')
    print(col)
    
    # Get the factor data
    data = alphas[col]
    data = data[data != 0].dropna()
    try:
        factor_data = al.utils.get_clean_factor_and_forward_returns(data,
                                                                    prices,
                                                                    quantiles=None,
                                                                    bins=(-np.inf, 0, np.inf),
                                                                    periods=[1, 3, 7, 14],
                                                                    max_loss=1.
                                                                   )

        # Output the results
        # al.tears.create_full_tear_sheet(factor_data)
        # al.tears.create_returns_tear_sheet(factor_data)
        ic = al.performance.factor_information_coefficient(factor_data)
        ic = get_ic_table(ic)
        
        ic.columns = pd.MultiIndex.from_product([[col], ic.columns])
        if results is None:
            results = ic
        else:
            results = pd.concat([results, ic], axis=1)
            
        
    except Exception as e:
        print('Error: {}'.format(e))
        continue
CurrentGrowth
Dropped 0.3% entries from factor data: 0.3% in forward returns computation and 0.0% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).
max_loss is 100.0%, not exceeded: OK!
In [18]:
temp = None
i = 0
unique_vals = results.columns.get_level_values(0).unique()
for j, factor in enumerate(sorted(unique_vals)):
    i += 1
    res = results.xs(factor, axis=1, level=0, drop_level=False)
    
    if temp is None:
        temp = res
    else:
        temp = pd.concat([temp, res], axis=1)
        
    if i > 4 or j == len(unique_vals) - 1:
        display(temp)
        temp = None
        i = 0
CurrentGrowth
                      1D      3D      7D     14D
IC Mean           -0.005  -0.008  -0.012  -0.017
IC Std.            0.061   0.059   0.059   0.058
Risk-Adjusted IC  -0.081  -0.133  -0.200  -0.300
t-stat(IC)        -2.090  -3.443  -5.166  -7.747
p-value(IC)        0.037   0.001   0.000   0.000
IC Skew            0.073   0.093   0.233   0.012
IC Kurtosis       -0.281  -0.065  -0.005  -0.416

The IC is consistently negative and strengthens with horizon, so the signal is predictive but inverted: higher sector-standardized current ratios are associated with lower forward returns.

Analyze Alpha via Thomas Wiecki's Notebook

In [19]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import empyrical as ep
import alphalens as al
import pyfolio as pf

from quantopian.research.experimental import get_factor_returns, get_factor_loadings
In [20]:
# Load risk factor loadings and returns
factor_loadings = get_factor_loadings(assets, start_date, new_end_date)
factor_returns = get_factor_returns(start_date, new_end_date)


# Work around a known glitch in the risk returns: zero out the spurious
# one-day spike in the 'value' factor returns
factor_returns.loc[factor_returns.value.idxmax(), 'value'] = 0
(pandas raises SettingWithCopyWarning on this assignment; warning output elided.)
In [21]:
def calc_perf_attrib(portfolio_returns, portfolio_pos, factor_returns, factor_loadings):
    start = portfolio_returns.index[0]
    end = portfolio_returns.index[-1]
    factor_loadings.index = factor_loadings.index.set_names(['dt', 'ticker'])
    portfolio_pos.index = portfolio_pos.index.set_names(['dt'])
    
    portfolio_pos = portfolio_pos.drop('cash', axis=1)
    portfolio_pos.columns.name = 'ticker'
    portfolio_pos.columns = portfolio_pos.columns.astype('int')
    
    return ep.perf_attrib(
        portfolio_returns, 
        portfolio_pos.stack().dropna(),
        factor_returns.loc[start:end], 
        factor_loadings.loc[start:end])

def plot_exposures(risk_exposures, ax=None):
    rep = risk_exposures.stack().reset_index()
    rep.columns = ['dt', 'factor', 'exposure']
    sns.boxplot(x='exposure', y='factor', data=rep, orient='h', ax=ax, order=risk_exposures.columns[::-1])

def compute_turnover(df):
    return df.dropna().unstack().dropna(how='all').fillna(0).diff().abs().sum(1)

def get_max_median_position_concentration(expos):
    # Distribution of position weights per day (5th/25th/50th/75th/95th percentiles)
    return expos.groupby(level=0).quantile([.05, .25, .5, .75, .95]).unstack()

def compute_factor_stats(factor, pricing, factor_returns, factor_loadings, periods=range(1, 15), view=None):
    factor_data_total = al.utils.get_clean_factor_and_forward_returns(
        factor, 
        pricing,
        quantiles=None,
        bins=(-np.inf, 0, np.inf),
        periods=periods,
        cumulative_returns=False,
    )

    portfolio_returns_total = al.performance.factor_returns(factor_data_total)
    portfolio_returns_total.columns = portfolio_returns_total.columns.map(lambda x: int(x[:-1]))
    for i in portfolio_returns_total.columns:
        portfolio_returns_total[i] = portfolio_returns_total[i].shift(i)

    portfolio_returns_specific = pd.DataFrame(columns=portfolio_returns_total.columns, index=portfolio_returns_total.index)
    
    # closure
    def calc_perf_attrib_c(i, portfolio_returns_total=portfolio_returns_total, 
                           factor_data_total=factor_data_total, factor_returns=factor_returns, 
                           factor_loadings=factor_loadings):
        return calc_perf_attrib(portfolio_returns_total[i], 
                                factor_data_total['factor'].unstack().assign(cash=0).shift(i), 
                                factor_returns, factor_loadings)
    
    if view is None:
        perf_attrib = map(calc_perf_attrib_c, portfolio_returns_total.columns)
    else:
        perf_attrib = view.map_sync(calc_perf_attrib_c, portfolio_returns_total.columns)
        
    for i, pa in enumerate(perf_attrib):
        if i == 0:
            # Keep the risk exposures and attribution from the delay-1 portfolio only
            risk_exposures_portfolio = pa[0]
            perf_attribution = pa[1]
        portfolio_returns_specific[i + 1] = pa[1]['specific_returns']
    
    delay_sharpes_total = portfolio_returns_total.apply(ep.sharpe_ratio)
    delay_sharpes_specific = portfolio_returns_specific.apply(ep.sharpe_ratio)
    
    turnover = compute_turnover(factor)
    n_holdings = factor.groupby(level=0).count()
    perc_holdings = get_max_median_position_concentration(factor)
    
    return {'factor_data_total': factor_data_total, 
            'portfolio_returns_total': portfolio_returns_total,
            'portfolio_returns_specific': portfolio_returns_specific,
            'risk_exposures_portfolio': risk_exposures_portfolio,
            'perf_attribution': perf_attribution,
            'delay_sharpes_total': delay_sharpes_total,
            'delay_sharpes_specific': delay_sharpes_specific,
            'turnover': turnover,
            'n_holdings': n_holdings,
            'perc_holdings': perc_holdings,
    }

def plot_overview_tear_sheet(factor, pricing, factor_returns, factor_loadings, periods=range(1, 15), view=None):
    fig = plt.figure(figsize=(16, 16))
    gs = plt.GridSpec(5, 4)
    ax1 = plt.subplot(gs[0:2, 0:2])
    
    factor_stats = compute_factor_stats(factor, pricing, factor_returns, factor_loadings, periods=periods, view=view)
                         
    sharpes = pd.DataFrame({'specific': factor_stats['delay_sharpes_specific'], 
                  'total': factor_stats['delay_sharpes_total']})
#     display(sharpes)
    sharpes.plot.bar(ax=ax1)
    ax1.set(xlabel='delay', ylabel='IR')

    ax2a = plt.subplot(gs[0, 2:4])
    delay_cum_rets_total = factor_stats['portfolio_returns_total'][list(range(1, 5))].apply(ep.cum_returns)
    delay_cum_rets_total.plot(ax=ax2a)
    ax2a.set(title='Total returns', ylabel='Cumulative returns')
    
    ax2b = plt.subplot(gs[1, 2:4])
    delay_cum_rets_specific = factor_stats['portfolio_returns_specific'][list(range(1, 5))].apply(ep.cum_returns)
    delay_cum_rets_specific.plot(ax=ax2b)
    ax2b.set(title='Specific returns', ylabel='Cumulative returns')
    
    ax3 = plt.subplot(gs[2:4, 0:2])
    plot_exposures(factor_stats['risk_exposures_portfolio'].reindex(columns=factor_stats['perf_attribution'].columns), 
                   ax=ax3)

    ax4 = plt.subplot(gs[2:4, 2])
    ep.cum_returns_final(factor_stats['perf_attribution']).plot.barh(ax=ax4)
    ax4.set(xlabel='Cumulative returns')

    ax5 = plt.subplot(gs[2:4, 3], sharey=ax4)
    factor_stats['perf_attribution'].apply(ep.annual_volatility).plot.barh(ax=ax5)
    ax5.set(xlabel='Ann. volatility')

    ax6 = plt.subplot(gs[-1, 0:2])
    factor_stats['n_holdings'].plot(color='b', ax=ax6)
    ax6.set_ylabel('# holdings', color='b')
    ax6.tick_params(axis='y', labelcolor='b')
    
    ax62 = ax6.twinx()
    factor_stats['turnover'].plot(color='r', ax=ax62)
    ax62.set_ylabel('turnover', color='r')
    ax62.tick_params(axis='y', labelcolor='r')
    
    ax7 = plt.subplot(gs[-1, 2:4])
    factor_stats['perc_holdings'].plot(ax=ax7)
    ax7.set(ylabel='Long/short perc holdings')
    
    gs.tight_layout(fig)
    
    return fig, factor_stats, sharpes
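
A minimal sketch of what compute_turnover measures, on a hypothetical two-day factor (dates, tickers, and values invented for illustration):

In [ ]:
# Toy factor indexed by (date, asset); turnover is the daily sum of |weight changes|
idx = pd.MultiIndex.from_product([pd.to_datetime(['2014-01-06', '2014-01-07']),
                                  ['A', 'B']])
toy_factor = pd.Series([0.5, -0.5, 0.2, -0.2], index=idx)
compute_turnover(toy_factor)  # day 2 turnover: |0.2-0.5| + |-0.2+0.5| = 0.6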
In [22]:
# Loop through all columns
results = None
for i, col in enumerate(sorted(alphas.columns)):
    if i > 0:
        print('')
    print(col)
    
    # Get the factor data
    try:
        data = alphas[col]
        data = data[data != 0].dropna()
        fig, factor_stats, sharpes = plot_overview_tear_sheet(data,
                                                              prices,
                                                              factor_returns,
                                                              factor_loadings)
        plt.show()
        
        sharpes.columns = pd.MultiIndex.from_product([[col], sharpes.columns])
        if results is None:
            results = sharpes
        else:
            results = pd.concat([results, sharpes], axis=1)
        
    except Exception as e:
        print('Error: {}'.format(e))
        continue
        
# results
CurrentGrowth
Dropped 0.3% entries from factor data: 0.3% in forward returns computation and 0.0% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).
max_loss is 35.0%, not exceeded: OK!
In [23]:
temp = None
i = 0
unique_vals = results.columns.get_level_values(0).unique()
for j, factor in enumerate(unique_vals):
    i += 1
    res = results.xs(factor, axis=1, level=0, drop_level=False)
    
    if temp is None:
        temp = res
    else:
        temp = pd.concat([temp, res], axis=1)
        
    if i > 4 or j == len(unique_vals) - 1:
        display(temp)
        temp = None
        i = 0
CurrentGrowth
      specific     total
1     0.964261 -0.845589
2     1.009502 -0.917935
3     1.020810 -0.906553
4     1.027213 -0.878672
5     1.027792 -0.881584
6     0.990129 -0.944221
7     0.985334 -0.975286
8     0.964312 -0.989866
9     0.976267 -1.014535
10    0.986683 -0.979988
11    0.961968 -0.976929
12    0.979762 -0.964353
13    0.943348 -0.924788
14    0.920043 -0.916336

Across all delays the specific (residual) Sharpe stays near 1.0 while the total Sharpe is firmly negative, indicating the factor's raw returns are dominated by unfavorable common-risk exposures even though its stock-specific component looks attractive.
In [ ]: