Notebook
In [1]:
# From https://www.quantopian.com/posts/relevant-fundamental-factors#5b9563549ad4a0004e03850d
    # Working Capital to Assets  
    # Retained Earnings to Assets  
    # EBITA to Assets  
    # Sales to Assets  
    # Mkt Cap to Liabilities

# Basically, valuation ratios are most commonly used when 'pricing' securities.
# - Present value models (Dividens / FCFE / FCFF), require to compute CAPM required return on equity and the growth rate
# - Multipliers models (P/E, P/S, P/B...)
# Other models to consider:
# - Altman z-score (credit risk)
# - Beinish M-score (reporting quality & account manipulations)
# - ROE decomposition 
In [2]:
import numpy as np
import pandas as pd
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, Returns, AnnualizedVolatility
from quantopian.pipeline.classifiers.fundamentals import Sector  
from quantopian.pipeline.filters import QTradableStocksUS
from time import time
from quantopian.pipeline.data.psychsignal import stocktwits
from sklearn import preprocessing
In [3]:
import alphalens as al
In [4]:
MORNINGSTAR_SECTOR_CODES = {
     -1: 'Misc',
    101: 'Basic Materials',
    102: 'Consumer Cyclical',
    103: 'Financial Services',
    104: 'Real Estate',
    205: 'Consumer Defensive',
    206: 'Healthcare',
    207: 'Utilities',
    308: 'Communication Services',
    309: 'Energy',
    310: 'Industrials',
    311: 'Technology' ,    
}
In [5]:
def make_factors():
    
        class MessageSum(CustomFactor):
            inputs = [USEquityPricing.high, USEquityPricing.low, USEquityPricing.close, stocktwits.bull_scored_messages, stocktwits.bear_scored_messages, stocktwits.total_scanned_messages]
            window_length = 5
            def compute(self, today, assets, out, high, low, close, bull, bear, total):
                v = np.nansum((high-low)/close, axis=0)
                out[:] = preprocess(v*np.nansum(total*(bear-bull), axis=0))
                
        class fcf(CustomFactor):
            inputs = [Fundamentals.fcf_yield]
            window_length = 1
            def compute(self, today, assets, out, fcf_yield):
                out[:] = preprocess(np.nan_to_num(fcf_yield[-1,:]))
                
        class Direction(CustomFactor):
            inputs = [USEquityPricing.open, USEquityPricing.close]
            window_length = 21
            def compute(self, today, assets, out, open, close):
                p = (close-open)/close
                out[:] = preprocess(np.nansum(-p,axis=0))
                
        class mean_rev(CustomFactor):   
            inputs = [USEquityPricing.high,USEquityPricing.low,USEquityPricing.close]
            window_length = 30
            def compute(self, today, assets, out, high, low, close):
            
                p = (high+low+close)/3

                m = len(close[0,:])
                n = len(close[:,0])
                
                b = np.zeros(m)
                a = np.zeros(m)
                
                for k in range(10,n+1):
                    price_rel = np.nanmean(p[-k:,:],axis=0)/p[-1,:]
                    wt = np.nansum(price_rel)
                    b += wt*price_rel
                    price_rel = 1.0/price_rel
                    wt = np.nansum(price_rel)
                    a += wt*price_rel
                
                out[:] = preprocess(b-a)
                
        class volatility(CustomFactor):
            inputs = [USEquityPricing.high, USEquityPricing.low, USEquityPricing.close, USEquityPricing.volume]
            window_length = 5
            def compute(self, today, assets, out, high, low, close, volume):
                vol = np.nansum(volume,axis=0)*np.nansum(np.absolute((high-low)/close),axis=0)
                out[:] = preprocess(-vol)
                
        class growthscore(CustomFactor):
            inputs = [Fundamentals.growth_score]
            window_length = 1
            def compute(self, today, assets, out, growth_score):
                out[:] = preprocess(growth_score[-1,:])
                
        class peg_ratio(CustomFactor):
            inputs = [Fundamentals.peg_ratio]
            window_length = 1
            def compute(self, today, assets, out, peg_ratio):
                out[:] = preprocess(-1.0/peg_ratio[-1,:])
                                                    
        return {
            'MessageSum':              MessageSum,
            'FCF':                     fcf,
            'Direction':               Direction,
            'mean_rev':                mean_rev,
            'volatility':              volatility,
            'GrowthScore':             growthscore,
            'PegRatio':                peg_ratio,
        }
In [6]:
def preprocess(a):
    
    a = np.nan_to_num(a - np.nanmean(a))

    return preprocessing.scale(a)
In [7]:
universe = QTradableStocksUS()

factors = make_factors()

combined_alpha = None

for name, f in factors.iteritems():
        if combined_alpha == None:
            combined_alpha = f(mask=universe)
        else:
            combined_alpha = combined_alpha+f(mask=universe)

pipe = Pipeline(
    columns = {
            'CombinedAlpha' : combined_alpha,
            'Sector' : Sector()
    },
    screen=universe
)

start_timer = time()
results = run_pipeline(pipe, '2016-08-29', '2018-08-29')
end_timer = time()
results.fillna(value=0);

print "Time to run pipeline %.2f secs" % (end_timer - start_timer)
/usr/local/lib/python2.7/dist-packages/sklearn/preprocessing/data.py:153: UserWarning: Numerical issues were encountered when centering the data and might not be solved. Dataset may contain too large values. You may need to prescale your features.
  warnings.warn("Numerical issues were encountered "
Time to run pipeline 67.52 secs
In [8]:
my_factor = results['CombinedAlpha']
sectors = results['Sector']
asset_list = results.index.levels[1].unique()
prices = get_pricing(asset_list, start_date='2016-08-29', end_date='2018-09-07', fields='open_price')
periods = (1,3,5,10,21)

factor_data = al.utils.get_clean_factor_and_forward_returns(factor=my_factor,
                                                            prices=prices,
                                                            groupby=sectors,
                                                            groupby_labels=MORNINGSTAR_SECTOR_CODES,
                                                            periods=periods,
                                                            quantiles = 5)
Dropped 3.2% entries from factor data: 3.2% in forward returns computation and 0.0% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).
max_loss is 35.0%, not exceeded: OK!
In [9]:
mean_return_by_q_daily, std_err_by_q_daily = al.performance.mean_return_by_quantile(factor_data,
                                                                                    by_date=True)
mean_return_by_q, std_err_by_q = al.performance.mean_return_by_quantile(factor_data,
                                                                        by_group=False)
ic = al.performance.factor_information_coefficient(factor_data)
In [10]:
al.tears.create_information_tear_sheet(factor_data)
Information Analysis
1D 3D 5D 10D 21D
IC Mean 0.015 0.023 0.030 0.036 0.042
IC Std. 0.094 0.095 0.094 0.089 0.072
Risk-Adjusted IC 0.163 0.245 0.318 0.406 0.575
t-stat(IC) 3.618 5.418 7.037 8.988 12.736
p-value(IC) 0.000 0.000 0.000 0.000 0.000
IC Skew 0.193 0.313 0.266 0.010 0.047
IC Kurtosis 0.529 0.446 0.504 0.951 0.570
<matplotlib.figure.Figure at 0x7fb933618fd0>
In [ ]:
al.tears.create_full_tear_sheet(factor_data, by_group=True);
Quantiles Statistics
min max mean std count count %
factor_quantile
1 -92.165794 -1.032725 -3.483037 4.498830 206883 20.018772
2 -1.688123 0.043333 -0.724548 0.326554 206586 19.990033
3 -0.475302 0.908461 0.194615 0.259933 206601 19.991485
4 0.351961 1.973381 1.080428 0.311077 206586 19.990033
5 1.215583 46.541406 2.935476 1.501595 206789 20.009676
Returns Analysis
1D 3D 5D 10D 21D
Ann. alpha 0.097 0.080 0.076 0.073 0.067
beta 0.035 0.036 0.046 0.023 -0.056
Mean Period Wise Return Top Quantile (bps) 2.907 2.833 2.969 2.596 2.029
Mean Period Wise Return Bottom Quantile (bps) -3.245 -2.977 -2.990 -2.814 -2.186
Mean Period Wise Spread (bps) 6.155 5.822 5.975 5.421 4.220
<matplotlib.figure.Figure at 0x7fb91d0234d0>
Information Analysis
1D 3D 5D 10D 21D
IC Mean 0.015 0.023 0.030 0.036 0.042
IC Std. 0.094 0.095 0.094 0.089 0.072
Risk-Adjusted IC 0.163 0.245 0.318 0.406 0.575
t-stat(IC) 3.618 5.418 7.037 8.988 12.736
p-value(IC) 0.000 0.000 0.000 0.000 0.000
IC Skew 0.193 0.313 0.266 0.010 0.047
IC Kurtosis 0.529 0.446 0.504 0.951 0.570