# From https://www.quantopian.com/posts/relevant-fundamental-factors#5b9563549ad4a0004e03850d
    # Working Capital to Assets  
    # Retained Earnings to Assets  
    # EBITA to Assets  
    # Sales to Assets  
    # Mkt Cap to Liabilities

# Basically, valuation ratios are most commonly used when 'pricing' securities.
# - Present value models (Dividens / FCFE / FCFF), require to compute CAPM required return on equity and the growth rate
# - Multipliers models (P/E, P/S, P/B...)
# Other models to consider:
# - Altman z-score (credit risk)
# - Beinish M-score (reporting quality & account manipulations)
# - ROE decomposition

import numpy as np
import pandas as pd
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, Returns, AnnualizedVolatility
from quantopian.pipeline.classifiers.fundamentals import Sector  
from quantopian.pipeline.filters import QTradableStocksUS
from time import time
from quantopian.pipeline.data.psychsignal import stocktwits
from sklearn import preprocessing

import alphalens as al

MORNINGSTAR_SECTOR_CODES = {
     -1: 'Misc',
    101: 'Basic Materials',
    102: 'Consumer Cyclical',
    103: 'Financial Services',
    104: 'Real Estate',
    205: 'Consumer Defensive',
    206: 'Healthcare',
    207: 'Utilities',
    308: 'Communication Services',
    309: 'Energy',
    310: 'Industrials',
    311: 'Technology' ,    
}

def make_factors():
    
        class MessageSum(CustomFactor):
            inputs = [USEquityPricing.high, USEquityPricing.low, USEquityPricing.close, stocktwits.bull_scored_messages, stocktwits.bear_scored_messages, stocktwits.total_scanned_messages]
            window_length = 5
            def compute(self, today, assets, out, high, low, close, bull, bear, total):
                v = np.nansum((high-low)/close, axis=0)
                out[:] = preprocess(v*np.nansum(total*(bear-bull), axis=0))
                
        class fcf(CustomFactor):
            inputs = [Fundamentals.fcf_yield]
            window_length = 1
            def compute(self, today, assets, out, fcf_yield):
                out[:] = preprocess(np.nan_to_num(fcf_yield[-1,:]))
                
        class Direction(CustomFactor):
            inputs = [USEquityPricing.open, USEquityPricing.close]
            window_length = 21
            def compute(self, today, assets, out, open, close):
                p = (close-open)/close
                out[:] = preprocess(np.nansum(-p,axis=0))
                
        class mean_rev(CustomFactor):   
            inputs = [USEquityPricing.high,USEquityPricing.low,USEquityPricing.close]
            window_length = 30
            def compute(self, today, assets, out, high, low, close):
            
                p = (high+low+close)/3

                m = len(close[0,:])
                n = len(close[:,0])
                
                b = np.zeros(m)
                a = np.zeros(m)
                
                for k in range(10,n+1):
                    price_rel = np.nanmean(p[-k:,:],axis=0)/p[-1,:]
                    wt = np.nansum(price_rel)
                    b += wt*price_rel
                    price_rel = 1.0/price_rel
                    wt = np.nansum(price_rel)
                    a += wt*price_rel
                
                out[:] = preprocess(b-a)
                
        class volatility(CustomFactor):
            inputs = [USEquityPricing.high, USEquityPricing.low, USEquityPricing.close, USEquityPricing.volume]
            window_length = 5
            def compute(self, today, assets, out, high, low, close, volume):
                vol = np.nansum(volume,axis=0)*np.nansum(np.absolute((high-low)/close),axis=0)
                out[:] = preprocess(-vol)
                
        class growthscore(CustomFactor):
            inputs = [Fundamentals.growth_score]
            window_length = 1
            def compute(self, today, assets, out, growth_score):
                out[:] = preprocess(growth_score[-1,:])
                
        class peg_ratio(CustomFactor):
            inputs = [Fundamentals.peg_ratio]
            window_length = 1
            def compute(self, today, assets, out, peg_ratio):
                out[:] = preprocess(-1.0/peg_ratio[-1,:])
                                                    
        return {
            'MessageSum':              MessageSum,
            'FCF':                     fcf,
            'Direction':               Direction,
            'mean_rev':                mean_rev,
            'volatility':              volatility,
            'GrowthScore':             growthscore,
            'PegRatio':                peg_ratio,
        }

def preprocess(a):
    
    a = np.nan_to_num(a - np.nanmean(a))

    return preprocessing.scale(a)

universe = QTradableStocksUS()

factors = make_factors()

combined_alpha = None

for name, f in factors.iteritems():
        if combined_alpha == None:
            combined_alpha = f(mask=universe)
        else:
            combined_alpha = combined_alpha+f(mask=universe)

pipe = Pipeline(
    columns = {
            'CombinedAlpha' : combined_alpha,
            'Sector' : Sector()
    },
    screen=universe
)

start_timer = time()
results = run_pipeline(pipe, '2016-08-29', '2018-08-29')
end_timer = time()
results.fillna(value=0);

print "Time to run pipeline %.2f secs" % (end_timer - start_timer)

/usr/local/lib/python2.7/dist-packages/sklearn/preprocessing/data.py:153: UserWarning: Numerical issues were encountered when centering the data and might not be solved. Dataset may contain too large values. You may need to prescale your features.
  warnings.warn("Numerical issues were encountered "

Time to run pipeline 67.52 secs

my_factor = results['CombinedAlpha']
sectors = results['Sector']
asset_list = results.index.levels[1].unique()
prices = get_pricing(asset_list, start_date='2016-08-29', end_date='2018-09-07', fields='open_price')
periods = (1,3,5,10,21)

factor_data = al.utils.get_clean_factor_and_forward_returns(factor=my_factor,
                                                            prices=prices,
                                                            groupby=sectors,
                                                            groupby_labels=MORNINGSTAR_SECTOR_CODES,
                                                            periods=periods,
                                                            quantiles = 5)

Dropped 3.2% entries from factor data: 3.2% in forward returns computation and 0.0% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).
max_loss is 35.0%, not exceeded: OK!

mean_return_by_q_daily, std_err_by_q_daily = al.performance.mean_return_by_quantile(factor_data,
                                                                                    by_date=True)
mean_return_by_q, std_err_by_q = al.performance.mean_return_by_quantile(factor_data,
                                                                        by_group=False)
ic = al.performance.factor_information_coefficient(factor_data)

al.tears.create_information_tear_sheet(factor_data)

Information Analysis

<matplotlib.figure.Figure at 0x7fb933618fd0>

al.tears.create_full_tear_sheet(factor_data, by_group=True);

Quantiles Statistics

Returns Analysis

<matplotlib.figure.Figure at 0x7fb91d0234d0>

Information Analysis

	1D	3D	5D	10D	21D
IC Mean	0.015	0.023	0.030	0.036	0.042
IC Std.	0.094	0.095	0.094	0.089	0.072
Risk-Adjusted IC	0.163	0.245	0.318	0.406	0.575
t-stat(IC)	3.618	5.418	7.037	8.988	12.736
p-value(IC)	0.000	0.000	0.000	0.000	0.000
IC Skew	0.193	0.313	0.266	0.010	0.047
IC Kurtosis	0.529	0.446	0.504	0.951	0.570

	min	max	mean	std	count	count %
factor_quantile
1	-92.165794	-1.032725	-3.483037	4.498830	206883	20.018772
2	-1.688123	0.043333	-0.724548	0.326554	206586	19.990033
3	-0.475302	0.908461	0.194615	0.259933	206601	19.991485
4	0.351961	1.973381	1.080428	0.311077	206586	19.990033
5	1.215583	46.541406	2.935476	1.501595	206789	20.009676

	1D	3D	5D	10D	21D
Ann. alpha	0.097	0.080	0.076	0.073	0.067
beta	0.035	0.036	0.046	0.023	-0.056
Mean Period Wise Return Top Quantile (bps)	2.907	2.833	2.969	2.596	2.029
Mean Period Wise Return Bottom Quantile (bps)	-3.245	-2.977	-2.990	-2.814	-2.186
Mean Period Wise Spread (bps)	6.155	5.822	5.975	5.421	4.220

	1D	3D	5D	10D	21D
IC Mean	0.015	0.023	0.030	0.036	0.042
IC Std.	0.094	0.095	0.094	0.089	0.072
Risk-Adjusted IC	0.163	0.245	0.318	0.406	0.575
t-stat(IC)	3.618	5.418	7.037	8.988	12.736
p-value(IC)	0.000	0.000	0.000	0.000	0.000
IC Skew	0.193	0.313	0.266	0.010	0.047
IC Kurtosis	0.529	0.446	0.504	0.951	0.570