Notebook
In [10]:
# Important Python modules
import numpy as np  
import pandas as pd
from scipy import stats

# Pipeline essential imports
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline

# Pipeline stock-universe imports
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.data.builtin import USEquityPricing

# Pipeline factors & Alphalens imports
from quantopian.pipeline.factors import CustomFactor, Returns, SimpleMovingAverage
import alphalens as al

# Factor (FactSet estimates) imports
from quantopian.pipeline.data.factset.estimates import Actuals, PeriodicConsensus, LongTermConsensus
import quantopian.pipeline.data.factset.estimates as fe

# Global pipeline parameters
USE_SECTORS = True
PIPE_NORMALIZE = True
WMIN = 0.01     # lower winsorization quantile; also the default threshold in clip()
WMAX = 0.99     # upper winsorization quantile, for use with a winsorize function instead of clip()
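
# Illustrative alternative (assumed, not used below): scipy's winsorize clamps
# the same tails as the clip() helper defined later; `limits` is the fraction
# trimmed from each tail, so WMIN/WMAX map to limits=(WMIN, 1 - WMAX).
_winsorize_demo = stats.mstats.winsorize(np.arange(100.), limits=(WMIN, 1 - WMAX))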

# Universe
coverage_filter = None  # secret -- redacted by the author; substitute your own coverage filter
myfilter = coverage_filter

universe = QTradableStocksUS() & myfilter

# Period of measurement
start = pd.Timestamp('2007-01-02')
end = pd.Timestamp('2019-06-30')
from datetime import datetime, timedelta
startstr = datetime.strftime(start, "%Y-%m-%d")  # string forms of the dates
endstr = datetime.strftime(end, "%Y-%m-%d")      # (kept for convenience)

#################################################################################################################
# Helpful Tools for standardizing and cleaning the data

# CustomFactor that replaces NaNs in a (typically z-scored) input factor with 0.
class NanToNum(CustomFactor):
    window_length = 1
    def compute(self, today, assets, out, factor):
        out[:] = np.nan_to_num(factor[-1])
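
# Illustrative usage (hypothetical names, not part of the original pipeline):
# wrap a z-scored factor so NaNs become 0 instead of propagating downstream.
#   some_factor = Returns(window_length=21).zscore(mask=universe)
#   cleaned = NanToNum(inputs=[some_factor], mask=universe)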
        
        
# Winsorize a series at the given quantile threshold; with drop=True, remove
# the outlying values instead of clamping them.
def clip(data, threshold=WMIN, drop=False):
    data = pd.Series(data)
    data_notnull = data[data.notnull()]
    if data_notnull.shape[0] > 0:
        low_cutoff = data_notnull.quantile(threshold)
        high_cutoff = data_notnull.quantile(1 - threshold)
        if not drop:
            data = data.clip(lower=low_cutoff, upper=high_cutoff).values
        else:
            # keep only the values inside the cutoffs
            data = data[(data >= low_cutoff) & (data <= high_cutoff)]

    return data
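
# Illustrative sanity check (toy data, not from the original notebook): at the
# default 1%/99% cutoffs, the extremes are pulled in to the quantile bounds.
_toy = pd.Series(np.concatenate([[-100.0], np.random.randn(200), [100.0]]))
_clipped = pd.Series(clip(_toy))
assert _clipped.min() >= _toy.quantile(WMIN)
assert _clipped.max() <= _toy.quantile(1 - WMIN)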
 
    
def standardize(data, winsorize=True, sectors=None, threshold=0.025):
    data = pd.Series(data)
    if winsorize:
        data = clip(data, threshold=threshold)

    # Prepare the data, grouping by sector when enabled
    dfData = pd.DataFrame({'data': data})
    if USE_SECTORS and sectors is not None:
        dfData['sector'] = sectors
    else:
        dfData['sector'] = ''

    # Z-score within each group, guarding against a zero standard deviation
    zscore = lambda x: (x - x.mean()) / (x.std() if x.std() != 0 else 1)
    data = dfData.groupby(['sector'])['data'].transform(zscore)

    return data


# Scale a series so its absolute values sum to 1; optionally demean first.
def normalize(data, demean=False):
    data = pd.Series(data)
    if demean:
        data = data - data.mean()
        
    denom = data.abs().sum()
    if denom == 0:
        denom = 1
    
    return data / denom        
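
# Illustrative usage (toy data, not from the original notebook): z-score within
# sector, then scale so gross exposure is 1; demean=True roughly dollar-
# neutralizes the weights.
_vals = pd.Series([1.0, 2.0, 3.0, 10.0, 20.0, 30.0])
_secs = pd.Series(['tech', 'tech', 'tech', 'energy', 'energy', 'energy'])
_weights = normalize(standardize(_vals, winsorize=False, sectors=_secs), demean=True)
assert abs(_weights.abs().sum() - 1.0) < 1e-9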

#################################################################################################################


# Factor formulation: the redacted block below defines the Earnings_Estimates_JL
# CustomFactor used in make_pipeline().

#secret sauce
#secret sauce
#secret sauce

#################################################################################################################

def make_pipeline():
    earnings_matter = Earnings_Estimates_JL()

    return Pipeline(
        columns={
            'earnings_matter': earnings_matter,
        },
        screen=universe
    )

result = run_pipeline(make_pipeline(), start_date=start, end_date=end)
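
# Quick sanity check on the pipeline output: a (date, asset)-indexed DataFrame
# with one column per Pipeline column.
print(result.shape)
print(result.head())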

#################################################################################################################

# Computing forward returns. get_pricing is a builtin of the Quantopian
# research environment; the end date is padded so Alphalens can compute
# forward returns for the last factor dates.
assets = result.index.levels[1]
pricing = get_pricing(assets, start, end + pd.Timedelta(days=250), fields="open_price")

factor_returns = al.utils.get_clean_factor_and_forward_returns(result['earnings_matter'], pricing,
                                                               periods=[1, 2, 3, 4, 5],
                                                               max_loss=1.)  # allow up to 100% data loss

# Alphalens full tear sheet output
al.tears.create_full_tear_sheet(factor_returns)
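
# For faster iteration, the lighter-weight tear sheets accept the same factor
# data, e.g. al.tears.create_returns_tear_sheet(factor_returns) or
# al.tears.create_information_tear_sheet(factor_returns).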

Pipeline Execution Time: 2 Minutes, 26.39 Seconds
Dropped 0.2% entries from factor data: 0.2% in forward returns computation and 0.0% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).
max_loss is 100.0%, not exceeded: OK!
Quantiles Statistics
factor_quantile        min       max      mean       std   count    count %
1                -9.626377  0.238692 -1.810795  2.108387  944738  20.026141
2                -0.525064  0.552004  0.195493  0.141978  942892  19.987010
3                 0.037152  0.760762  0.404815  0.105656  943131  19.992076
4                 0.141705  1.035046  0.519373  0.102276  942622  19.981287
5                 0.390006  6.628924  1.149123  0.751316  944141  20.013486
Returns Analysis
                                                   1D      2D      3D      4D      5D
Ann. alpha                                      0.085   0.072   0.066   0.061   0.058
beta                                           -0.153  -0.163  -0.171  -0.173  -0.175
Mean Period Wise Return Top Quantile (bps)      2.070   1.584   1.278   1.066   0.919
Mean Period Wise Return Bottom Quantile (bps)  -2.005  -1.491  -1.231  -1.081  -0.988
Mean Period Wise Spread (bps)                   4.076   3.113   2.562   2.207   1.969
Information Analysis
                       1D      2D      3D      4D      5D
IC Mean             0.015   0.017   0.018   0.018   0.018
IC Std.             0.091   0.096   0.098   0.099   0.099
Risk-Adjusted IC    0.169   0.182   0.184   0.183   0.182
t-stat(IC)          9.451  10.196  10.301  10.240  10.215
p-value(IC)         0.000   0.000   0.000   0.000   0.000
IC Skew            -0.199  -0.270  -0.313  -0.371  -0.394
IC Kurtosis         1.340   1.302   1.404   1.326   1.245
Turnover Analysis
                                     1D     2D     3D     4D     5D
Quantile 1 Mean Turnover          0.047  0.084  0.116  0.146  0.173
Quantile 2 Mean Turnover          0.093  0.159  0.214  0.261  0.303
Quantile 3 Mean Turnover          0.104  0.174  0.231  0.280  0.320
Quantile 4 Mean Turnover          0.093  0.156  0.207  0.251  0.288
Quantile 5 Mean Turnover          0.052  0.090  0.124  0.154  0.182

                                     1D     2D     3D     4D     5D
Mean Factor Rank Autocorrelation  0.973  0.947  0.921  0.896  0.873