# Important Python Modules
import numpy as np
import pandas as pd
from scipy import stats
# Pipeline essential imports
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
# Pipeline stock universe import
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.data.builtin import USEquityPricing
#Pipeline Factors & Alphalens import
from quantopian.pipeline.factors import CustomFactor, Returns, SimpleMovingAverage
import alphalens as al
#Factor imports
from quantopian.pipeline.data.factset.estimates import Actuals, PeriodicConsensus, LongTermConsensus
import quantopian.pipeline.data.factset.estimates as fe
# Global Pipeline parameters
USE_SECTORS = True
PIPE_NORMALIZE = True
WMIN = 0.01  # lower winsorization quantile; also the default threshold used by clip()
WMAX = 0.99  # upper winsorization quantile, if a winsorize function is used instead of clip()
#universe
coverage_filter = # secret
myfilter = coverage_filter
universe = QTradableStocksUS() & myfilter
#Period of Measurement
start=pd.Timestamp('2007-01-02')
end=pd.Timestamp('2019-06-30')
from datetime import datetime, timedelta
startstr = datetime.strftime(start, "%Y-%m-%d")
endstr = datetime.strftime(end, "%Y-%m-%d")
#################################################################################################################
# Helper tools for standardizing and cleaning the data
# CustomFactor that replaces NaNs in a z-scored factor with 0.
class NanToNum(CustomFactor):
    window_length = 1

    def compute(self, today, assets, out, factor):
        out[:] = np.nan_to_num(factor[-1])
def clip(data, threshold=WMIN, drop=False):
    # Winsorize values at the given quantile cutoffs (or, with drop=True, keep only the outliers).
    data = pd.Series(data)
    data_notnull = data[data.notnull()]
    if data_notnull.shape[0] > 0:
        low_cutoff = data_notnull.quantile(threshold)
        high_cutoff = data_notnull.quantile(1 - threshold)
        if not drop:
            # Cap values at the low/high cutoffs
            data = data.clip(lower=low_cutoff, upper=high_cutoff).values
        else:
            # Keep only the values outside the cutoffs
            data = data[(data < low_cutoff) | (data > high_cutoff)]
    return data
def standardize(data, winsorize=True, sectors=None, threshold=0.025):
    data = pd.Series(data)
    if winsorize:
        data = clip(data, threshold=threshold)
    # Prepare the data
    dfData = pd.DataFrame({'data': data})
    if USE_SECTORS and sectors is not None:
        dfData['sector'] = sectors
    else:
        dfData['sector'] = ''
    # Standardize the data: z-score within each sector, guarding against a zero standard deviation
    zscore = lambda x: (x - x.mean()) / (x.std() if x.std() != 0 else 1)
    data = dfData.groupby(['sector'])['data'].transform(zscore)
    return data
def normalize(data, demean=False):
    data = pd.Series(data)
    if demean:
        data = data - data.mean()
    denom = data.abs().sum()
    if denom == 0:
        denom = 1
    return data / denom
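# --- Illustration only (not from the original pipeline): quick sanity check of the helpers above.
# The sample values and sector labels below are made up; in the pipeline these helpers are
# presumably applied to factor arrays inside a CustomFactor's compute().
_demo_raw = pd.Series([5.0, 1.0, -3.0, 100.0, 2.0, np.nan])
_demo_sectors = pd.Series(['Tech', 'Tech', 'Energy', 'Energy', 'Energy', 'Energy'])
_demo_z = standardize(_demo_raw, sectors=_demo_sectors)  # winsorize, then z-score within each sector
_demo_w = normalize(_demo_z, demean=True)                # demean and scale so abs(weights) sum to 1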
#################################################################################################################
#factor formulation
#secret sauce
#secret sauce
#secret sauce
#################################################################################################################
def make_pipeline():
    earnings_matter = Earnings_Estimates_JL()
    return Pipeline(
        columns={
            'earnings_matter': earnings_matter,
        },
        screen=universe
    )
result = run_pipeline(make_pipeline(), start_date=start, end_date=end)
#################################################################################################################
# Computing Forward Returns
# The pipeline output is indexed by (date, asset); levels[1] holds every asset that appeared.
assets = result.index.levels[1]
# Pull pricing past `end` so forward returns can be computed for the last pipeline dates.
pricing = get_pricing(assets, start, end + pd.Timedelta(days=250), fields="open_price")
factor_returns = al.utils.get_clean_factor_and_forward_returns(
    result['earnings_matter'],
    pricing,
    periods=[1, 2, 3, 4, 5],
    max_loss=1.,  # allow any fraction of the factor data to be dropped without raising
)
#Alphalens Full Tearsheet Output
al.tears.create_full_tear_sheet(factor_returns)
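# Optional: lighter-weight Alphalens views (standard Alphalens calls) that iterate faster than the
# full tear sheet; uncomment as needed.
# al.tears.create_information_tear_sheet(factor_returns)  # IC analysis only
# al.tears.create_returns_tear_sheet(factor_returns)      # quantile / spread return analysis only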