Notebook
In [10]:
# Important Python modules
import numpy as np  
import pandas as pd
from scipy import stats

# Pipeline essential imports
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline

# Pipeline stock-universe imports
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.data.builtin import USEquityPricing

# Pipeline factors & Alphalens imports
from quantopian.pipeline.factors import CustomFactor, Returns, SimpleMovingAverage
import alphalens as al

# Factor (FactSet estimates) imports
from quantopian.pipeline.data.factset.estimates import Actuals, PeriodicConsensus, LongTermConsensus
import quantopian.pipeline.data.factset.estimates as fe

# Global pipeline parameters
USE_SECTORS = True
PIPE_NORMALIZE = True
WMIN = 0.01     # lower winsorization quantile; also the default threshold in clip()
WMAX = 0.99     # upper winsorization quantile, for use with a winsorize function instead of clip()
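
# Illustrative alternative (assumed, not used below): scipy's winsorize clamps
# the same tails as the clip() helper defined later; `limits` is the fraction
# trimmed from each tail, so WMIN/WMAX map to limits=(WMIN, 1 - WMAX).
_winsorize_demo = stats.mstats.winsorize(np.arange(100.), limits=(WMIN, 1 - WMAX))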

# Universe
coverage_filter = None  # secret -- redacted by the author; substitute your own coverage filter
myfilter = coverage_filter

universe = QTradableStocksUS() & myfilter

# Period of measurement
start = pd.Timestamp('2007-01-02')
end = pd.Timestamp('2019-06-30')
from datetime import datetime, timedelta
startstr = datetime.strftime(start, "%Y-%m-%d")  # string forms of the dates
endstr = datetime.strftime(end, "%Y-%m-%d")      # (kept for convenience)

#################################################################################################################
# Helpful Tools for standardizing and cleaning the data

# CustomFactor that replaces NaNs in a (typically z-scored) input factor with 0.
class NanToNum(CustomFactor):
    window_length = 1
    def compute(self, today, assets, out, factor):
        out[:] = np.nan_to_num(factor[-1])
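
# Illustrative usage (hypothetical names, not part of the original pipeline):
# wrap a z-scored factor so NaNs become 0 instead of propagating downstream.
#   some_factor = Returns(window_length=21).zscore(mask=universe)
#   cleaned = NanToNum(inputs=[some_factor], mask=universe)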
        
        
# Winsorize a series at the given quantile threshold; with drop=True, remove
# the outlying values instead of clamping them.
def clip(data, threshold=WMIN, drop=False):
    data = pd.Series(data)
    data_notnull = data[data.notnull()]
    if data_notnull.shape[0] > 0:
        low_cutoff = data_notnull.quantile(threshold)
        high_cutoff = data_notnull.quantile(1 - threshold)
        if not drop:
            data = data.clip(lower=low_cutoff, upper=high_cutoff).values
        else:
            # keep only the values inside the cutoffs
            data = data[(data >= low_cutoff) & (data <= high_cutoff)]

    return data
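
# Illustrative sanity check (toy data, not from the original notebook): at the
# default 1%/99% cutoffs, the extremes are pulled in to the quantile bounds.
_toy = pd.Series(np.concatenate([[-100.0], np.random.randn(200), [100.0]]))
_clipped = pd.Series(clip(_toy))
assert _clipped.min() >= _toy.quantile(WMIN)
assert _clipped.max() <= _toy.quantile(1 - WMIN)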
 
    
def standardize(data, winsorize=True, sectors=None, threshold=0.025):
    data = pd.Series(data)
    if winsorize:
        data = clip(data, threshold=threshold)

    # Prepare the data, grouping by sector when enabled
    dfData = pd.DataFrame({'data': data})
    if USE_SECTORS and sectors is not None:
        dfData['sector'] = sectors
    else:
        dfData['sector'] = ''

    # Z-score within each group, guarding against a zero standard deviation
    zscore = lambda x: (x - x.mean()) / (x.std() if x.std() != 0 else 1)
    data = dfData.groupby(['sector'])['data'].transform(zscore)

    return data


# Scale a series so its absolute values sum to 1; optionally demean first.
def normalize(data, demean=False):
    data = pd.Series(data)
    if demean:
        data = data - data.mean()
        
    denom = data.abs().sum()
    if denom == 0:
        denom = 1
    
    return data / denom        
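
# Illustrative usage (toy data, not from the original notebook): z-score within
# sector, then scale so gross exposure is 1; demean=True roughly dollar-
# neutralizes the weights.
_vals = pd.Series([1.0, 2.0, 3.0, 10.0, 20.0, 30.0])
_secs = pd.Series(['tech', 'tech', 'tech', 'energy', 'energy', 'energy'])
_weights = normalize(standardize(_vals, winsorize=False, sectors=_secs), demean=True)
assert abs(_weights.abs().sum() - 1.0) < 1e-9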

#################################################################################################################


# Factor formulation: the redacted block below defines the Earnings_Estimates_JL
# CustomFactor used in make_pipeline().

#secret sauce
#secret sauce
#secret sauce

#################################################################################################################

def make_pipeline():
    earnings_matter = Earnings_Estimates_JL()

    return Pipeline(
        columns={
            'earnings_matter': earnings_matter,
        },
        screen=universe
    )

result = run_pipeline(make_pipeline(), start_date=start, end_date=end)
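
# Quick sanity check on the pipeline output: a (date, asset)-indexed DataFrame
# with one column per Pipeline column.
print(result.shape)
print(result.head())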

#################################################################################################################

# Computing forward returns. get_pricing is a builtin of the Quantopian
# research environment; the end date is padded so Alphalens can compute
# forward returns for the last factor dates.
assets = result.index.levels[1]
pricing = get_pricing(assets, start, end + pd.Timedelta(days=250), fields="open_price")

factor_returns = al.utils.get_clean_factor_and_forward_returns(result['earnings_matter'], pricing,
                                                               periods=[1, 2, 3, 4, 5],
                                                               max_loss=1.)  # allow up to 100% data loss

# Alphalens full tear sheet output
al.tears.create_full_tear_sheet(factor_returns)
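
# For faster iteration, the lighter-weight tear sheets accept the same factor
# data, e.g. al.tears.create_returns_tear_sheet(factor_returns) or
# al.tears.create_information_tear_sheet(factor_returns).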

Pipeline Execution Time: 2 Minutes, 26.39 Seconds
Dropped 0.2% entries from factor data: 0.2% in forward returns computation and 0.0% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).
max_loss is 100.0%, not exceeded: OK!
Quantiles Statistics
factor_quantile        min       max      mean       std   count    count %
1                -9.626377  0.238692 -1.810795  2.108387  944738  20.026141
2                -0.525064  0.552004  0.195493  0.141978  942892  19.987010
3                 0.037152  0.760762  0.404815  0.105656  943131  19.992076
4                 0.141705  1.035046  0.519373  0.102276  942622  19.981287
5                 0.390006  6.628924  1.149123  0.751316  944141  20.013486
Returns Analysis
                                                   1D      2D      3D      4D      5D
Ann. alpha                                      0.085   0.072   0.066   0.061   0.058
beta                                           -0.153  -0.163  -0.171  -0.173  -0.175
Mean Period Wise Return Top Quantile (bps)      2.070   1.584   1.278   1.066   0.919
Mean Period Wise Return Bottom Quantile (bps)  -2.005  -1.491  -1.231  -1.081  -0.988
Mean Period Wise Spread (bps)                   4.076   3.113   2.562   2.207   1.969
Information Analysis
                       1D      2D      3D      4D      5D
IC Mean             0.015   0.017   0.018   0.018   0.018
IC Std.             0.091   0.096   0.098   0.099   0.099
Risk-Adjusted IC    0.169   0.182   0.184   0.183   0.182
t-stat(IC)          9.451  10.196  10.301  10.240  10.215
p-value(IC)         0.000   0.000   0.000   0.000   0.000
IC Skew            -0.199  -0.270  -0.313  -0.371  -0.394
IC Kurtosis         1.340   1.302   1.404   1.326   1.245
Turnover Analysis
                                     1D     2D     3D     4D     5D
Quantile 1 Mean Turnover          0.047  0.084  0.116  0.146  0.173
Quantile 2 Mean Turnover          0.093  0.159  0.214  0.261  0.303
Quantile 3 Mean Turnover          0.104  0.174  0.231  0.280  0.320
Quantile 4 Mean Turnover          0.093  0.156  0.207  0.251  0.288
Quantile 5 Mean Turnover          0.052  0.090  0.124  0.154  0.182

                                     1D     2D     3D     4D     5D
Mean Factor Rank Autocorrelation  0.973  0.947  0.921  0.896  0.873