from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline,symbols
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.data import Fundamentals
from zipline.utils.tradingcalendar import trading_day
from quantopian.pipeline.data.builtin import USEquityPricing
import matplotlib as plt
import numpy as np
from quantopian.pipeline import CustomFactor

# Analysis window handed to run_pipeline, as ISO 'YYYY-MM-DD' strings.
tstart = '2007-01-10'
tend = '2018-01-10'

# Pricing window handed to get_pricing. It is derived by replacing the
# day-of-month part of the analysis dates: the start is moved to the 1st of
# the starting month and the end to the 20th of the ending month.
# NOTE(review): the longest forward-return horizon requested further down is
# 22 periods, while tendplus is only ten calendar days after tend -- confirm
# that losing forward returns for the last factor dates is acceptable.
tstartprevious = tstart[:8] + '01'
tendplus = tend[:8] + '20'

# Percentile cut-offs; not referenced anywhere in the visible part of the file.
top_percentile = 10
bottom_percentile = 90
# Backward-compatible alias for the original, misspelled name.
bottom_percetile = bottom_percentile

def make_pipeline():
    """Build the pipeline producing a z-scored price-to-book factor.

    Output columns:
        sector -- latest Morningstar sector code.
        factor -- latest P/B ratio, z-scored over the assets whose raw P/B
                  falls inside the 0-95 percentile band.

    The screen keeps assets that are in QTradableStocksUS and whose z-scored
    factor is not null.
    """
    sector_code = Fundamentals.morningstar_sector_code.latest

    # The percentile band is evaluated on the raw ratio, and then used as the
    # mask for the z-score.
    # NOTE(review): presumably assets outside the band come out as NaN and are
    # removed by the notnull() screen below -- confirm against the zscore docs.
    pb_ratio = Fundamentals.pb_ratio.latest
    inside_band = pb_ratio.percentile_between(0, 95)
    pb_zscore = pb_ratio.zscore(mask=inside_band)

    # NOTE(review): the z-score mask is not restricted to QTradableStocksUS,
    # only the final screen is -- confirm that this is intended.
    tradable_with_factor = QTradableStocksUS() & pb_zscore.notnull()

    # Long/short percentile filters were tried here at some point and are
    # currently not part of the output.
    columns = {
        'sector': sector_code,
        'factor': pb_zscore,
    }
    return Pipeline(columns=columns, screen=tradable_with_factor)

# Evaluate the pipeline over the analysis window [tstart, tend].
my_pipe = run_pipeline(
    make_pipeline(),
    start_date=tstart,
    end_date=tend,
)
# Disabled experiment: log-transform of the factor column.
#my_pipe.loc[:,'factor'] = np.log(my_pipe.factor)

# Notebook-style preview of the first rows.
my_pipe.head()

# Human-readable label for each Morningstar sector code that can appear in the
# 'sector' column of the pipeline output.
sector_labels = {
    101: 'Basic_material',
    102: 'Consumer_Cyclical',
    103: 'Financial_Services',
    104: 'Real_estate',  # was misspelled 'Real_state'
    205: 'Consumer_defensive',
    206: 'Healthcare',
    207: 'Utilities',
    308: 'Communication_services',
    309: 'Energy',
    310: 'Industrials',
    311: 'Technology',
}

# Collect the distinct values of the second index level of my_pipe (the asset
# level, judging by the pipeline output) over the whole period.
assets = my_pipe.index.levels[1].unique()
# Notebook-style echo of how many distinct assets were found.
len(assets)

3936

# Create a basic plot with the population of each sector and each quartile
#quart_sectors = my_pipe.groupby(['quartiles', 'sector']).agg(['count']);
#quart_sectors.columns = quart_sectors.columns.droplevel(0)
#quart_sectors = quart_sectors.reset_index()
#quart_sectors

#bottom_quart_sectors = quart_sectors.loc[quart_sectors['quartiles']==0];
#bottom_quart_sectors.loc[:,'norm'] = bottom_quart_sectors.loc[:,'count']*100.0/bottom_quart_sectors.loc[:,'count'].sum();
#bottom_quart_sectors['sector'] = bottom_quart_sectors['sector'].map(sector_labels);
#bottom_quart_sectors

# Fetch open prices for every collected asset over the pricing window
# [tstartprevious, tendplus].
pricing = get_pricing(
    assets,
    start_date=tstartprevious,
    end_date=tendplus,
    fields='open_price',
)

# Build an {asset: sector code} mapping from the pipeline output.
# Work on a copy so my_pipe keeps its two-level index.
clean_df = my_pipe.copy()
# Drop the first index level so that rows are keyed by asset only.
clean_df.index = clean_df.index.droplevel(0)
# An asset shows up once per row it had in my_pipe; keep its first row only.
clean_df = clean_df.loc[~clean_df.index.duplicated(keep='first')]
assets_sectors = clean_df['sector'].to_dict()
# Notebook-style echo of the number of mapped assets.
len(assets_sectors)

3936

import alphalens

# Combine the factor column with the price table into the factor/forward-
# returns frame alphalens works on: 10 quantiles, horizons of 1, 10 and 22
# periods.
# Sector grouping (groupby=assets_sectors, groupby_labels=sector_labels) is
# currently disabled.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    factor=my_pipe['factor'],
    prices=pricing,
    quantiles=10,
    periods=(1, 10, 22),
)

# Produce the full alphalens tear sheet for the factor.
alphalens.tears.create_full_tear_sheet(factor_data)

Quantiles Statistics

Returns Analysis

/usr/local/lib/python2.7/dist-packages/alphalens/plotting.py:727: FutureWarning: pd.rolling_apply is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,min_periods=1,window=22).apply(args=<tuple>,func=<function>,kwargs=<dict>)
  min_periods=1, args=(period,))
/usr/local/lib/python2.7/dist-packages/alphalens/plotting.py:767: FutureWarning: pd.rolling_apply is deprecated for DataFrame and will be removed in a future version, replace with 
	DataFrame.rolling(center=False,min_periods=1,window=22).apply(args=<tuple>,func=<function>,kwargs=<dict>)
  min_periods=1, args=(period,))

Information Analysis

Turnover Analysis

<matplotlib.figure.Figure at 0x7f2d11e80450>

# Mean forward return per factor quantile, broken down by group when possible.
# by_group=True requires a 'group' column, which factor_data only carries when
# it was built with a groupby mapping; without that column alphalens fails
# with a KeyError. Fall back to the ungrouped statistics in that case.
has_groups = 'group' in factor_data.columns
mean_return_quantile_sector, mean_return_quantile_sector_err = (
    alphalens.performance.mean_return_by_quantile(factor_data,
                                                  by_group=has_groups)
)

# Bar chart of the mean returns; the trailing semicolon suppresses the
# notebook echo of the returned axes object.
alphalens.plotting.plot_quantile_returns_bar(mean_return_quantile_sector,
                                             by_group=has_groups);

import matplotlib.pyplot as plt
import numpy as np

# Histogram (30 bins) of the factor values in the first cross-section of the
# pipeline output, i.e. the rows sharing the level-0 key of the first row.
first_date = my_pipe.index[0][0]
# Select the column by name. The earlier label slice (:'factor') returned every
# column up to and including 'factor', so the result depended on column order.
# A one-element list keeps `aa` a single-column DataFrame as before.
aa = my_pipe.loc[first_date, ['factor']]
np.histogram(aa, bins=30)

(array([70, 70, 70, 70, 69, 70, 70, 35,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0, 35, 70, 70, 69, 70, 70, 70, 70]),
 array([  1.00000000e+00,   7.07666667e+01,   1.40533333e+02,
          2.10300000e+02,   2.80066667e+02,   3.49833333e+02,
          4.19600000e+02,   4.89366667e+02,   5.59133333e+02,
          6.28900000e+02,   6.98666667e+02,   7.68433333e+02,
          8.38200000e+02,   9.07966667e+02,   9.77733333e+02,
          1.04750000e+03,   1.11726667e+03,   1.18703333e+03,
          1.25680000e+03,   1.32656667e+03,   1.39633333e+03,
          1.46610000e+03,   1.53586667e+03,   1.60563333e+03,
          1.67540000e+03,   1.74516667e+03,   1.81493333e+03,
          1.88470000e+03,   1.95446667e+03,   2.02423333e+03,
          2.09400000e+03]))

		factor	sector
2007-01-10 00:00:00+00:00	Equity(2 [ARNC])	-0.593090	101
	Equity(24 [AAPL])	1.793635	311
	Equity(31 [ABAX])	0.961492	206
	Equity(41 [ARCB])	-0.712120	310
	Equity(52 [ABM])	-0.463212	310

	min	max	mean	std	count	count %
factor_quantile
1	-1.022555	-0.618892	-0.785982	0.074751	507782	10.025966
2	-0.789805	-0.477684	-0.632441	0.049598	506404	9.998758
3	-0.641484	-0.345783	-0.506509	0.046429	505984	9.990465
4	-0.505049	-0.180041	-0.373857	0.050261	506458	9.999824
5	-0.366757	0.020758	-0.224298	0.060429	506638	10.003378
6	-0.249932	0.213550	-0.047471	0.074069	505779	9.986418
7	-0.083308	0.514869	0.176712	0.094049	506189	9.994513
8	0.157187	0.954636	0.491985	0.132252	506160	9.993940
9	0.496901	1.711422	1.043607	0.237447	506085	9.992460
10	1.204560	6.509518	2.611826	0.923450	507190	10.014277

	1	10	22
Ann. alpha	0.010	0.012	0.014
beta	-0.075	-0.101	-0.114
Mean Period Wise Return Top Quantile (bps)	0.620	6.200	14.448
Mean Period Wise Return Bottom Quantile (bps)	0.193	3.224	8.175
Mean Period Wise Spread (bps)	-0.047	-0.028	-0.003

	1	10	22
IC Mean	0.004	0.006	0.006
IC Std.	0.110	0.113	0.115
t-stat(IC)	1.745	2.672	2.777
p-value(IC)	0.081	0.008	0.006
IC Skew	-0.157	-0.320	-0.513
IC Kurtosis	0.161	0.085	0.295
Ann. IR	0.528	0.808	0.840

	1	10	22
Quantile 1 Mean Turnover	0.015	0.089	0.163
Quantile 2 Mean Turnover	0.028	0.161	0.294
Quantile 3 Mean Turnover	0.034	0.195	0.357
Quantile 4 Mean Turnover	0.036	0.205	0.375
Quantile 5 Mean Turnover	0.037	0.211	0.385
Quantile 6 Mean Turnover	0.036	0.207	0.380
Quantile 7 Mean Turnover	0.034	0.191	0.350
Quantile 8 Mean Turnover	0.029	0.169	0.311
Quantile 9 Mean Turnover	0.022	0.130	0.240
Quantile 10 Mean Turnover	0.013	0.079	0.149