Notebook
In [20]:
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline,symbols
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.data import Fundamentals
from zipline.utils.tradingcalendar import trading_day
from quantopian.pipeline.data.builtin import USEquityPricing
import matplotlib as plt
import numpy as np
from quantopian.pipeline import CustomFactor
In [19]:
# Analysis window passed to run_pipeline (ISO date strings).
tstart = '2007-01-10'
tend = '2018-01-10'
# Padded window used when fetching prices: the start is moved back to the 1st
# of the start month and the end is moved to the 20th of the end month.
# NOTE(review): the end padding is only 10 calendar days past `tend`, while
# forward returns are later requested for up to 22 periods -- confirm that the
# padding is long enough for the last factor dates.
tstartprevious = tstart[0:8] + '01'
tendplus = tend[0:8] + '20'
# Percentile cut-offs (not referenced by the cells visible in this notebook).
top_percentile = 10
bottom_percentile = 90
# Backward-compatible alias for the original, misspelled name.
bottom_percetile = bottom_percentile
In [18]:
def make_pipeline():
    """Build the price-to-book factor pipeline.

    Returns a Pipeline with two columns:
      * 'sector' -- latest Morningstar sector code of each asset.
      * 'factor' -- latest price-to-book ratio, z-scored over the assets whose
        ratio falls in the 0-95th percentile band.

    The pipeline is screened to QTradableStocksUS members that have a
    non-null z-scored factor.
    """
    sector = Fundamentals.morningstar_sector_code.latest

    # Z-score the raw P/B ratio, masking out everything above the 95th
    # percentile so the top tail does not take part in the normalisation.
    # NOTE(review): this mask does not include QTradableStocksUS, so the
    # z-score presumably runs over every asset with a P/B in the band rather
    # than only the screened universe -- confirm this is intended.
    pb_ratio = Fundamentals.pb_ratio.latest
    inside_band = pb_ratio.percentile_between(0, 95)
    factor = pb_ratio.zscore(mask=inside_band)

    # Assets masked out of the z-score have a null factor and are dropped here.
    universe = QTradableStocksUS() & factor.notnull()

    columns = {
        'sector': sector,
        'factor': factor,
    }
    return Pipeline(columns=columns, screen=universe)
In [21]:
# Run the pipeline between tstart and tend and keep the result in my_pipe.
my_pipe = run_pipeline(make_pipeline(), tstart, tend)
# Disabled experiment: replace the factor column with its natural logarithm.
#my_pipe.loc[:,'factor'] = np.log(my_pipe.factor)
In [22]:
# Preview the first rows of the pipeline output.
my_pipe.head()
Out[22]:
factor sector
2007-01-10 00:00:00+00:00 Equity(2 [ARNC]) -0.593090 101
Equity(24 [AAPL]) 1.793635 311
Equity(31 [ABAX]) 0.961492 206
Equity(41 [ARCB]) -0.712120 310
Equity(52 [ABM]) -0.463212 310
In [23]:
# Human-readable names for the Morningstar sector codes found in the
# pipeline's 'sector' column.
sector_labels = {101:'Basic_material',
                 102:'Consumer_Cyclical',
                 103:'Financial_Services',
                 104:'Real_estate',  # was misspelled 'Real_state'
                 205:'Consumer_defensive',
                 206:'Healthcare',
                 207:'Utilities',
                 308:'Communication_services',
                 309:'Energy',
                 310:'Industrials',
                 311:'Technology'}
In [24]:
# Collect every entry of the second level of my_pipe's index (the assets seen
# over the whole run) and display how many there are.
assets = my_pipe.index.levels[1].unique()
len(assets)
Out[24]:
3936
In [78]:
# Create a basic plot with the population of each sector and each quartile
#quart_sectors = my_pipe.groupby(['quartiles', 'sector']).agg(['count']);
#quart_sectors.columns = quart_sectors.columns.droplevel(0)
#quart_sectors = quart_sectors.reset_index()
#quart_sectors
In [79]:
#bottom_quart_sectors = quart_sectors.loc[quart_sectors['quartiles']==0];
#bottom_quart_sectors.loc[:,'norm'] = bottom_quart_sectors.loc[:,'count']*100.0/bottom_quart_sectors.loc[:,'count'].sum();
#bottom_quart_sectors['sector'] = bottom_quart_sectors['sector'].map(sector_labels);
#bottom_quart_sectors
In [25]:
# Fetch open prices for every asset over the padded window
# (tstartprevious .. tendplus); these prices are later passed to alphalens
# together with the factor values.
# NOTE(review): get_pricing is not imported in the first cell; presumably it
# is provided by the research environment -- confirm.
pricing = get_pricing(assets,start_date=tstartprevious,end_date=tendplus,fields='open_price')
In [26]:
# Build an asset -> sector-code lookup from the pipeline output.
# Dropping the first index level leaves rows keyed by asset only; keeping the
# first occurrence of each asset retains the sector code from the first row in
# which that asset appears in my_pipe.
clean_df = my_pipe.reset_index(level=0, drop=True)
clean_df = clean_df[~clean_df.index.duplicated(keep='first')]
assets_sectors = clean_df['sector'].to_dict()
len(assets_sectors)
Out[26]:
3936
In [29]:
import alphalens

# Later cells call alphalens with by_group=True, which needs the per-asset
# group information, so the sector mapping is passed here via `groupby`.
# Labels are built only for the sector codes that actually occur, with a
# fallback name for any code missing from sector_labels, so an unlabeled code
# (e.g. a missing-sector sentinel) cannot make the label lookup fail.
group_labels = dict((code, sector_labels.get(code, 'Unknown_{}'.format(code)))
                    for code in set(assets_sectors.values()))

# Combine factor values and prices into the alphalens input frame:
# 10 factor quantiles and forward returns over 1, 10 and 22 periods.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(my_pipe['factor'],
                                                                   pricing,
                                                                   groupby=assets_sectors,
                                                                   groupby_labels=group_labels,
                                                                   quantiles=10,
                                                                   periods=(1,10,22),
                                                                  )
In [30]:
# Run the full alphalens tear sheet on factor_data; the tables and warnings
# recorded below this cell are its output.
alphalens.tears.create_full_tear_sheet(factor_data)
Quantiles Statistics
min max mean std count count %
factor_quantile
1 -1.022555 -0.618892 -0.785982 0.074751 507782 10.025966
2 -0.789805 -0.477684 -0.632441 0.049598 506404 9.998758
3 -0.641484 -0.345783 -0.506509 0.046429 505984 9.990465
4 -0.505049 -0.180041 -0.373857 0.050261 506458 9.999824
5 -0.366757 0.020758 -0.224298 0.060429 506638 10.003378
6 -0.249932 0.213550 -0.047471 0.074069 505779 9.986418
7 -0.083308 0.514869 0.176712 0.094049 506189 9.994513
8 0.157187 0.954636 0.491985 0.132252 506160 9.993940
9 0.496901 1.711422 1.043607 0.237447 506085 9.992460
10 1.204560 6.509518 2.611826 0.923450 507190 10.014277
Returns Analysis
1 10 22
Ann. alpha 0.010 0.012 0.014
beta -0.075 -0.101 -0.114
Mean Period Wise Return Top Quantile (bps) 0.620 6.200 14.448
Mean Period Wise Return Bottom Quantile (bps) 0.193 3.224 8.175
Mean Period Wise Spread (bps) -0.047 -0.028 -0.003
/usr/local/lib/python2.7/dist-packages/alphalens/plotting.py:727: FutureWarning: pd.rolling_apply is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,min_periods=1,window=22).apply(args=<tuple>,func=<function>,kwargs=<dict>)
  min_periods=1, args=(period,))
/usr/local/lib/python2.7/dist-packages/alphalens/plotting.py:767: FutureWarning: pd.rolling_apply is deprecated for DataFrame and will be removed in a future version, replace with 
	DataFrame.rolling(center=False,min_periods=1,window=22).apply(args=<tuple>,func=<function>,kwargs=<dict>)
  min_periods=1, args=(period,))
Information Analysis
1 10 22
IC Mean 0.004 0.006 0.006
IC Std. 0.110 0.113 0.115
t-stat(IC) 1.745 2.672 2.777
p-value(IC) 0.081 0.008 0.006
IC Skew -0.157 -0.320 -0.513
IC Kurtosis 0.161 0.085 0.295
Ann. IR 0.528 0.808 0.840
Turnover Analysis
1 10 22
Quantile 1 Mean Turnover 0.015 0.089 0.163
Quantile 2 Mean Turnover 0.028 0.161 0.294
Quantile 3 Mean Turnover 0.034 0.195 0.357
Quantile 4 Mean Turnover 0.036 0.205 0.375
Quantile 5 Mean Turnover 0.037 0.211 0.385
Quantile 6 Mean Turnover 0.036 0.207 0.380
Quantile 7 Mean Turnover 0.034 0.191 0.350
Quantile 8 Mean Turnover 0.029 0.169 0.311
Quantile 9 Mean Turnover 0.022 0.130 0.240
Quantile 10 Mean Turnover 0.013 0.079 0.149
1 10 22
Mean Factor Rank Autocorrelation 0.999 0.992 0.983
<matplotlib.figure.Figure at 0x7f2d11e80450>
In [47]:
# Mean return per factor quantile, broken down by group; the call returns a
# pair that is unpacked into the means and a second frame stored as `_err`.
# NOTE(review): by_group=True presumably needs the 'group' column that
# alphalens only adds when factor_data is built with a groupby argument --
# confirm this cell runs on a fresh kernel.
mean_return_quantile_sector, mean_return_quantile_sector_err = alphalens.performance.mean_return_by_quantile(factor_data, by_group=True)
In [48]:
# Bar chart of the per-group mean quantile returns computed in the previous
# cell; the trailing ';' suppresses the textual repr of the return value.
alphalens.plotting.plot_quantile_returns_bar(mean_return_quantile_sector, by_group=True);
In [49]:
# Re-binds plt to matplotlib.pyplot (the first cell binds plt to the top-level
# matplotlib package); plt itself is not used in this cell.
import matplotlib.pyplot as plt
import numpy as np
# Cross-section for the first date in my_pipe. `:'factor'` is a label slice
# over the columns (everything up to and including 'factor'), so the result
# depends on column order.
aa = my_pipe.loc[my_pipe.index[0][0],:'factor']
# 30-bin histogram of that cross-section; returns (counts, bin_edges) and does
# not draw a plot.
# NOTE(review): the recorded output has bin edges running from 1 to 2094,
# unlike the z-scored factor values shown in the my_pipe.head() output -- the
# output looks stale; re-run on a fresh kernel.
np.histogram(aa, bins=30)
Out[49]:
(array([70, 70, 70, 70, 69, 70, 70, 35,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0, 35, 70, 70, 69, 70, 70, 70, 70]),
 array([  1.00000000e+00,   7.07666667e+01,   1.40533333e+02,
          2.10300000e+02,   2.80066667e+02,   3.49833333e+02,
          4.19600000e+02,   4.89366667e+02,   5.59133333e+02,
          6.28900000e+02,   6.98666667e+02,   7.68433333e+02,
          8.38200000e+02,   9.07966667e+02,   9.77733333e+02,
          1.04750000e+03,   1.11726667e+03,   1.18703333e+03,
          1.25680000e+03,   1.32656667e+03,   1.39633333e+03,
          1.46610000e+03,   1.53586667e+03,   1.60563333e+03,
          1.67540000e+03,   1.74516667e+03,   1.81493333e+03,
          1.88470000e+03,   1.95446667e+03,   2.02423333e+03,
          2.09400000e+03]))