Notebook
In [39]:
import numpy as np
import pandas as pd
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, Returns
from quantopian.pipeline.classifiers.fundamentals import Sector  
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.data.factset import Fundamentals
from quantopian.pipeline.data import Fundamentals, factset
from quantopian.pipeline.data import EquityPricing
from time import time
import alphalens as al
In [40]:
MORNINGSTAR_SECTOR_CODES = {
     -1: 'Misc',
    101: 'Basic Materials',
    102: 'Consumer Cyclical',
    103: 'Financial Services',
    104: 'Real Estate',
    205: 'Consumer Defensive',
    206: 'Healthcare',
    207: 'Utilities',
    308: 'Communication Services',
    309: 'Energy',
    310: 'Industrials',
    311: 'Technology' ,    
}
In [41]:
sector = Sector()
qtu = QTradableStocksUS()

universe = qtu
my_screen = universe

#training period 1
start = '2007-01-01'
end = '2010-12-31'
end_plus_one = '2011-02-01'

#training period 2
#start = '2011-01-01'
#end = '2014-12-31'
#end_plus_one = '2015-02-01'

#training period 3
#start = '2015-01-01'
#end = '2018-12-31'
#end_plus_one = '2019-02-01'

#OOS
#start = '2004-01-01'
#end = '2006-12-31'

class MyYrGr(CustomFactor):  
    window_length = 1        
    def compute(self, today, assets, out, f, g):               
        out[:] = (g[0]/f[0])

my_factor = MyYrGr(mask=universe, inputs = [factset.Fundamentals.mkt_val_public, factset.Fundamentals.free_cf_minus_div_ltm])

pipe = Pipeline(
    columns = {
            'MyFactor': my_factor,
            'Sector' : Sector()
    },
    screen=universe & my_factor.notnull() & my_factor.notnan() & my_factor.isfinite()
)

start_timer = time()
results = run_pipeline(pipe, start, end)
results.head()
results = results.dropna()
end_timer = time()
print "Time to run pipeline %.2f secs" % (end_timer - start_timer)
print "Total records: %d" % (results.shape[0])

Pipeline Execution Time: 5.01 Seconds
Time to run pipeline 9.10 secs
Total records: 1606192
In [42]:
results.MyFactor.value_counts()
Out[42]:
-0.031767    1008
 0.006773     792
 0.074000     428
-0.091912     416
 0.028280     390
-0.271626     379
-0.066846     378
 0.036296     361
 0.025722     360
 0.055325     356
 0.040920     327
-0.025274     327
 0.092110     313
 0.144509     311
 0.054707     291
 0.023855     291
 0.005981     291
 0.035163     289
-0.099059     289
 0.105354     276
 0.080265     273
-0.289901     273
 0.244153     269
 0.057068     268
 0.260290     268
 0.007907     266
 0.026530     265
 0.042424     264
 0.018284     263
 0.190700     263
             ... 
-0.043526       1
 0.093191       1
 0.090963       1
-0.168936       1
 0.062391       1
 0.060585       1
 0.121860       1
-0.080099       1
 0.049578       1
 0.089287       1
 0.085771       1
 0.093830       1
 0.040248       1
 0.006260       1
-0.018481       1
 0.036606       1
 0.097674       1
-0.109621       1
-0.181616       1
-0.058633       1
 0.016405       1
 0.038738       1
-0.132131       1
-0.083609       1
-0.011222       1
-0.150029       1
-0.015161       1
 0.059664       1
-0.122806       1
 0.063494       1
Name: MyFactor, dtype: int64
In [43]:
my_factor = results['MyFactor']
print my_factor.head()
sectors = results['Sector']
asset_list = results.index.levels[1]
prices = get_pricing(asset_list, start_date=start, end_date=end_plus_one, fields='open_price')
prices.head()
periods = (1, 5, 10, 15, 21)
factor_data = al.utils.get_clean_factor_and_forward_returns(factor=my_factor,
                                                            prices=prices,
                                                            groupby=sectors,
                                                            groupby_labels=MORNINGSTAR_SECTOR_CODES,
                                                            periods=periods, max_loss=10000)
2007-01-03 00:00:00+00:00  Equity(2 [ARNC])    -0.039194
                           Equity(24 [AAPL])    0.021701
                           Equity(31 [ABAX])    0.026965
                           Equity(41 [ARCB])    0.027571
                           Equity(52 [ABM])     0.093850
Name: MyFactor, dtype: float64
Dropped 0.2% entries from factor data: 0.2% in forward returns computation and 0.0% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).
max_loss is 1000000.0%, not exceeded: OK!
In [44]:
factor_data.head()
Out[44]:
1D 5D 10D 15D 21D factor group factor_quantile
date asset
2007-01-03 00:00:00+00:00 Equity(2 [ARNC]) -0.022938 -0.014300 0.006678 0.064893 0.095634 -0.039194 Basic Materials 1
Equity(24 [AAPL]) -0.024568 0.098041 0.067331 0.009514 -0.024916 0.021701 Technology 3
Equity(31 [ABAX]) 0.011923 -0.005702 -0.014515 -0.084500 0.141006 0.026965 Healthcare 3
Equity(41 [ARCB]) 0.045160 0.079410 0.084340 0.049828 0.067361 0.027571 Industrials 3
Equity(52 [ABM]) 0.008709 0.009580 0.083749 0.070589 0.145629 0.093850 Industrials 5
In [45]:
al.tears.create_returns_tear_sheet(factor_data, by_group=True)
Returns Analysis
1D 5D 10D 15D 21D
Ann. alpha 0.018 0.022 0.019 0.016 0.008
beta -0.073 -0.124 -0.135 -0.150 -0.188
Mean Period Wise Return Top Quantile (bps) 0.629 0.359 0.233 0.127 0.057
Mean Period Wise Return Bottom Quantile (bps) -1.047 -0.911 -0.844 -0.776 -0.718
Mean Period Wise Spread (bps) 1.676 1.316 1.123 0.951 0.830
<matplotlib.figure.Figure at 0x7fb382e4b6a0>