Notebook
In [1]:
import numpy as np
import pandas as pd
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, Returns
from quantopian.pipeline.classifiers.fundamentals import Sector  
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.data.factset import RBICSFocus
from quantopian.pipeline.data import Fundamentals, factset
from quantopian.pipeline.data import EquityPricing
from time import time
import alphalens as al
In [2]:
MORNINGSTAR_SECTOR_CODES = {
     -1: 'Misc',
    101: 'Basic Materials',
    102: 'Consumer Cyclical',
    103: 'Financial Services',
    104: 'Real Estate',
    205: 'Consumer Defensive',
    206: 'Healthcare',
    207: 'Utilities',
    308: 'Communication Services',
    309: 'Energy',
    310: 'Industrials',
    311: 'Technology' ,    
}
In [3]:
sector = Sector()
qtu = QTradableStocksUS()

universe = qtu
my_screen = universe

#training period 1
start = '2007-01-01'
end = '2010-12-31'
end_plus_one = '2011-02-01'

#training period 2
#start = '2011-01-01'
#end = '2014-12-31'
#end_plus_one = '2015-02-01'

#training period 3
#start = '2015-01-01'
#end = '2018-12-31'
#end_plus_one = '2019-02-01'

#OOS
#start = '2004-01-01'
#end = '2006-12-31'

class MyYrGr(CustomFactor):  
    window_length = 252        
    def compute(self, today, assets, out, f):               
        out[:] = (f[-1]-f[-252])/np.abs(f[-252])

my_factor = MyYrGr(mask=universe, inputs = [factset.Fundamentals.roa_qf])

pipe = Pipeline(
    columns = {
            'MyFactor': my_factor,
            'Sector' : Sector()
    },
    screen=universe
)

start_timer = time()
results = run_pipeline(pipe, start, end)
results.head()
results = results.dropna()
end_timer = time()
print "Time to run pipeline %.2f secs" % (end_timer - start_timer)
print "Total records: %d" % (results.shape[0])

Pipeline Execution Time: 24.73 Seconds
Time to run pipeline 30.40 secs
Total records: 1711259
In [5]:
results.MyFactor.value_counts()
Out[5]:
 0.000000    12875
 0.099342      251
-1.117525      220
-0.852280      180
 1.266227      178
 1.080020      168
-0.061047      159
-0.127308      157
-1.656194      156
-4.003126      147
 0.770990      133
 0.084922      128
-0.097315      127
-0.201390      112
-0.309818      111
-2.372270      101
-0.443967      100
-0.290898       97
-0.069712       92
 0.313819       92
-0.237585       90
 0.170296       88
 0.110671       87
-0.242561       86
-0.148031       85
-0.218530       84
 0.227736       83
 0.117629       82
-1.985829       82
-0.305470       82
             ...  
 0.893734        1
 6.364171        1
 0.085718        1
 0.002958        1
 1.119427        1
 0.041909        1
 0.172901        1
-0.017053        1
-0.047203        1
 0.639357        1
-0.022633        1
 0.942759        1
 9.342926        1
 0.022150        1
 0.195549        1
 0.757203        1
 4.279369        1
 0.013093        1
 0.123518        1
 0.256099        1
 0.465529        1
-1.490446        1
-0.193349        1
 0.733606        1
-0.097225        1
 0.286726        1
-0.176666        1
 0.482137        1
-0.299311        1
 0.536649        1
Name: MyFactor, dtype: int64
In [6]:
my_factor = results['MyFactor']
print my_factor.head()
sectors = results['Sector']
asset_list = results.index.levels[1]
prices = get_pricing(asset_list, start_date=start, end_date=end_plus_one, fields='open_price')
prices.head()
periods = (1, 5, 10)
factor_data = al.utils.get_clean_factor_and_forward_returns(factor=my_factor,
                                                            prices=prices,
                                                            groupby=sectors,
                                                            groupby_labels=MORNINGSTAR_SECTOR_CODES,
                                                            periods=periods, max_loss=10000)
2007-01-03 00:00:00+00:00  Equity(2 [ARNC])     0.611309
                           Equity(24 [AAPL])    0.014596
                           Equity(31 [ABAX])    0.183914
                           Equity(41 [ARCB])   -0.143121
                           Equity(52 [ABM])     0.543047
Name: MyFactor, dtype: float64
Dropped 0.1% entries from factor data: 0.1% in forward returns computation and 0.0% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).
max_loss is 1000000.0%, not exceeded: OK!
In [7]:
factor_data.head()
Out[7]:
1D 5D 10D factor group factor_quantile
date asset
2007-01-03 00:00:00+00:00 Equity(2 [ARNC]) -0.022938 -0.014300 0.006678 0.611309 Basic Materials 5
Equity(24 [AAPL]) -0.024568 0.098041 0.067331 0.014596 Technology 3
Equity(31 [ABAX]) 0.011923 -0.005702 -0.014515 0.183914 Healthcare 4
Equity(41 [ARCB]) 0.045160 0.079410 0.084340 -0.143121 Industrials 2
Equity(52 [ABM]) 0.008709 0.009580 0.083749 0.543047 Industrials 5
In [8]:
al.tears.create_returns_tear_sheet(factor_data, by_group=True)
Returns Analysis
1D 5D 10D
Ann. alpha -0.056 -0.028 -0.006
beta -0.068 -0.066 -0.081
Mean Period Wise Return Top Quantile (bps) -0.413 -0.569 -0.675
Mean Period Wise Return Bottom Quantile (bps) 0.967 1.038 1.150
Mean Period Wise Spread (bps) -1.380 -1.529 -1.730
<matplotlib.figure.Figure at 0x7fba13adc7f0>