import numpy as np
import pandas as pd
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, Returns
from quantopian.pipeline.classifiers.fundamentals import Sector
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.data.factset import Fundamentals
from quantopian.pipeline.data import Fundamentals, factset
from quantopian.pipeline.data import EquityPricing
from time import time
import alphalens as al
# Lookup table from numeric sector code to a readable sector name. It is
# handed to Alphalens as ``groupby_labels`` so grouped output shows names
# instead of raw codes.
# NOTE(review): -1 presumably marks assets with no sector code -- confirm.
MORNINGSTAR_SECTOR_CODES = {
    -1: 'Misc',
    101: 'Basic Materials',
    102: 'Consumer Cyclical',
    103: 'Financial Services',
    104: 'Real Estate',
    205: 'Consumer Defensive',
    206: 'Healthcare',
    207: 'Utilities',
    308: 'Communication Services',
    309: 'Energy',
    310: 'Industrials',
    311: 'Technology',
}
# Stand-alone sector classifier term.
sector = Sector()

# One QTradableStocksUS filter instance, exposed under three names:
# ``qtu``, ``universe`` (used as the factor mask and pipeline screen) and
# ``my_screen``.
qtu = QTradableStocksUS()
universe = my_screen = qtu
# Analysis window.
#   start / end   -- date range the pipeline is run over.
#   end_plus_one  -- last date of the price download; it lies after ``end``
#                    so prices beyond the final factor date are available.
# Keep exactly one window active and leave the others commented out.

# Training period 1 (active)
start, end, end_plus_one = '2007-01-01', '2010-12-31', '2011-02-01'

# Training period 2
# start, end, end_plus_one = '2011-01-01', '2014-12-31', '2015-02-01'

# Training period 3
# start, end, end_plus_one = '2015-01-01', '2018-12-31', '2019-02-01'

# Out-of-sample (OOS)
# start, end = '2004-01-01', '2006-12-31'
# NOTE(review): no end_plus_one was ever defined for the OOS window; pick a
# date after ``end`` before enabling it, otherwise the value from another
# window is silently reused.
class MyYrGr(CustomFactor):
    """Custom factor: ratio of the second input to the first.

    ``compute`` writes ``g[0] / f[0]`` into ``out``: ``f`` supplies the
    denominator and ``g`` the numerator. Only row 0 of each input window
    is read; ``window_length`` is 1.

    NOTE(review): assumes the pipeline engine passes the ``inputs`` given
    at construction positionally as ``f`` then ``g`` -- confirm against
    the CustomFactor documentation.
    """

    window_length = 1

    def compute(self, today, assets, out, f, g):
        denominator = f[0]
        numerator = g[0]
        # No zero guard here: a zero denominator is left to produce a
        # non-finite value.
        out[:] = numerator / denominator
# Factor inputs, in order: mkt_val_public (first, the denominator inside
# MyYrGr.compute) and free_cf_minus_div_ltm (second, the numerator).
my_factor = MyYrGr(
    inputs=[
        factset.Fundamentals.mkt_val_public,
        factset.Fundamentals.free_cf_minus_div_ltm,
    ],
    mask=universe,
)

# Rows are kept only when the factor value passes every validity filter.
# NOTE(review): isfinite() likely already excludes NaN, which would make
# notnull()/notnan() redundant -- confirm before simplifying.
valid_factor = my_factor.notnull() & my_factor.notnan() & my_factor.isfinite()

pipe = Pipeline(
    columns={
        'MyFactor': my_factor,
        'Sector': Sector(),
    },
    screen=universe & valid_factor,
)
# Run the pipeline over the analysis window. The timer brackets only the
# run_pipeline call so the reported duration matches its label (the
# preview and dropna below are no longer included in the measurement).
start_timer = time()
results = run_pipeline(pipe, start, end)
end_timer = time()

# Bare expressions such as this only render when they end a notebook
# cell; they have no visible effect when the file runs as a script.
results.head()

# Discard rows with any missing column value before analysis.
results = results.dropna()

# print(...) with one parenthesised argument is valid on both Python 2
# and Python 3; the bare print statement is a SyntaxError on Python 3.
print("Time to run pipeline %.2f secs" % (end_timer - start_timer))
print("Total records: %d" % (results.shape[0]))
results.MyFactor.value_counts()
# NOTE: from here on ``my_factor`` is rebound from the pipeline factor
# term to the column of computed factor values.
my_factor = results['MyFactor']
# Parenthesised print works on both Python 2 and Python 3.
print(my_factor.head())
sectors = results['Sector']

# Second level of the result index, used as the asset list for pricing.
# NOTE(review): index levels may retain entries whose rows were removed by
# dropna(), so this list can be a superset of the assets still present.
asset_list = results.index.levels[1]

# Prices run through end_plus_one (later than ``end``) so that data after
# the last factor date is available.
prices = get_pricing(
    asset_list,
    start_date=start,
    end_date=end_plus_one,
    fields='open_price',
)
prices.head()
# Forward-return horizons handed to Alphalens.
periods = (1, 5, 10, 15, 21)

# Combine factor values, prices and sector grouping into the frame that
# Alphalens tear sheets consume.
# NOTE(review): max_loss=10000 looks intended to switch off the
# data-loss check; confirm the accepted range in the Alphalens docs.
factor_data = al.utils.get_clean_factor_and_forward_returns(
    factor=my_factor,
    prices=prices,
    groupby=sectors,
    groupby_labels=MORNINGSTAR_SECTOR_CODES,
    periods=periods,
    max_loss=10000,
)
factor_data.head()

# Returns tear sheet, broken down by sector group.
al.tears.create_returns_tear_sheet(factor_data, by_group=True)