from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline,symbols
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.data import Fundamentals
from zipline.utils.tradingcalendar import trading_day
from quantopian.pipeline.data.builtin import USEquityPricing
import matplotlib as plt
import numpy as np
from quantopian.pipeline import CustomFactor
# Factor analysis window (ISO date strings).
tstart, tend = '2007-01-10', '2018-01-10'
# Wider window derived from the one above: same year and month, but the day
# is replaced with the 1st (start) and the 20th (end).
tstartprevious = '{}01'.format(tstart[:8])
tendplus = '{}20'.format(tend[:8])
# Percentile cut-offs; not referenced anywhere in the visible part of the file.
top_percentile, bottom_percetile = 10, 90
def make_pipeline():
    """Build the pipeline that feeds the factor analysis.

    Output columns:
        sector: latest Morningstar sector code of each asset.
        factor: latest price-to-book ratio, z-scored using only the assets
            whose P/B lies between the 0th and 95th percentile.

    The screen keeps assets that are in QTradableStocksUS and have a
    non-null z-scored factor.
    """
    sector_code = Fundamentals.morningstar_sector_code.latest
    raw_pb = Fundamentals.pb_ratio.latest

    # Standardise P/B while ignoring the top 5% of values, so extreme
    # ratios do not enter the z-score computation.
    trimmed = raw_pb.percentile_between(0, 95)
    pb_zscore = raw_pb.zscore(mask=trimmed)

    tradable = QTradableStocksUS() & pb_zscore.notnull()

    # Disabled long/short quartile legs, kept for reference:
    #longs = pb_zscore.percentile_between(0, 25, mask=tradable)
    #shorts = pb_zscore.percentile_between(75, 100, mask=tradable)

    columns = {
        'sector': sector_code,
        'factor': pb_zscore,
    }
    return Pipeline(columns=columns, screen=tradable)
# Evaluate the pipeline over the analysis window.
my_pipe = run_pipeline(
    make_pipeline(),
    start_date=tstart,
    end_date=tend,
)
#my_pipe.loc[:,'factor'] = np.log(my_pipe.factor)
# Notebook-style preview of the first rows; the value is not stored.
my_pipe.head()
# Display names keyed by sector code (the codes stored in the pipeline's
# 'sector' column, which comes from Fundamentals.morningstar_sector_code).
sector_labels = {
    101: 'Basic_material',
    102: 'Consumer_Cyclical',
    103: 'Financial_Services',
    104: 'Real_estate',  # was misspelled 'Real_state'
    205: 'Consumer_defensive',
    206: 'Healthcare',
    207: 'Utilities',
    308: 'Communication_services',
    309: 'Energy',
    310: 'Industrials',
    311: 'Technology',
}
# Collect every distinct entry of the second index level of the pipeline
# output (the assets, given that this list is what gets priced further down).
assets = (
    my_pipe.index
    .levels[1]
    .unique()
)
# Notebook-style check of how many assets were collected.
len(assets)
# Create a basic plot with the population of each sector and each quartile
#quart_sectors = my_pipe.groupby(['quartiles', 'sector']).agg(['count']);
#quart_sectors.columns = quart_sectors.columns.droplevel(0)
#quart_sectors = quart_sectors.reset_index()
#quart_sectors
#bottom_quart_sectors = quart_sectors.loc[quart_sectors['quartiles']==0];
#bottom_quart_sectors.loc[:,'norm'] = bottom_quart_sectors.loc[:,'count']*100.0/bottom_quart_sectors.loc[:,'count'].sum();
#bottom_quart_sectors['sector'] = bottom_quart_sectors['sector'].map(sector_labels);
#bottom_quart_sectors
# Fetch open prices for all collected assets over the widened date window.
pricing = get_pricing(
    assets,
    start_date=tstartprevious,
    end_date=tendplus,
    fields='open_price',
)

# Build one row per asset: drop the first index level, then keep only the
# first row seen for each remaining index value.
clean_df = my_pipe.copy()
clean_df.index = clean_df.index.droplevel(0)
clean_df = clean_df.loc[~clean_df.index.duplicated(keep='first')]

# Map each asset to the sector code found on its first row.
assets_sectors = clean_df['sector'].to_dict()
len(assets_sectors)
import alphalens
# The by-sector analysis at the bottom of this block needs a 'group' column in
# factor_data, so the asset -> sector mapping must be passed in here.
# Sector codes without an entry in sector_labels fall back to the code as
# text, because alphalens rejects group codes missing from groupby_labels.
sector_group_labels = {
    code: sector_labels.get(code, str(code))
    for code in set(assets_sectors.values())
}
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    my_pipe['factor'],
    pricing,
    groupby=assets_sectors,
    groupby_labels=sector_group_labels,
    quantiles=10,
    periods=(1, 10, 22),
)
# Run analysis
alphalens.tears.create_full_tear_sheet(factor_data)
# Mean forward return per factor quantile, broken down by sector.
mean_return_quantile_sector, mean_return_quantile_sector_err = (
    alphalens.performance.mean_return_by_quantile(factor_data, by_group=True)
)
alphalens.plotting.plot_quantile_returns_bar(mean_return_quantile_sector, by_group=True);
import matplotlib.pyplot as plt
import numpy as np
# Factor values on the first date of the pipeline output.
# Select the 'factor' column by name: the previous label slice (:'factor')
# took every column up to 'factor', so depending on column order it could
# mix sector codes into the histogram.
aa = my_pipe['factor'].xs(my_pipe.index[0][0], level=0)
# 30-bin histogram (counts, bin edges); the result is only displayed in a
# notebook and is not stored.
np.histogram(aa, bins=30)