I would like to study the book to price ratio on tech stocks. For some reason the z-score is producing a lot of nans and alphales is droping nearly 30% of the data.
Am I doing something wrong? Thanks.
# Import Libraries
import numpy as np
import statsmodels.api as sm
import scipy.stats as stats
from statsmodels import regression
import matplotlib.pyplot as plt
import pandas as pd
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, Returns
from quantopian.pipeline.classifiers.fundamentals import Sector
from quantopian.research import run_pipeline
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.filters import Q500US
import alphalens as al
# Book to Price Ratio Factor
class BookToPrice(CustomFactor):
# pb = price to book, we'll need to take the reciprocal later
inputs = [Fundamentals.pb_ratio.latest]
window_length = 1
def compute(self, today, assets, out, pb):
out[:] = 1/pb
# Sector information
MORNINGSTAR_SECTOR_CODES = {
-1: 'Misc',
101: 'Basic Materials',
102: 'Consumer Cyclical',
103: 'Financial Services',
104: 'Real Estate',
205: 'Consumer Defensive',
206: 'Healthcare',
207: 'Utilities',
308: 'Communication Services',
309: 'Energy',
310: 'Industrials',
311: 'Technology' ,
}
tech = Sector().eq(Sector.TECHNOLOGY)
begin_period = '2003-01-01'
end_period = '2018-01-01'
# Define Universe
universe = Q500US()
# Make Pipeline
def make_pipeline():
# Create Pipeline
pipe = Pipeline()
# My factor
myfactor = BookToPrice()
# Returns
returns = Returns(inputs=[USEquityPricing.close], window_length=2)
# Factor Rank
myfactor_rank = myfactor.zscore()
# Pipeline to return
return Pipeline(
columns={
'myfactor': myfactor,
'myfactor_rank': myfactor_rank,
'sector' : Sector()
},
screen = (universe & tech)
)
results = run_pipeline(make_pipeline(), begin_period, end_period)
results.fillna(value=0);
print ("The zscore is producing a lot of NaNs.")
results.head()
# Get sector, asset list, and pricing
sectors = results['sector']
asset_list = results.index.levels[1]
prices = get_pricing(asset_list, start_date=begin_period, end_date=end_period, fields='open_price')
prices.head(5)
# Get clean factor and forward retuens for factor
periods = (1, 5, 10)
factor_data = al.utils.get_clean_factor_and_forward_returns(factor=results['myfactor_rank'],
prices=prices,
groupby=sectors,
groupby_labels=MORNINGSTAR_SECTOR_CODES,
periods=periods,
binning_by_group=True)
factor_data.head()