Zscore producing too many Nans? Am I doing something wrong?¶

I would like to study the book to price ratio on tech stocks. For some reason the z-score is producing a lot of nans and alphales is droping nearly 30% of the data.

Am I doing something wrong? Thanks.

# Import Libraries

import numpy as np
import statsmodels.api as sm
import scipy.stats as stats
from statsmodels import regression
import matplotlib.pyplot as plt
import pandas as pd


from quantopian.pipeline import Pipeline
from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, Returns
from quantopian.pipeline.classifiers.fundamentals import Sector  
from quantopian.research import run_pipeline
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.filters import Q500US

import alphalens as al

# Book to Price Ratio Factor
class BookToPrice(CustomFactor):
    # pb = price to book, we'll need to take the reciprocal later
    inputs = [Fundamentals.pb_ratio.latest]
    window_length = 1
    
    def compute(self, today, assets, out, pb):
        out[:] = 1/pb

# Sector information
MORNINGSTAR_SECTOR_CODES = {
     -1: 'Misc',
    101: 'Basic Materials',
    102: 'Consumer Cyclical',
    103: 'Financial Services',
    104: 'Real Estate',
    205: 'Consumer Defensive',
    206: 'Healthcare',
    207: 'Utilities',
    308: 'Communication Services',
    309: 'Energy',
    310: 'Industrials',
    311: 'Technology' ,    
}
tech = Sector().eq(Sector.TECHNOLOGY)


begin_period = '2003-01-01'
end_period = '2018-01-01'

# Define Universe
universe = Q500US()

# Make Pipeline
def make_pipeline():

    # Create Pipeline
    pipe = Pipeline()

    # My factor
    myfactor = BookToPrice()
    
    # Returns
    returns = Returns(inputs=[USEquityPricing.close], window_length=2)
    
    # Factor Rank
    myfactor_rank = myfactor.zscore()

    # Pipeline to return    
    return Pipeline(
      columns={
          'myfactor': myfactor,
          'myfactor_rank': myfactor_rank,
          'sector' : Sector()
      },
      screen  = (universe & tech)
    )

results = run_pipeline(make_pipeline(), begin_period, end_period)
results.fillna(value=0);

print ("The zscore is producing a lot of NaNs.")
results.head()

The zscore is producing a lot of NaNs.

# Get sector, asset list, and pricing
sectors = results['sector']
asset_list = results.index.levels[1]
prices = get_pricing(asset_list, start_date=begin_period, end_date=end_period, fields='open_price')
prices.head(5)

# Get clean factor and forward retuens for factor
periods = (1, 5, 10)
factor_data = al.utils.get_clean_factor_and_forward_returns(factor=results['myfactor_rank'],
                                                            prices=prices,
                                                            groupby=sectors,
                                                            groupby_labels=MORNINGSTAR_SECTOR_CODES,
                                                            periods=periods,
                                                            binning_by_group=True)
factor_data.head()

Dropped 28.9% entries from factor data: 28.9% in forward returns computation and 0.0% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).
max_loss is 35.0%, not exceeded: OK!

	Equity(24 [AAPL])	Equity(67 [ADSK])	Equity(114 [ADBE])	Equity(115 [ADCT])	Equity(122 [ADI])	Equity(328 [ALTR])	Equity(337 [AMAT])	Equity(351 [AMD])	Equity(417 [ANDW])	Equity(465 [APH])	...	Equity(49139 [FIT])	Equity(49288 [LITE])	Equity(49506 [HPE])	Equity(49610 [SQ])	Equity(50049 [FTV])	Equity(50077 [TWLO])	Equity(50242 [DVMT])	Equity(50338 [NTNX])	Equity(50683 [SNAP])	Equity(50716 [DXC])
2003-01-02 00:00:00+00:00	0.920	7.116	12.462	14.84	17.315	11.378	10.699	6.56	10.30	4.453	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2003-01-03 00:00:00+00:00	0.948	7.195	12.711	16.24	18.207	11.884	11.105	7.01	10.66	4.690	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2003-01-06 00:00:00+00:00	0.962	6.992	13.134	19.18	18.921	12.174	11.876	7.05	10.65	4.670	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2003-01-07 00:00:00+00:00	0.947	7.190	13.664	19.74	19.942	13.177	12.290	7.25	11.07	4.740	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2003-01-08 00:00:00+00:00	0.934	7.507	14.063	18.55	19.806	12.725	11.972	7.06	10.74	4.846	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

		1D	5D	10D	factor	group	factor_quantile
date	asset
2005-09-20 00:00:00+00:00	Equity(24 [AAPL])	-0.000442	0.018860	0.035656	-0.273535	Technology	2
	Equity(67 [ADSK])	0.006731	0.028846	0.117308	-0.346485	Technology	1
	Equity(114 [ADBE])	-0.036250	-0.016731	0.042872	-0.286838	Technology	1
	Equity(122 [ADI])	-0.020921	-0.032457	0.004303	-0.197683	Technology	3
	Equity(328 [ALTR])	0.015387	0.006871	0.034475	-0.277710	Technology	1

		myfactor	myfactor_rank	sector
2003-01-02 00:00:00+00:00	Equity(24 [AAPL])	0.799808	NaN	311
	Equity(67 [ADSK])	0.361102	NaN	311
	Equity(114 [ADBE])	0.113600	NaN	311
	Equity(122 [ADI])	0.329402	NaN	311
	Equity(328 [ALTR])	0.239601	NaN	311

Zscore producing too many Nans? Am I doing something wrong?¶

Though 29% is under the 35% max, it seems a lot. Did I do something wrong with the zscore? Thanks.¶