Notebook
In [39]:
import pandas as pd
import numpy as np
from quantopian.pipeline import Pipeline, CustomFactor
from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline.factors import CustomFactor
from quantopian.pipeline.filters import Q500US
from quantopian.research import run_pipeline
from time import strftime
from datetime import timedelta, datetime
from quantopian.pipeline.data.builtin import USEquityPricing
import seaborn as sns
import alphalens
import matplotlib.pyplot as plt
import talib as ta
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
from quantopian.pipeline.data.user_5bc6ecbe22e6183ec10bafba import test
from quantopian.pipeline import factors
In [2]:
MORNINGSTAR_SECTOR_CODES = {
     -1: 'Misc',
    101: 'Basic Materials',
    102: 'Consumer Cyclical',
    103: 'Financial Services',
    104: 'Real Estate',
    205: 'Consumer Defensive',
    206: 'Healthcare',
    207: 'Utilities',
    308: 'Communication Services',
    309: 'Energy',
    310: 'Industrials',
    311: 'Technology' ,    
}
In [75]:
factor1 = Fundamentals.growth_grade
factor2 = Fundamentals.pb_ratio
#factor_1_name = "Growth Grade"
#factor_2_name = "PB Ratio"
In [87]:
def auto_heatmap(factor1, factor2, factor_1_name, factor_2_name,
                 factor_1_type, factor_2_type,
                 quantile_groups = 4, 
                 start_date = '2010-02-01', end_date = '2018-11-15'):
    class PreviousClose(CustomFactor):  
        inputs = [USEquityPricing.close]  
        window_length = 1

        def compute(self, today, assets, out, close):  
            out[:] = close[0]

    class PreviousOpen(CustomFactor):  
        inputs = [USEquityPricing.open]  
        window_length = 1

        def compute(self, today, assets, out, open):  
            out[:] = open[0]     
     
    def make_pipeline():
        # define our pipeline
        pipe = Pipeline()
        
        universe = Q500US()
        
        if factor_1_type == "fundamental":
            factor_1 = Latest([factor1])
        elif factor_1_type == "factor":
            # Change factor parameters (window_length, fast_period etc) here:
            factor_1 = factor1()
            
        if factor_2_type == "fundamental":
            factor_2 = Latest([factor2])
        elif factor_2_type == "factor":
            # Change factor parameters (window_length, fast_period etc) here:
            factor_2 = factor2(window_length = 14)
            

        factor_1_dec = factor_1.quantiles(quantile_groups, mask=universe)
        factor_2_dec = factor_2.quantiles(quantile_groups, mask=universe)
        

    #     PreviousClose(window_length = 1) is the same as USEquityPricing.close.latest
        previous_close_price = PreviousClose(window_length = 2)
        close_price = USEquityPricing.close.latest
        open_price = USEquityPricing.open.latest

        Overnight_return = open_price / previous_close_price - 1
        Intraday_return = close_price / open_price - 1

        pipe = Pipeline(
            columns = {
                'open':open_price,
                'close':close_price,
                'close_previous':previous_close_price,
                'Overnight_return':Overnight_return,
                'Intraday_return':Intraday_return,
                factor_1_name:factor_1,
                factor_2_name:factor_2,
                factor_1_name+" quantile":factor_1_dec,
                factor_2_name+" quantile":factor_2_dec,
                'Sector': Sector(mask=universe),
                'first_30': test.first_30.latest,
                'last_30': test.last_30.latest
            },
            screen=universe
        )

        return pipe

    pipe = make_pipeline()

    results = run_pipeline(pipe, start_date, end_date).dropna()
    
    # Very important!
    # Lag factors to avoid look-forward-bias:
    cols_to_shift = [factor_1_name, factor_1_name+" quantile", factor_2_name, factor_2_name+" quantile"]
    results[cols_to_shift] = results.groupby(level=1)[cols_to_shift].shift(1)
    
    decile_means = results.groupby([factor_1_name+" quantile", factor_2_name+" quantile"])['Intraday_return', 'Overnight_return', 'first_30', 'last_30'].mean()
    decile_count = results.groupby([factor_1_name+" quantile", factor_2_name+" quantile"])['Overnight_return'].count()
    
    for ret_type in ['Intraday_return', 'Overnight_return', 'first_30', 'last_30']:
        sns.heatmap(decile_means[ret_type].unstack(), annot=True, linewidths=.5, fmt=".2%", 
                    center=0, cmap=sns.diverging_palette(20, 150, as_cmap=True), 
                    vmin=-0.005, vmax=0.005,).set_title(ret_type)
        plt.figure()
    
    sns.heatmap(decile_count.unstack(), annot=True, linewidths=.5, fmt='g', cmap="Blues").set_title('Number of Observations')
    plt.figure()
    
    decile_means_sector = results.groupby([factor_1_name+" quantile", factor_2_name+" quantile", "Sector"])['Intraday_return', 'Overnight_return', 'first_30', 'last_30'].mean()
    
    
    for ret_type in ['Intraday_return', 'Overnight_return', 'first_30', 'last_30']:
        def facet_heatmap(data, color, value_col, **kws):
            data = data.pivot(index=factor_1_name+" quantile", columns=factor_2_name+" quantile", values = value_col)
            sns.heatmap(data, annot=True, linewidths=.5, fmt=".2%", 
                        center=0, cmap=sns.diverging_palette(20, 150, as_cmap=True), 
                        vmin=-0.005, vmax=0.005, cbar = False, **kws)

        with sns.plotting_context(font_scale=5):
            g = sns.FacetGrid(decile_means_sector.reset_index(), col="Sector", col_wrap=3, size=4.5, aspect=1)

        g = g.map_dataframe(facet_heatmap, value_col = ret_type)

        g.set_titles(col_template="{col_name}", fontweight='bold', fontsize=18)
        g.fig.suptitle(ret_type + ' by sector')
        plt.figure()
        
    decile_count_sector = results.groupby([factor_1_name+" quantile", factor_2_name+" quantile", "Sector"])['Overnight_return'].count()
    
    def facet_heatmap_count(data, color, value_col, **kws):
            data = data.pivot(index=factor_1_name+" quantile", columns=factor_2_name+" quantile", values = value_col)
            sns.heatmap(data, annot=True, linewidths=.5, fmt='g', cmap="Blues", **kws)


    with sns.plotting_context(font_scale=5):
        g = sns.FacetGrid(decile_count_sector.reset_index(), col="Sector", col_wrap=3, size=4.5, aspect=1)

        g = g.map_dataframe(facet_heatmap_count, value_col = "Overnight_return")

        g.set_titles(col_template="{col_name}", fontweight='bold', fontsize=18)
        g.fig.suptitle('Number of observations by sector')
        plt.figure()
    
# results['cum_intraday']=results.groupby(level = 1)['Intraday_return'].apply(lambda x : x.shift().fillna(0).add(1).cumprod()).values
# results['cum_overnight']=results.groupby(level = 1)['Overnight_return'].apply(lambda x : x.shift().fillna(0).add(1).cumprod()).values
        
    return results

    
In [90]:
a = auto_heatmap(factor1 = Fundamentals.growth_grade, 
                 factor2 = Fundamentals.pb_ratio, 
                 factor_1_name = "Growth Grade", 
                 factor_2_name ="PB Ratio", 
                 factor_1_type = "fundamental",
                 factor_2_type = "fundamental",
                 quantile_groups = 5, 
                 start_date = '2012-10-01', 
                 end_date = '2018-11-15')

TypeErrorTraceback (most recent call last)
<ipython-input-90-3402d54c7ba7> in <module>()
      7                  quantile_groups = 5,
      8                  start_date = '2012-10-01',
----> 9                  end_date = '2018-11-15')

<ipython-input-87-e04c36c430a1> in auto_heatmap(factor1, factor2, factor_1_name, factor_2_name, factor_1_type, factor_2_type, quantile_groups, start_date, end_date)
     68         return pipe
     69 
---> 70     pipe = make_pipeline()
     71 
     72     results = run_pipeline(pipe, start_date, end_date).dropna()

<ipython-input-87-e04c36c430a1> in make_pipeline()
     24 
     25         if factor_1_type == "fundamental":
---> 26             factor_1 = Latest([factor1])
     27         elif factor_1_type == "factor":
     28             # Change factor parameters (window_length, fast_period etc) here:

/build/src/qexec_repo/zipline_repo/zipline/pipeline/mixins.pyc in __new__(cls, inputs, outputs, window_length, mask, dtype, missing_value, ndim, **kwargs)
    138             missing_value=missing_value,
    139             ndim=ndim,
--> 140             **kwargs
    141         )
    142 

/build/src/qexec_repo/zipline_repo/zipline/pipeline/term.pyc in __new__(cls, inputs, outputs, window_length, mask, domain, *args, **kwargs)
    508             window_length=window_length,
    509             domain=domain,
--> 510             *args, **kwargs
    511         )
    512 

/build/src/qexec_repo/zipline_repo/zipline/pipeline/term.pyc in __new__(cls, domain, dtype, missing_value, window_safe, ndim, *args, **kwargs)
    133                     ndim=ndim,
    134                     params=params,
--> 135                     *args, **kwargs
    136                 )
    137             return new_instance

/build/src/qexec_repo/zipline_repo/zipline/pipeline/term.pyc in _init(self, inputs, outputs, window_length, mask, *args, **kwargs)
    516         self.window_length = window_length
    517         self.mask = mask
--> 518         return super(ComputableTerm, self)._init(*args, **kwargs)
    519 
    520     @classmethod

/build/src/qexec_repo/zipline_repo/zipline/pipeline/term.pyc in _init(self, domain, dtype, missing_value, window_safe, ndim, params)
    275         # should set this flag to True.
    276         self._subclass_called_super_validate = False
--> 277         self._validate()
    278         assert self._subclass_called_super_validate, (
    279             "Term._validate() was not called.\n"

/build/src/qexec_repo/zipline_repo/zipline/pipeline/mixins.pyc in _validate(self)
    243                     name=type(self).__name__,
    244                     expected=self.dtype,
--> 245                     actual=self.inputs[0].dtype,
    246                 )
    247             )

TypeError: Latest expected an input of dtype float64, but got object instead.
In [ ]: