Making Fama French Visuals

Code to create this notebook was derived from the following: https://www.quantopian.com/posts/research-do-you-want-parameter-optimization-click-here-to-get-started-heat-maps-included

Pull in data for ME and BE_ME as well as stock returns

import numpy as np
import pandas as pd
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, Returns, AverageDollarVolume
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline import CustomFactor 
from quantopian.pipeline.data import morningstar
from collections import defaultdict


# One-day average dollar volume for every asset.
dollar_volume = AverageDollarVolume(window_length=1)
# The 0-100 bounds span the whole percentile range, so this does not narrow the
# universe by liquidity (presumably it only excludes assets with no
# dollar-volume value -- confirm). Both names refer to the same filter.
universe_screen = high_dollar_volume = dollar_volume.percentile_between(0, 100)


class MarketCap(CustomFactor):
    """Market equity (ME): latest close price times shares outstanding."""
    inputs = [USEquityPricing.close, morningstar.valuation.shares_outstanding]
    window_length = 1

    def compute(self, today, assets, out, close, shares):
        # Index the last row explicitly (as Book_To_Market does) so a 1-D
        # per-asset vector is written to `out` instead of relying on a
        # (1, N) -> (N,) broadcast that only works while window_length == 1.
        out[:] = close[-1] * shares[-1]

class Book_To_Market(CustomFactor):
    """Book-to-market (BE/ME): total equity over price times shares outstanding."""
    inputs = [
        morningstar.balance_sheet.total_equity,
        USEquityPricing.close,
        morningstar.valuation.shares_outstanding,
    ]
    window_length = 1

    def compute(self, today, assets, out, equity, close, shares):
        # Only the last row of each input window is used.
        market_equity = close[-1] * shares[-1]
        out[:] = equity[-1] / market_equity

class Price_Growth(CustomFactor):
    """Fractional change in close price between the first and last rows of a 22-row window."""
    inputs = [USEquityPricing.close]
    window_length = 22

    def compute(self, today, assets, out, close):
        # Presumably rows are ordered oldest-first, making this the return
        # over the window -- confirm against the pipeline documentation.
        first_close = close[0]
        last_close = close[-1]
        out[:] = (last_close - first_close) / first_close
        
# One output column per factor/classifier; every term is masked by, and the
# pipeline is screened on, the same universe filter.
pipe = Pipeline(
    screen=universe_screen,
    columns=dict(
        BE_ME=Book_To_Market(mask=universe_screen),
        ME=MarketCap(mask=universe_screen),
        Month=Price_Growth(mask=universe_screen),
        Sector=Sector(mask=universe_screen),
    ),
)

# Evaluate the pipeline over a one-day window; the result is a pandas frame
# with one row per stock and one column per pipeline term.
results = run_pipeline(pipe, '2003-01-01', '2003-01-01')
# Drop every row that has a NaN in any column (ME, BE_ME, Month or Sector).
results = results.dropna()
print(results.count())

BE_ME     2999
ME        2999
Month     2999
Sector    2999
dtype: int64

Discard bank stocks and use pd.qcut to split factors into deciles

# To correspond with Fama French, we remove all stocks in the financial sector.
# Morningstar codes Financial Services as 103; 101 is Basic Materials, so
# filtering on 101 would leave banks and insurers in the sample.
results = results[results.Sector != 103]

# Now we want the deciles of both FF factors so we can isolate returns.
# pd.qcut(..., labels=False) numbers the bins 0..9; +1 shifts them to 1..10.
results['ME_Decile'] = pd.qcut(results['ME'], 10, labels=False) + 1
results['BE_ME_Decile'] = pd.qcut(results['BE_ME'], 10, labels=False) + 1
# FF_Matrix is the product of the two decile numbers. It is only a shorthand:
# some products identify a single cell (100 is 10 x 10 only), but others are
# shared by several cells (10 = 1 x 10 = 2 x 5 = ...). Select on the two decile
# columns when an arbitrary portfolio is needed.
results['FF_Matrix'] = results['ME_Decile'] * results['BE_ME_Decile']
print(results.count())
results.head()

# So now we have a dataframe with all the information we need to obtain the monthly returns of
# each portfolio within the 100 portfolios constructed in the Fama French 3 factor model.
# Next we will set out to get the average returns of each of the 100 portfolios.

BE_ME           2849
ME              2849
Month           2849
Sector          2849
ME_Decile       2849
BE_ME_Decile    2849
FF_Matrix       2849
dtype: int64

Get the mean monthly return for 1 of the 100 portfolios

# Quick sanity check on a single cell: FF_Matrix == 100 can only be the
# 10 x 10 portfolio, so count its stocks and average their 'Month' return.
portfolio_100 = results.loc[results['FF_Matrix'] == 100.0]
print(portfolio_100.count())
print(portfolio_100['Month'].mean())
portfolio_100.head()

BE_ME           5
ME              5
Month           5
Sector          5
ME_Decile       5
BE_ME_Decile    5
FF_Matrix       5
dtype: int64
-0.0829902209388

Create the frame that will produce the heat map for 1 day

from collections import defaultdict
# Decile labels run 1..10 (pd.qcut(..., 10, labels=False) + 1), so both axes
# must include 10. np.arange(1, 10, 1) stops at 9 and silently drops the top
# decile, giving an 81-cell grid instead of the 100 portfolios.
BE_ME_Portfolio = list(range(1, 11))
ME_Portfolio = list(range(1, 11))


#: Mean 'Month' return of each portfolio, keyed [BE_ME decile][ME decile]
monthly_returns = defaultdict(dict)

for decile_BE in BE_ME_Portfolio:
    # The BE_ME mask does not depend on the inner loop, so compute it once.
    in_BE_decile = results['BE_ME_Decile'] == decile_BE
    for decile_ME in ME_Portfolio:
        Portfolio = results.loc[in_BE_decile & (results['ME_Decile'] == decile_ME)]
        # A cell with no stocks averages to NaN.
        monthly_return = np.mean(Portfolio['Month'])
        monthly_returns[decile_BE][decile_ME] = monthly_return

# Outer dict keys (BE_ME deciles) become columns; inner keys (ME deciles) the index.
monthly_returns = pd.DataFrame(monthly_returns)
monthly_returns.index.name = "ME Decile"
monthly_returns.columns.name = "BE_ME Decile"

print(monthly_returns.head())

BE_ME Decile         1         2         3         4         5         6  \
ME Decile                                                                  
1            -0.182220  0.164103 -0.102699 -0.090798 -0.006245 -0.077622   
2            -0.052705 -0.075352 -0.055272 -0.016001  0.003489 -0.016196   
3            -0.094477  0.041606  0.009870 -0.028204 -0.038072  0.014884   
4             0.029656  0.003229 -0.012716 -0.014060 -0.014572 -0.048949   
5            -0.027949 -0.057649 -0.027770 -0.024004 -0.009276 -0.038369   

BE_ME Decile         7         8         9  
ME Decile                                   
1            -0.142084 -0.138737 -0.091676  
2            -0.005661  0.015817 -0.054722  
3            -0.023970  0.012012 -0.053025  
4             0.000510 -0.042176 -0.054738  
5            -0.038409 -0.056270 -0.045664

Display the heat map

import matplotlib.pyplot as pyplot

def heat_map(df, title="Fama French 1 Month Returns"):
    """
    Draw a heat map of a frame of portfolio returns.

    df    -- DataFrame whose values are colour-coded; its column and index
             labels become the tick labels, and `df.columns.name` /
             `df.index.name` become the axis labels.
    title -- text placed above the plot.

    Creates a new figure on each call and returns nothing.
    """
    fig = pyplot.figure()
    ax = fig.add_subplot(111)
    # 'nearest' keeps every cell a flat block of colour (no blending between cells).
    axim = ax.imshow(df.values, cmap=pyplot.get_cmap('RdYlGn'), interpolation='nearest')
    ax.set_xlabel(df.columns.name)
    ax.set_xticks(np.arange(len(df.columns)))
    ax.set_xticklabels(list(df.columns))
    ax.set_ylabel(df.index.name)
    ax.set_yticks(np.arange(len(df.index)))
    ax.set_yticklabels(list(df.index))
    ax.set_title(title)
    pyplot.colorbar(axim)
    
#: Plot our heatmap
heat_map(monthly_returns)



# Show the full grid of mean returns (the earlier print showed only the head).
print(monthly_returns)

BE_ME Decile         1         2         3         4         5         6  \
ME Decile                                                                  
1            -0.182220  0.164103 -0.102699 -0.090798 -0.006245 -0.077622   
2            -0.052705 -0.075352 -0.055272 -0.016001  0.003489 -0.016196   
3            -0.094477  0.041606  0.009870 -0.028204 -0.038072  0.014884   
4             0.029656  0.003229 -0.012716 -0.014060 -0.014572 -0.048949   
5            -0.027949 -0.057649 -0.027770 -0.024004 -0.009276 -0.038369   
6             0.003371 -0.044455 -0.041024 -0.031829 -0.047482 -0.044741   
7            -0.024682 -0.043335 -0.025631 -0.053956 -0.039412 -0.017827   
8            -0.014967 -0.028825 -0.020557 -0.049058 -0.043334 -0.018395   
9            -0.018297 -0.019544 -0.042956 -0.036624 -0.063083 -0.048888   

BE_ME Decile         7         8         9  
ME Decile                                   
1            -0.142084 -0.138737 -0.091676  
2            -0.005661  0.015817 -0.054722  
3            -0.023970  0.012012 -0.053025  
4             0.000510 -0.042176 -0.054738  
5            -0.038409 -0.056270 -0.045664  
6            -0.045157 -0.053129 -0.061861  
7            -0.042722 -0.085331 -0.020284  
8            -0.051004 -0.118805 -0.113923  
9            -0.029243 -0.037621 -0.093287

Loop through all the trade dates in 1Q 2016, and output a heatmap for each day. These maps can then be saved and used to create an animated GIF.

Note - this notebook could not be shared on the forum with the ~60 maps displayed. I altered the start date and end date below to only output a few days' worth of maps.

#### This is the master version ####

#import datetime
#Trade_Dates_2 = local_csv('Trade_Dates_2.csv', date_column='Date')\
#    .sort_index(ascending=True)
    
    
# Daily SPY closes for the date window; only the index of this result is used
# later, as the list of dates to run the pipeline on.
trade_dates = get_pricing(
    ['SPY'],
    start_date='2016-01-01',
    end_date='2016-01-10',
    fields='close_price',
    frequency='daily',
)

    

def heat_map(df, title=None, clim=(-0.15, 0.15)):
    """
    Draw a heat map of a frame of portfolio returns on a fixed colour scale.

    df    -- DataFrame whose values are colour-coded; its column and index
             labels become the tick labels, and `df.columns.name` /
             `df.index.name` become the axis labels.
    title -- text placed above the plot. When omitted, the module-level
             `date` variable (set by the daily loop) is used, which is what
             this function always did before the parameter existed.
    clim  -- (low, high) limits of the colour scale. A fixed range keeps the
             colours comparable from one day's map to the next.

    Creates a new figure on each call and returns nothing.
    """
    if title is None:
        # Backward-compatible fallback for callers that pass only `df`.
        title = date
    fig = pyplot.figure()
    ax = fig.add_subplot(111)
    axim = ax.imshow(df.values, clim=clim, cmap=pyplot.get_cmap('RdYlGn'), interpolation='quadric')
    ax.set_xlabel(df.columns.name)
    ax.set_xticks(np.arange(len(df.columns)))
    ax.set_xticklabels(list(df.columns))
    ax.set_ylabel(df.index.name)
    ax.set_yticks(np.arange(len(df.index)))
    ax.set_yticklabels(list(df.index))
    # Positional form: the `b` keyword was renamed and later removed in matplotlib.
    ax.grid(False)
    ax.set_title(title)
    pyplot.colorbar(axim)
    
# Decile labels run 1..10 (pd.qcut(..., 10, labels=False) + 1), so both axes
# must include 10; np.arange(1, 10, 1) stops at 9 and drops the top decile.
# The lists are the same every day, so they are built once outside the loop.
BE_ME_Portfolio = list(range(1, 11))
ME_Portfolio = list(range(1, 11))

# The loop variable must stay named `date`: heat_map() reads it for the title.
# (Iterate Trade_Dates_2.index instead to drive the loop from the trade-date CSV.)
for date in trade_dates.index:
    results = run_pipeline(pipe, date, date)
    # Drop every row that has a NaN in any column.
    results = results.dropna()
    # Fama French exclude financials. Morningstar codes Financial Services as
    # 103; 101 is Basic Materials.
    results = results[results.Sector != 103]
    results['ME_Decile'] = pd.qcut(results['ME'], 10, labels=False) + 1
    results['BE_ME_Decile'] = pd.qcut(results['BE_ME'], 10, labels=False) + 1
    results['FF_Matrix'] = results['ME_Decile'] * results['BE_ME_Decile']

    # Mean 'Month' return of each portfolio, keyed [BE_ME decile][ME decile].
    monthly_returns = defaultdict(dict)
    for decile_BE in BE_ME_Portfolio:
        # The BE_ME mask does not depend on the inner loop, so compute it once.
        in_BE_decile = results['BE_ME_Decile'] == decile_BE
        for decile_ME in ME_Portfolio:
            Portfolio = results.loc[in_BE_decile & (results['ME_Decile'] == decile_ME)]
            # A cell with no stocks averages to NaN.
            monthly_return = np.mean(Portfolio['Month'])
            monthly_returns[decile_BE][decile_ME] = monthly_return

    # Outer dict keys (BE_ME deciles) become columns; inner keys (ME deciles) the index.
    monthly_returns = pd.DataFrame(monthly_returns)
    monthly_returns.index.name = "ME Decile"
    monthly_returns.columns.name = "BE_ME Decile"

    #: Plot our heatmap
    heat_map(monthly_returns)

Code below will output a 3D rendering of Fama French portfolio returns on any given day

Note: this will not run in the Quantopian research environment.

import numpy as np
from sklearn.cluster import MeanShift# as ms
from sklearn.datasets.samples_generator import make_blobs
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import style
style.use("ggplot")

# Cluster the stocks in (BE_ME, ME, Month) space and plot them in 3D.
cluster_cols = ['BE_ME','ME','Month']

X = np.array(results[cluster_cols].values)

# NOTE(review): the three features are on very different scales (ME is in
# dollars, BE_ME and Month are small ratios), so the clustering is presumably
# driven almost entirely by ME -- consider standardising X first.
ms = MeanShift()
ms.fit(X)
labels = ms.labels_
cluster_centers = ms.cluster_centers_

n_clusters_ = len(np.unique(labels))

print("Number of estimated clusters:", n_clusters_)

colors = 10*['r','g','b','c','k','y','m']

# Set the default figure size BEFORE creating the figure; changing rcParams
# afterwards has no effect on a figure that already exists.
fig_size = [12, 9]
plt.rcParams["figure.figsize"] = fig_size

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# One scatter call for all points instead of one call per point. The modulo
# wraps the palette so more clusters than colours cannot raise IndexError.
point_colors = [colors[label % len(colors)] for label in labels]
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=point_colors, marker='o')

# Mark the cluster centres with large black crosses.
ax.scatter(cluster_centers[:,0],cluster_centers[:,1],cluster_centers[:,2],
            marker="x",color='k', s=150, linewidths = 5, zorder=10)

plt.title("Fama French Returns")

ax.set_xlabel('BE_ME')
ax.set_ylabel('ME')
ax.set_zlabel('Monthly Return')

plt.show()

InputRejected:
Importing make_blobs from sklearn.datasets.samples_generator raised an ImportError. No modules or attributes with a similar name were found. Our security system is concerned. If you continue to have import errors, your account will be suspended until a human can talk to you.

		BE_ME	ME	Month	Sector	ME_Decile	BE_ME_Decile	FF_Matrix
2003-01-02 00:00:00+00:00	Equity(24 [AAPL])	0.397849	1.029285e+10	-0.076031	311	10	4	40
	Equity(31 [ABAX])	0.320927	6.650741e+07	-0.070204	206	3	4	12
	Equity(37 [ABCW])	0.562741	5.083563e+08	0.024691	103	6	6	36
	Equity(49 [ABK])	0.608465	5.949605e+09	-0.101760	103	9	6	54
	Equity(51 [ABL])	217.993144	3.235010e+05	0.062147	102	1	10	10

		BE_ME	ME	Month	Sector	ME_Decile	BE_ME_Decile	FF_Matrix
2003-01-02 00:00:00+00:00	Equity(239 [AIG])	7.816271	7.520594e+09	-0.114422	103	10	10	100
	Equity(357 [TWX])	5.067926	1.934085e+10	-0.205630	102	10	10	100
	Equity(1335 [C])	4.531023	1.782511e+10	-0.093830	103	10	10	100
	Equity(7679 [JCI])	2.913358	8.509426e+09	-0.042649	310	10	10	100
	Equity(17104 [Q])	4.281351	8.401319e+09	0.041580	308	10	10	100