
Making Fama French Visuals

Pull in data for ME and BE_ME as well as stock returns

import numpy as np
import pandas as pd
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, Returns, AverageDollarVolume
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline import CustomFactor 
from import morningstar
from collections import defaultdict

dollar_volume = AverageDollarVolume(window_length=1)    
high_dollar_volume = dollar_volume.percentile_between(0, 100)
universe_screen = high_dollar_volume

class MarketCap(CustomFactor):
    inputs = [USEquityPricing.close, morningstar.valuation.shares_outstanding]
    window_length = 1
    def compute(self, today, assets, out, close, shares):
        out[:] = close * shares

class Book_To_Market(CustomFactor):
    inputs = [morningstar.balance_sheet.total_equity, USEquityPricing.close, morningstar.valuation.shares_outstanding]
    window_length = 1
    def compute(self, today, assets, out, equity, close, shares):
        out[:] = equity[-1] / (close[-1] * shares[-1])

class Price_Growth(CustomFactor):
    inputs = [USEquityPricing.close]
    window_length = 22
    def compute(self, today, assets, out, close):
        out[:] = (close[-1] - close[0]) / close[0] 
pipe = Pipeline(
            'ME' : MarketCap(mask=universe_screen),
            'BE_ME' : Book_To_Market(mask=universe_screen),
            'Month' : Price_Growth(mask=universe_screen),
            'Sector': Sector(mask=universe_screen),

# generates a pandas frame with our stocks at the specified dates and their factor values and columns
results = run_pipeline(pipe, '2003-01-01', '2003-01-01')
# Removes any rows where Nan values exist for either BE or BE_ME
results = results.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False)
print results.count()
BE_ME     2999
ME        2999
Month     2999
Sector    2999
dtype: int64

Discard bank stocks and use pd.qcut to split factors into deciles

# To correspond with Fama French, we remove all stocks in the financial sector
results = results[results.Sector != 101]
#print results.count()

# Now we want to get the deciles of both FF factors so we can isolate returns 
results['ME_Decile'] = pd.qcut(results['ME'], 10, labels=False) + 1
results['BE_ME_Decile'] = pd.qcut(results['BE_ME'], 10, labels=False) + 1
# Next we obtain a value 
results['FF_Matrix'] = results['ME_Decile'] * results['BE_ME_Decile']
print results.count()

# So now we have a dataframe with all the information we need to obtain the monthly returns of
# each portfolio within the 100 portfolios constructed in the Fama French 3 factor model
# Next we will set out to get the average returns of each of the 100 portfolios
BE_ME           2849
ME              2849
Month           2849
Sector          2849
ME_Decile       2849
BE_ME_Decile    2849
FF_Matrix       2849
dtype: int64
BE_ME ME Month Sector ME_Decile BE_ME_Decile FF_Matrix
2003-01-02 00:00:00+00:00 Equity(24 [AAPL]) 0.397849 1.029285e+10 -0.076031 311 10 4 40
Equity(31 [ABAX]) 0.320927 6.650741e+07 -0.070204 206 3 4 12
Equity(37 [ABCW]) 0.562741 5.083563e+08 0.024691 103 6 6 36
Equity(49 [ABK]) 0.608465 5.949605e+09 -0.101760 103 9 6 54
Equity(51 [ABL]) 217.993144 3.235010e+05 0.062147 102 1 10 10

Get the mean monthly return for 1 of the 100 portfolios

# Now lets quickly test if we can get the amount of stocks in the '100' portfolio, and its average return
portfolio_100 = results[results.FF_Matrix == 100.0]
print portfolio_100.count()
print np.mean(portfolio_100['Month'])
BE_ME           5
ME              5
Month           5
Sector          5
ME_Decile       5
BE_ME_Decile    5
FF_Matrix       5
dtype: int64
BE_ME ME Month Sector ME_Decile BE_ME_Decile FF_Matrix
2003-01-02 00:00:00+00:00 Equity(239 [AIG]) 7.816271 7.520594e+09 -0.114422 103 10 10 100
Equity(357 [TWX]) 5.067926 1.934085e+10 -0.205630 102 10 10 100
Equity(1335 [C]) 4.531023 1.782511e+10 -0.093830 103 10 10 100
Equity(7679 [JCI]) 2.913358 8.509426e+09 -0.042649 310 10 10 100
Equity(17104 [Q]) 4.281351 8.401319e+09 0.041580 308 10 10 100

Create the frame that will produce the heat map for 1 day

from collections import defaultdict
BE_ME_Portfolio = [x for x in np.arange(1, 10, 1)]
ME_Portfolio = [x for x in np.arange(1, 10, 1)]

#: Create a dictionary to hold all the monthly return values
monthly_returns = defaultdict(dict)

for decile_BE in BE_ME_Portfolio:
    for decile_ME in ME_Portfolio:    
        Portfolio = results.loc[(results['BE_ME_Decile'] == decile_BE) & (results['ME_Decile'] == decile_ME)]
        monthly_return = np.mean(Portfolio['Month'])
        monthly_returns[decile_BE][decile_ME] = monthly_return

monthly_returns = pd.DataFrame(monthly_returns) = "ME Decile" = "BE_ME Decile"

print monthly_returns.head()
BE_ME Decile         1         2         3         4         5         6  \
ME Decile                                                                  
1            -0.182220  0.164103 -0.102699 -0.090798 -0.006245 -0.077622   
2            -0.052705 -0.075352 -0.055272 -0.016001  0.003489 -0.016196   
3            -0.094477  0.041606  0.009870 -0.028204 -0.038072  0.014884   
4             0.029656  0.003229 -0.012716 -0.014060 -0.014572 -0.048949   
5            -0.027949 -0.057649 -0.027770 -0.024004 -0.009276 -0.038369   

BE_ME Decile         7         8         9  
ME Decile                                   
1            -0.142084 -0.138737 -0.091676  
2            -0.005661  0.015817 -0.054722  
3            -0.023970  0.012012 -0.053025  
4             0.000510 -0.042176 -0.054738  
5            -0.038409 -0.056270 -0.045664  

Display the heat map

import matplotlib.pyplot as pyplot

def heat_map(df):
    This creates our heatmap using our sharpe ratio dataframe
    fig = pyplot.figure()
    ax = fig.add_subplot(111)
    axim = ax.imshow(df.values,cmap = pyplot.get_cmap('RdYlGn'), interpolation = 'nearest')
    ax.set_title("Fama French 1 Month Returns")
#: Plot our heatmap

print monthly_returns
BE_ME Decile         1         2         3         4         5         6  \
ME Decile                                                                  
1            -0.182220  0.164103 -0.102699 -0.090798 -0.006245 -0.077622   
2            -0.052705 -0.075352 -0.055272 -0.016001  0.003489 -0.016196   
3            -0.094477  0.041606  0.009870 -0.028204 -0.038072  0.014884   
4             0.029656  0.003229 -0.012716 -0.014060 -0.014572 -0.048949   
5            -0.027949 -0.057649 -0.027770 -0.024004 -0.009276 -0.038369   
6             0.003371 -0.044455 -0.041024 -0.031829 -0.047482 -0.044741   
7            -0.024682 -0.043335 -0.025631 -0.053956 -0.039412 -0.017827   
8            -0.014967 -0.028825 -0.020557 -0.049058 -0.043334 -0.018395   
9            -0.018297 -0.019544 -0.042956 -0.036624 -0.063083 -0.048888   

BE_ME Decile         7         8         9  
ME Decile                                   
1            -0.142084 -0.138737 -0.091676  
2            -0.005661  0.015817 -0.054722  
3            -0.023970  0.012012 -0.053025  
4             0.000510 -0.042176 -0.054738  
5            -0.038409 -0.056270 -0.045664  
6            -0.045157 -0.053129 -0.061861  
7            -0.042722 -0.085331 -0.020284  
8            -0.051004 -0.118805 -0.113923  
9            -0.029243 -0.037621 -0.093287  

Loop through all the trade dates in 1Q 2016, and output a heatmap for each day. These maps can then be saved and used to create and animated GIF

Note - this notebook could not be shared on the forum with the ~60 maps displayed. I altered the start date and end date below to only ouput a few days worth of maps.

#### This is the master version ####

#import datetime
#Trade_Dates_2 = local_csv('Trade_Dates_2.csv', date_column='Date')\
#    .sort_index(ascending=True)
trade_dates = get_pricing(
    end_date = '2016-01-10', 


def heat_map(df):
    fig = pyplot.figure()
    ax = fig.add_subplot(111)
    axim = ax.imshow(df.values,clim=(-0.15, 0.15), cmap = pyplot.get_cmap('RdYlGn'), interpolation = 'quadric')
    #axim = ax.imshow(df.values,cmap = 'hot', interpolation = 'gaussian')
    #ax.set_ybound(lower=-0.30, upper=0.30)
    #ax.set_title("Fama French 1 Month Returns")
for date in trade_dates.index:
#for date in Trade_Dates_2.index:
    results = run_pipeline(pipe, date, date)
    results = results.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False)
    results = results[results.Sector != 101]
    results['ME_Decile'] = pd.qcut(results['ME'], 10, labels=False) + 1
    results['BE_ME_Decile'] = pd.qcut(results['BE_ME'], 10, labels=False) + 1
    results['FF_Matrix'] = results['ME_Decile'] * results['BE_ME_Decile']
    BE_ME_Portfolio = [x for x in np.arange(1, 10, 1)]
    ME_Portfolio = [x for x in np.arange(1, 10, 1)]
    monthly_returns = defaultdict(dict)
    for decile_BE in BE_ME_Portfolio:
        for decile_ME in ME_Portfolio:    
            Portfolio = results.loc[(results['BE_ME_Decile'] == decile_BE) & (results['ME_Decile'] == decile_ME)]
            monthly_return = np.mean(Portfolio['Month'])
            monthly_returns[decile_BE][decile_ME] = monthly_return

    monthly_returns = pd.DataFrame(monthly_returns) = "ME Decile" = "BE_ME Decile"

    #: Plot our heatmap

Code below will output a 3D rendering of Fama French portfolio returns on any given day

Note this will not run in Quantopian research environment

import numpy as np
from sklearn.cluster import MeanShift# as ms
from sklearn.datasets.samples_generator import make_blobs
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import style

cluster_cols = ['BE_ME','ME','Month']

X = np.array(results[cluster_cols].values)

ms = MeanShift()
labels = ms.labels_
cluster_centers = ms.cluster_centers_

n_clusters_ = len(np.unique(labels))

print("Number of estimated clusters:", n_clusters_)

colors = 10*['r','g','b','c','k','y','m']

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

fig_size = plt.rcParams["figure.figsize"]
fig_size[0] = 12
fig_size[1] = 9
plt.rcParams["figure.figsize"] = fig_size

for i in range(len(X)):
    ax.scatter(X[i][0], X[i][1], X[i][2], c=colors[labels[i]], marker='o')

            marker="x",color='k', s=150, linewidths = 5, zorder=10)

plt.title("Fama French Returns")

ax.set_zlabel('Monthly Return')
Importing make_blobs from sklearn.datasets.samples_generator raised an ImportError. No modules or attributes with a similar name were found. Our security system is concerned. If you continue to have import errors, your account will be suspended until a human can talk to you.