import numpy as np
import pandas as pd
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, Returns, AverageDollarVolume
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline import CustomFactor
from quantopian.pipeline.data import morningstar
from collections import defaultdict
# Universe selection, driven by dollar volume averaged over a one-day window.
# The percentile band spans 0 through 100, i.e. the full range, so despite
# its name `high_dollar_volume` does not restrict the universe to the most
# liquid names.
dollar_volume = AverageDollarVolume(window_length=1)
high_dollar_volume = dollar_volume.percentile_between(
    min_percentile=0,
    max_percentile=100,
)
universe_screen = high_dollar_volume
class MarketCap(CustomFactor):
    """Market equity (ME): close price multiplied by shares outstanding."""

    inputs = [USEquityPricing.close, morningstar.valuation.shares_outstanding]
    window_length = 1

    def compute(self, today, assets, out, close, shares):
        """Write the most recent close * shares into ``out``.

        ``close`` and ``shares`` are indexed with ``[-1]`` to pick the last
        row of the lookback window, the same way ``Book_To_Market`` does.
        With the default ``window_length = 1`` the result is unchanged, but
        the factor no longer depends on a one-row window being squeezed
        into ``out`` by broadcasting.
        """
        out[:] = close[-1] * shares[-1]
class Book_To_Market(CustomFactor):
    """Book-to-market ratio (BE/ME): total equity over market capitalisation."""

    inputs = [
        morningstar.balance_sheet.total_equity,
        USEquityPricing.close,
        morningstar.valuation.shares_outstanding,
    ]
    window_length = 1

    def compute(self, today, assets, out, equity, close, shares):
        """Fill ``out`` with equity / (close * shares), using the last window row."""
        latest_market_cap = close[-1] * shares[-1]
        out[:] = equity[-1] / latest_market_cap
class Price_Growth(CustomFactor):
    """Fractional change in close price across a 22-row lookback window."""

    inputs = [USEquityPricing.close]
    window_length = 22

    def compute(self, today, assets, out, close):
        """Fill ``out`` with (last close - first close) / first close."""
        first_close = close[0]
        last_close = close[-1]
        out[:] = (last_close - first_close) / first_close
# Pipeline producing the Fama-French inputs for every asset that passes the
# universe screen: market equity, book-to-market, trailing price growth and
# the Morningstar sector code.  Each term is masked with the same screen that
# filters the output rows.
pipe = Pipeline(
    screen=universe_screen,
    columns={
        'ME': MarketCap(mask=universe_screen),
        'BE_ME': Book_To_Market(mask=universe_screen),
        'Month': Price_Growth(mask=universe_screen),
        'Sector': Sector(mask=universe_screen),
    },
)
# Run the pipeline for a single day.  The result is a pandas DataFrame with
# one row per stock and one column per pipeline term.
results = run_pipeline(pipe, '2003-01-01', '2003-01-01')

# Drop every row that has a NaN in any column.  All of the arguments that
# used to be spelled out here were pandas defaults, so a bare dropna() is
# equivalent; it also avoids passing `how` and `thresh` together, which newer
# pandas versions reject.
results = results.dropna()
print(results.count())

# To correspond with Fama French, remove all stocks in the financial sector.
# Morningstar sector code 103 is Financial Services; 101 is Basic Materials,
# so filtering on 101 dropped the wrong sector.
FINANCIAL_SERVICES_SECTOR = 103
# .copy() gives us an independent frame so the column assignments below do
# not write into a slice of the unfiltered data.
results = results[results.Sector != FINANCIAL_SERVICES_SECTOR].copy()

# Bucket both Fama-French sort variables into deciles numbered 1 through 10
# (qcut with labels=False returns 0..9, hence the + 1).
results['ME_Decile'] = pd.qcut(results['ME'], 10, labels=False) + 1
results['BE_ME_Decile'] = pd.qcut(results['BE_ME'], 10, labels=False) + 1

# Product of the two decile numbers.  NOTE: this is not a unique identifier
# for a grid cell in general (e.g. 2 * 5 == 5 * 2), but a value of 100 can
# only come from decile 10 on both axes, which is all it is used for below.
results['FF_Matrix'] = results['ME_Decile'] * results['BE_ME_Decile']
print(results.count())
results.head()

# The frame now holds everything needed to compute the return of each of the
# 100 size / book-to-market portfolios of the Fama French 3 factor model.
# Quick sanity check: how many stocks fall in the (10, 10) portfolio, and
# what is their average 'Month' return?
portfolio_100 = results[results.FF_Matrix == 100.0]
print(portfolio_100.count())
print(np.mean(portfolio_100['Month']))
portfolio_100.head()
from collections import defaultdict
# The decile columns are numbered 1 through 10, so the grid must cover 1..10
# inclusive.  The previous np.arange(1, 10, 1) stopped at 9 and silently left
# the top decile of both axes out of the result.
BE_ME_Portfolio = list(range(1, 11))
ME_Portfolio = list(range(1, 11))

#: Nested mapping {BE/ME decile: {ME decile: mean 'Month' return}}
monthly_returns = defaultdict(dict)
for decile_BE in BE_ME_Portfolio:
    for decile_ME in ME_Portfolio:
        # Stocks that sit in this (BE/ME decile, ME decile) cell.
        in_cell = (
            (results['BE_ME_Decile'] == decile_BE)
            & (results['ME_Decile'] == decile_ME)
        )
        Portfolio = results.loc[in_cell]
        monthly_return = np.mean(Portfolio['Month'])
        monthly_returns[decile_BE][decile_ME] = monthly_return

# Outer keys become columns (BE/ME decile), inner keys the index (ME decile).
monthly_returns = pd.DataFrame(monthly_returns)
monthly_returns.index.name = "ME Decile"
monthly_returns.columns.name = "BE_ME Decile"
print(monthly_returns.head())
import matplotlib.pyplot as pyplot
def heat_map(df):
    """
    Draw ``df`` as a red-yellow-green heat map with a colour bar.

    Column labels of ``df`` are used as x tick labels and index labels as
    y tick labels; the axis captions come from ``df.columns.name`` and
    ``df.index.name``.
    """
    figure = pyplot.figure()
    axes = figure.add_subplot(111)
    image = axes.imshow(
        df.values,
        cmap=pyplot.get_cmap('RdYlGn'),
        interpolation='nearest',
    )

    # x axis: one tick per column of the frame.
    column_labels = list(df.columns)
    axes.set_xlabel(df.columns.name)
    axes.set_xticks(np.arange(len(df.columns)))
    axes.set_xticklabels(column_labels)

    # y axis: one tick per row of the frame.
    row_labels = list(df.index)
    axes.set_ylabel(df.index.name)
    axes.set_yticks(np.arange(len(df.index)))
    axes.set_yticklabels(row_labels)

    axes.set_title("Fama French 1 Month Returns")
    pyplot.colorbar(image)
#: Render the decile return grid as a heat map, then dump the full table
heat_map(monthly_returns)
print(monthly_returns)
#### This is the master version ####
#import datetime
#Trade_Dates_2 = local_csv('Trade_Dates_2.csv', date_column='Date')\
# .sort_index(ascending=True)
# Daily SPY close prices for the sample window.  The index of this result
# supplies the dates that the per-date loop further down iterates over.
trade_dates = get_pricing(
    ['SPY'],
    start_date='2016-01-01',
    end_date='2016-01-10',
    frequency='daily',
    fields='close_price',
)
def heat_map(df, title=None):
    """
    Draw ``df`` as a smoothed red-yellow-green heat map with a fixed scale.

    This definition replaces the earlier ``heat_map`` in this file.  The
    colour limits are pinned to (-0.15, 0.15) so that plots produced for
    different dates share one scale.

    Parameters
    ----------
    df : pandas.DataFrame
        Grid of values to plot.  Column labels become x tick labels, index
        labels become y tick labels, and ``df.columns.name`` /
        ``df.index.name`` caption the axes.
    title : optional
        Plot title.  When omitted, the module-level variable ``date`` is
        used, which is what this function always did before the parameter
        existed; in that case ``date`` must be defined when the function is
        called.
    """
    if title is None:
        # Backward-compatible fallback to the global loop variable.
        title = date

    fig = pyplot.figure()
    ax = fig.add_subplot(111)
    axim = ax.imshow(
        df.values,
        clim=(-0.15, 0.15),
        cmap=pyplot.get_cmap('RdYlGn'),
        interpolation='quadric',
    )
    ax.set_xlabel(df.columns.name)
    ax.set_xticks(np.arange(len(df.columns)))
    ax.set_xticklabels(list(df.columns))
    ax.set_ylabel(df.index.name)
    ax.set_yticks(np.arange(len(df.index)))
    ax.set_yticklabels(list(df.index))
    # Passed positionally: the keyword for this argument was renamed from
    # `b` to `visible` in newer Matplotlib, the positional form works in both.
    ax.grid(False)
    ax.set_title(title)
    pyplot.colorbar(axim)
# Morningstar sector code 103 is Financial Services; 101 is Basic Materials,
# so filtering on 101 dropped the wrong sector.
FINANCIAL_SERVICES_SECTOR = 103

# For every date in the sample window: rebuild the decile grid of mean
# 'Month' returns and draw it as a heat map.  heat_map() reads the loop
# variable `date` for its title, so the name must stay `date`.
for date in trade_dates.index:
    results = run_pipeline(pipe, date, date)
    # Drop rows with a NaN in any column (pandas defaults), then remove
    # financials.  .copy() keeps the column assignments below from writing
    # into a slice of the unfiltered frame.
    results = results.dropna()
    results = results[results.Sector != FINANCIAL_SERVICES_SECTOR].copy()

    # Deciles numbered 1 through 10 (qcut with labels=False returns 0..9).
    results['ME_Decile'] = pd.qcut(results['ME'], 10, labels=False) + 1
    results['BE_ME_Decile'] = pd.qcut(results['BE_ME'], 10, labels=False) + 1
    results['FF_Matrix'] = results['ME_Decile'] * results['BE_ME_Decile']

    # Cover deciles 1..10 inclusive; np.arange(1, 10, 1) stopped at 9 and
    # left the top decile of both axes out of the grid.
    BE_ME_Portfolio = list(range(1, 11))
    ME_Portfolio = list(range(1, 11))

    monthly_returns = defaultdict(dict)
    for decile_BE in BE_ME_Portfolio:
        for decile_ME in ME_Portfolio:
            in_cell = (
                (results['BE_ME_Decile'] == decile_BE)
                & (results['ME_Decile'] == decile_ME)
            )
            Portfolio = results.loc[in_cell]
            monthly_return = np.mean(Portfolio['Month'])
            monthly_returns[decile_BE][decile_ME] = monthly_return

    # Outer keys become columns (BE/ME decile), inner keys the index (ME).
    monthly_returns = pd.DataFrame(monthly_returns)
    monthly_returns.index.name = "ME Decile"
    monthly_returns.columns.name = "BE_ME Decile"

    #: Plot our heatmap
    heat_map(monthly_returns)
import numpy as np
from sklearn.cluster import MeanShift# as ms
from sklearn.datasets.samples_generator import make_blobs
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import style
style.use("ggplot")

# Set the default figure size BEFORE the figure is created.  Previously the
# rcParams entry was updated after plt.figure(), so it had no effect on the
# plot produced here.
fig_size = [12, 9]
plt.rcParams["figure.figsize"] = fig_size

# Cluster the stocks of the most recently computed `results` frame in
# (book-to-market, market equity, 'Month' return) space.
# NOTE(review): the three features are passed to MeanShift unscaled; if they
# live on very different scales the largest one will dominate the distance
# computation - confirm whether standardising first is intended.
cluster_cols = ['BE_ME', 'ME', 'Month']
X = np.array(results[cluster_cols].values)
ms = MeanShift()
ms.fit(X)
labels = ms.labels_
cluster_centers = ms.cluster_centers_
n_clusters_ = len(np.unique(labels))
# Single formatted string: prints the same text under Python 2 and 3
# (a two-argument print(...) shows a tuple on Python 2).
print("Number of estimated clusters: %d" % n_clusters_)

colors = 10*['r', 'g', 'b', 'c', 'k', 'y', 'm']
# Wrap around the palette so an unexpectedly large number of clusters cannot
# index past the end of `colors`.
point_colors = [colors[label % len(colors)] for label in labels]

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# One vectorised scatter call for all points instead of one call per point.
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=point_colors, marker='o')
ax.scatter(cluster_centers[:, 0], cluster_centers[:, 1], cluster_centers[:, 2],
           marker="x", color='k', s=150, linewidths=5, zorder=10)
plt.title("Fama French Returns")
ax.set_xlabel('BE_ME')
ax.set_ylabel('ME')
ax.set_zlabel('Monthly Return')
plt.show()