from quantopian.pipeline import Pipeline, CustomFactor
from quantopian.research import run_pipeline
from quantopian.pipeline.data.user_5aba7037bad61a0013e6e687 import test
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.filters import Q500US
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import talib
from datetime import datetime, timedelta, date
from scipy.stats.mstats import gmean
class PreviousClose(CustomFactor):
    """Custom factor that outputs row 0 of its close-price window.

    With the default ``window_length`` of 1 the window holds a single row,
    so the output is that row. Instantiating with a longer window (for
    example ``PreviousClose(window_length=2)``) makes row 0 an earlier
    close than the most recent one in the window.
    """

    inputs = [USEquityPricing.close]
    window_length = 1

    def compute(self, today, assets, out, close):
        """Copy the first row of the ``close`` window into ``out``."""
        first_row = close[0]
        out[:] = first_row
class PreviousOpen(CustomFactor):
    """Custom factor that outputs row 0 of its open-price window.

    With the default ``window_length`` of 1 the window holds a single row,
    so the output is that row; a longer window makes row 0 an earlier open
    than the most recent one in the window.
    """

    inputs = [USEquityPricing.open]
    window_length = 1

    def compute(self, today, assets, out, open):
        """Copy the first row of the ``open`` window into ``out``."""
        first_row = open[0]
        out[:] = first_row
def make_pipeline():
    """Build the pipeline of overnight / intraday return columns.

    Columns produced, restricted to the ``Q500US`` screen:

    * ``'Overnight Returns'`` -- latest open divided by the earlier close
      (row 0 of a two-row close window), minus 1.
    * ``'OR'`` -- the overnight return plus 1 (a gross return, usable in a
      geometric mean).
    * ``'Intraday Returns'`` -- latest close divided by latest open, minus 1.
    * ``'First 30'`` / ``'Last 30'`` -- latest values of the ``first_30`` and
      ``last_30`` fields of the user dataset ``test``.
      NOTE(review): presumably first / last 30-minute returns -- confirm
      against the uploaded dataset.

    Returns:
        Pipeline: the configured pipeline, ready for ``run_pipeline``.
    """
    # PreviousClose(window_length = 1) is the same as USEquityPricing.close.latest,
    # so a two-row window makes row 0 the close one row before the latest.
    previous_close_price = PreviousClose(window_length=2)
    open_price = USEquityPricing.open.latest
    close_price = USEquityPricing.close.latest

    overnight_returns = open_price / previous_close_price - 1
    OR = overnight_returns + 1
    intraday_returns = close_price / open_price - 1

    first_30 = test.first_30.latest
    last_30 = test.last_30.latest

    # The pipeline is built once here; no throwaway empty Pipeline() is
    # created beforehand.
    return Pipeline(
        columns={
            'Overnight Returns': overnight_returns,
            'OR': OR,
            'Intraday Returns': intraday_returns,
            'First 30': first_30,
            'Last 30': last_30,
        },
        screen=Q500US(),
    )
# Run the pipeline over the full sample and discard rows with any missing value.
pipe = make_pipeline()
df = run_pipeline(pipe, '2006-01-03', '2018-11-08').dropna()
df.head(10)

# Flatten the index into ordinary columns and give the two index levels
# readable names.
df = df.reset_index().rename(columns={'level_0': 'date', 'level_1': 'ticker'})
df.head(5)
# Load the earnings file; thousands=',' lets numbers such as "1,234" parse
# as numeric values.
earnings = pd.DataFrame(local_csv(path='bespoke_4Jun_2018.csv', thousands=','))

#: Data cleaning
# Replace the header row with short, code-friendly column names.
earnings.columns = [
    'symbol', 'date', 'time', 'prior_close',
    'eps_act', 'eps_est', 'eps_actvsest',
    'rev_act', 'rev_est', 'rev_actvsest',
    'guidance',
    'gap%', 'gap_pt',
    'open_to_close%', 'open_to_close_pt',
    'day_chng%', 'day_chng_pt',
]
# Parse the date column so it can be compared with other datetime values.
earnings['date'] = pd.to_datetime(earnings['date'])
def remove_earnings(data, earnings_df=earnings):
    """Return a copy of ``data`` without the rows dated on earnings days.

    Only the first distinct value of ``data['ticker']`` is looked up, so
    ``data`` is expected to hold a single ticker (e.g. one group of a
    ``groupby('ticker')``).

    Args:
        data: frame with at least ``'ticker'`` and ``'date'`` columns.
        earnings_df: frame with ``'symbol'`` and ``'date'`` columns; defaults
            to the module-level ``earnings`` frame as it was bound when this
            function was defined.

    Returns:
        A new frame holding the rows of ``data`` whose date is not one of
        that symbol's earnings dates, with the ``'ticker'`` column removed.
    """
    symbol = data['ticker'].unique()[0]
    is_symbol_row = earnings_df.symbol == symbol
    announcement_dates = earnings_df.loc[is_symbol_row, 'date']

    on_announcement_day = data.date.isin(announcement_dates)
    kept = data[~on_announcement_day].copy()
    del kept['ticker']
    return kept
def _add_lagged_rsi(data, column, timeperiod):
    """Add a one-row-lagged RSI of ``column`` to ``data`` as ``'RSI_<column>'``.

    Shared implementation behind the four ``get_RSI_*`` wrappers below, which
    differ only in their default ``column``.

    Args:
        data: frame containing ``column``; it is modified in place.
        column: name of the column the RSI is computed from.
        timeperiod: look-back period handed to ``talib.RSI``.

    Returns:
        The same ``data`` object, with the new ``'RSI_<column>'`` column.
    """
    rsi_name = 'RSI_' + str(column)
    # The values are cast to a float64 ('f8') array before being handed to talib.
    values = np.array(data[column], dtype='f8')
    data[rsi_name] = talib.RSI(values, timeperiod)
    # Shift down by one row so each row holds the RSI computed at the
    # previous row, not one that already includes its own value.
    data[rsi_name] = data[rsi_name].shift()
    return data


def get_RSI_first30(data, column='First 30', timeperiod=7):
    """Add the lagged RSI column ``'RSI_First 30'`` (by default) to ``data``."""
    return _add_lagged_rsi(data, column, timeperiod)


def get_RSI_last30(data, column='Last 30', timeperiod=7):
    """Add the lagged RSI column ``'RSI_Last 30'`` (by default) to ``data``."""
    return _add_lagged_rsi(data, column, timeperiod)


def get_RSI_intraday(data, column='Intraday Returns', timeperiod=7):
    """Add the lagged RSI column ``'RSI_Intraday Returns'`` (by default) to ``data``."""
    return _add_lagged_rsi(data, column, timeperiod)


def get_RSI_overnight(data, column='Overnight Returns', timeperiod=7):
    """Add the lagged RSI column ``'RSI_Overnight Returns'`` (by default) to ``data``."""
    return _add_lagged_rsi(data, column, timeperiod)
# Compute each lagged RSI per ticker, one after the other. Rows left with a
# missing value are dropped after every step, so each later RSI is computed
# on the already-trimmed rows.
df = (
    df.groupby('ticker').apply(get_RSI_first30).dropna()
    .groupby('ticker').apply(get_RSI_last30).dropna()
    .groupby('ticker').apply(get_RSI_intraday).dropna()
    .groupby('ticker').apply(get_RSI_overnight).dropna()
)
df.head(5)
# Bucket the raw series into ten equal-frequency bins over the whole sample;
# labels=False stores the integer bin number (0-9) instead of an interval.
df['First 30 Dec'] = pd.qcut(df['First 30'], q=10, labels=False)
df['Last 30 Dec'] = pd.qcut(df['Last 30'], q=10, labels=False)
df['Intraday Dec'] = pd.qcut(df['Intraday Returns'], q=10, labels=False)
df['Overnight Dec'] = pd.qcut(df['Overnight Returns'], q=10, labels=False)

# Same bucketing for the lagged RSI columns.
df['RSI_First 30 Dec'] = pd.qcut(df['RSI_First 30'], q=10, labels=False)
df['RSI_Last 30 Dec'] = pd.qcut(df['RSI_Last 30'], q=10, labels=False)
df['RSI_Intraday Dec'] = pd.qcut(df['RSI_Intraday Returns'], q=10, labels=False)
df['RSI_Overnight Dec'] = pd.qcut(df['RSI_Overnight Returns'], q=10, labels=False)

df = df.dropna()
df.head(10)
# Mean of Overnight Returns for every pair of lagged-RSI decile buckets.
# unstack() moves the second groupby key into the columns, so the first key
# runs along the heatmap's y-axis and the second key along its x-axis; the
# axis labels below follow that layout.

# Heatmap of RSI Intraday Dec/RSI Overnight Dec with mean of Overnight Returns
a = df.groupby(['RSI_Intraday Dec', 'RSI_Overnight Dec'])['Overnight Returns'].mean()
sns.heatmap(a.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True)
plt.xlabel('RSI_Overnight Dec')
plt.ylabel('RSI_Intraday Dec')
plt.title('RSI Intraday vs RSI Overnight with mean of Overnight Returns')

# Heatmap of RSI Intraday Dec/RSI First 30 Dec with mean of Overnight Returns
b = df.groupby(['RSI_Intraday Dec', 'RSI_First 30 Dec'])['Overnight Returns'].mean()
sns.heatmap(b.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True)
plt.xlabel('RSI_First 30 Dec')
plt.ylabel('RSI_Intraday Dec')
plt.title('RSI Intraday vs RSI First 30 with mean of Overnight Returns')

# Heatmap of RSI Intraday Dec/RSI Last 30 Dec with mean of Overnight Returns
c = df.groupby(['RSI_Intraday Dec', 'RSI_Last 30 Dec'])['Overnight Returns'].mean()
sns.heatmap(c.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True)
plt.xlabel('RSI_Last 30 Dec')
plt.ylabel('RSI_Intraday Dec')
plt.title('RSI Intraday vs RSI Last 30 with mean of Overnight Returns')

# Heatmap of RSI Overnight Dec/RSI First 30 Dec with mean of Overnight Returns
d = df.groupby(['RSI_Overnight Dec', 'RSI_First 30 Dec'])['Overnight Returns'].mean()
sns.heatmap(d.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True)
plt.xlabel('RSI_First 30 Dec')
plt.ylabel('RSI_Overnight Dec')
plt.title('RSI Overnight vs RSI First 30 with mean of Overnight Returns')

# Heatmap of RSI Overnight Dec/RSI Last 30 Dec with mean of Overnight Returns
e = df.groupby(['RSI_Overnight Dec', 'RSI_Last 30 Dec'])['Overnight Returns'].mean()
sns.heatmap(e.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True)
plt.xlabel('RSI_Last 30 Dec')
plt.ylabel('RSI_Overnight Dec')
plt.title('RSI Overnight vs RSI Last 30 with mean of Overnight Returns')

# Heatmap of RSI First 30 Dec/RSI Last 30 Dec with mean of Overnight Returns
# Grouped by First 30 / Last 30, as the title and labels state.
f = df.groupby(['RSI_First 30 Dec', 'RSI_Last 30 Dec'])['Overnight Returns'].mean()
sns.heatmap(f.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True)
plt.xlabel('RSI_Last 30 Dec')
plt.ylabel('RSI_First 30 Dec')
plt.title('RSI First 30 vs RSI Last 30 with mean of Overnight Returns')
# Standard deviation of Overnight Returns for every pair of lagged-RSI decile
# buckets. unstack() moves the second groupby key into the columns, so the
# first key runs along the heatmap's y-axis and the second key along its
# x-axis; the axis labels below follow that layout.

# Heatmap of RSI Intraday Dec/RSI Overnight Dec with standard deviation of Overnight Returns
g = df.groupby(['RSI_Intraday Dec', 'RSI_Overnight Dec'])['Overnight Returns'].std()
sns.heatmap(g.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True)
plt.xlabel('RSI_Overnight Dec')
plt.ylabel('RSI_Intraday Dec')
plt.title('RSI Intraday vs RSI Overnight with standard deviation of Overnight Returns')

# Heatmap of RSI Intraday Dec/RSI First 30 Dec with standard deviation of Overnight Returns
h = df.groupby(['RSI_Intraday Dec', 'RSI_First 30 Dec'])['Overnight Returns'].std()
sns.heatmap(h.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True)
plt.xlabel('RSI_First 30 Dec')
plt.ylabel('RSI_Intraday Dec')
plt.title('RSI Intraday vs RSI First 30 with standard deviation of Overnight Returns')

# Heatmap of RSI Intraday Dec/RSI Last 30 Dec with standard deviation of Overnight Returns
i = df.groupby(['RSI_Intraday Dec', 'RSI_Last 30 Dec'])['Overnight Returns'].std()
sns.heatmap(i.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True)
plt.xlabel('RSI_Last 30 Dec')
plt.ylabel('RSI_Intraday Dec')
plt.title('RSI Intraday vs RSI Last 30 with standard deviation of Overnight Returns')

# Heatmap of RSI Overnight Dec/RSI First 30 Dec with standard deviation of Overnight Returns
j = df.groupby(['RSI_Overnight Dec', 'RSI_First 30 Dec'])['Overnight Returns'].std()
sns.heatmap(j.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True)
plt.xlabel('RSI_First 30 Dec')
plt.ylabel('RSI_Overnight Dec')
plt.title('RSI Overnight vs RSI First 30 with standard deviation of Overnight Returns')

# Heatmap of RSI Overnight Dec/RSI Last 30 Dec with standard deviation of Overnight Returns
k = df.groupby(['RSI_Overnight Dec', 'RSI_Last 30 Dec'])['Overnight Returns'].std()
sns.heatmap(k.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True)
plt.xlabel('RSI_Last 30 Dec')
plt.ylabel('RSI_Overnight Dec')
plt.title('RSI Overnight vs RSI Last 30 with standard deviation of Overnight Returns')

# Heatmap of RSI First 30 Dec/RSI Last 30 Dec with standard deviation of Overnight Returns
l = df.groupby(['RSI_First 30 Dec', 'RSI_Last 30 Dec'])['Overnight Returns'].std()
sns.heatmap(l.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True)
plt.xlabel('RSI_Last 30 Dec')
plt.ylabel('RSI_First 30 Dec')
plt.title('RSI First 30 vs RSI Last 30 with standard deviation of Overnight Returns')
# Geometric mean of 'OR' for every pair of lagged-RSI decile buckets.
# unstack() moves the second groupby key into the columns, so the first key
# runs along the heatmap's y-axis and the second key along its x-axis; the
# axis labels below follow that layout.

# Heatmap of RSI Intraday Dec/RSI Overnight Dec with geometric mean of Overnight Returns
m = df.groupby(['RSI_Intraday Dec', 'RSI_Overnight Dec'])['OR'].apply(gmean)
sns.heatmap(m.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True, fmt='.2%')
plt.xlabel('RSI_Overnight Dec')
plt.ylabel('RSI_Intraday Dec')
plt.title('RSI Intraday vs RSI Overnight with geometric mean of Overnight Returns')

# Heatmap of RSI Intraday Dec/RSI First 30 Dec with geometric mean of Overnight Returns
n = df.groupby(['RSI_Intraday Dec', 'RSI_First 30 Dec'])['OR'].apply(gmean)
sns.heatmap(n.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True, fmt='.2%')
plt.xlabel('RSI_First 30 Dec')
plt.ylabel('RSI_Intraday Dec')
plt.title('RSI Intraday vs RSI First 30 with geometric mean of Overnight Returns')

# Heatmap of RSI Intraday Dec/RSI Last 30 Dec with geometric mean of Overnight Returns
o = df.groupby(['RSI_Intraday Dec', 'RSI_Last 30 Dec'])['OR'].apply(gmean)
sns.heatmap(o.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True, fmt='.2%')
plt.xlabel('RSI_Last 30 Dec')
plt.ylabel('RSI_Intraday Dec')
plt.title('RSI Intraday vs RSI Last 30 with geometric mean of Overnight Returns')

# Heatmap of RSI Overnight Dec/RSI First 30 Dec with geometric mean of Overnight Returns
p = df.groupby(['RSI_Overnight Dec', 'RSI_First 30 Dec'])['OR'].apply(gmean)
sns.heatmap(p.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True, fmt='.2%')
plt.xlabel('RSI_First 30 Dec')
plt.ylabel('RSI_Overnight Dec')
plt.title('RSI Overnight vs RSI First 30 with geometric mean of Overnight Returns')

# Heatmap of RSI Overnight Dec/RSI Last 30 Dec with geometric mean of Overnight Returns
q = df.groupby(['RSI_Overnight Dec', 'RSI_Last 30 Dec'])['OR'].apply(gmean)
sns.heatmap(q.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True, fmt='.2%')
plt.xlabel('RSI_Last 30 Dec')
plt.ylabel('RSI_Overnight Dec')
plt.title('RSI Overnight vs RSI Last 30 with geometric mean of Overnight Returns')

# `r` is kept as a name for the Overnight / Last 30 result so anything that
# refers to it still works; it is the same series as `q`, so it is neither
# recomputed nor plotted a second time.
r = q

# Heatmap of RSI First 30 Dec/RSI Last 30 Dec with geometric mean of Overnight Returns
s = df.groupby(['RSI_First 30 Dec', 'RSI_Last 30 Dec'])['OR'].apply(gmean)
sns.heatmap(s.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True, fmt='.2%')
plt.xlabel('RSI_Last 30 Dec')
plt.ylabel('RSI_First 30 Dec')
plt.title('RSI First 30 vs RSI Last 30 with geometric mean of Overnight Returns')
# Number of Overnight Returns observations for every pair of lagged-RSI
# decile buckets. unstack() moves the second groupby key into the columns,
# so the first key runs along the heatmap's y-axis and the second key along
# its x-axis; the axis labels below follow that layout.

# Heatmap of RSI Intraday Dec/RSI Overnight Dec with number of observations from Overnight Returns
t = df.groupby(['RSI_Intraday Dec', 'RSI_Overnight Dec'])['Overnight Returns'].count()
sns.heatmap(t.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True, fmt='g')
plt.xlabel('RSI_Overnight Dec')
plt.ylabel('RSI_Intraday Dec')
plt.title('RSI Intraday vs RSI Overnight with counts of observations from Overnight Returns')

# Heatmap of RSI Intraday Dec/RSI First 30 Dec with number of observations from Overnight Returns
u = df.groupby(['RSI_Intraday Dec', 'RSI_First 30 Dec'])['Overnight Returns'].count()
sns.heatmap(u.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True, fmt='g')
plt.xlabel('RSI_First 30 Dec')
plt.ylabel('RSI_Intraday Dec')
plt.title('RSI Intraday vs RSI First 30 with counts of observations from Overnight Returns')

# Heatmap of RSI Intraday Dec/RSI Last 30 Dec with number of observations from Overnight Returns
v = df.groupby(['RSI_Intraday Dec', 'RSI_Last 30 Dec'])['Overnight Returns'].count()
sns.heatmap(v.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True, fmt='g')
plt.xlabel('RSI_Last 30 Dec')
plt.ylabel('RSI_Intraday Dec')
plt.title('RSI Intraday vs RSI Last 30 with counts of observations from Overnight Returns')

# Heatmap of RSI Overnight Dec/RSI First 30 Dec with number of observations from Overnight Returns
w = df.groupby(['RSI_Overnight Dec', 'RSI_First 30 Dec'])['Overnight Returns'].count()
sns.heatmap(w.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True, fmt='g')
plt.xlabel('RSI_First 30 Dec')
plt.ylabel('RSI_Overnight Dec')
plt.title('RSI Overnight vs RSI First 30 with counts of observations from Overnight Returns')

# Heatmap of RSI Overnight Dec/RSI Last 30 Dec with number of observations from Overnight Returns
x = df.groupby(['RSI_Overnight Dec', 'RSI_Last 30 Dec'])['Overnight Returns'].count()
sns.heatmap(x.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True, fmt='g')
plt.xlabel('RSI_Last 30 Dec')
plt.ylabel('RSI_Overnight Dec')
plt.title('RSI Overnight vs RSI Last 30 with counts of observations from Overnight Returns')

# Heatmap of RSI First 30 Dec/RSI Last 30 Dec with number of observations from Overnight Returns
y = df.groupby(['RSI_First 30 Dec', 'RSI_Last 30 Dec'])['Overnight Returns'].count()
sns.heatmap(y.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True, fmt='g')
plt.xlabel('RSI_Last 30 Dec')
plt.ylabel('RSI_First 30 Dec')
plt.title('RSI First 30 vs RSI Last 30 with counts of observations from Overnight Returns')
def pt_succeed_rate(data):
    """Return the fraction of values in ``data`` that are strictly positive.

    Args:
        data: one-dimensional collection of numbers (Series, array or list).

    Returns:
        float: number of values ``> 0`` divided by ``len(data)``. Zeros and
        missing values count as non-positive. ``nan`` is returned for empty
        input, where the rate is undefined.
    """
    total = len(data)
    if total == 0:
        # No observations: avoid dividing by zero.
        return float('nan')
    # Wrapping in a Series lets plain lists be compared element-wise too.
    is_positive = pd.Series(data) > 0
    return float(is_positive.sum()) / float(total)
# Share of positive Overnight Returns (pt_succeed_rate) for every pair of
# lagged-RSI decile buckets. unstack() moves the second groupby key into the
# columns, so the first key runs along the heatmap's y-axis and the second
# key along its x-axis; the axis labels below follow that layout.

# Heatmap of RSI Intraday Dec/RSI Overnight Dec with succeed rate of Overnight Returns
z = df.groupby(['RSI_Intraday Dec', 'RSI_Overnight Dec'])['Overnight Returns'].apply(pt_succeed_rate)
sns.heatmap(z.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True, fmt='.2%')
plt.xlabel('RSI_Overnight Dec')
plt.ylabel('RSI_Intraday Dec')
plt.title('RSI Intraday vs RSI Overnight with succeed rate of Overnight Returns')

# Heatmap of RSI Intraday Dec/RSI First 30 Dec with succeed rate of Overnight Returns
x1 = df.groupby(['RSI_Intraday Dec', 'RSI_First 30 Dec'])['Overnight Returns'].apply(pt_succeed_rate)
sns.heatmap(x1.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True, fmt='.2%')
plt.xlabel('RSI_First 30 Dec')
plt.ylabel('RSI_Intraday Dec')
plt.title('RSI Intraday vs RSI First 30 with succeed rate of Overnight Returns')

# Heatmap of RSI Intraday Dec/RSI Last 30 Dec with succeed rate of Overnight Returns
y1 = df.groupby(['RSI_Intraday Dec', 'RSI_Last 30 Dec'])['Overnight Returns'].apply(pt_succeed_rate)
sns.heatmap(y1.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True, fmt='.2%')
plt.xlabel('RSI_Last 30 Dec')
plt.ylabel('RSI_Intraday Dec')
plt.title('RSI Intraday vs RSI Last 30 with succeed rate of Overnight Returns')

# Heatmap of RSI Overnight Dec/RSI First 30 Dec with succeed rate of Overnight Returns
# NOTE: this rebinds y1, so after this point y1 holds the Overnight / First 30 rates.
y1 = df.groupby(['RSI_Overnight Dec', 'RSI_First 30 Dec'])['Overnight Returns'].apply(pt_succeed_rate)
sns.heatmap(y1.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True, fmt='.2%')
plt.xlabel('RSI_First 30 Dec')
plt.ylabel('RSI_Overnight Dec')
plt.title('RSI Overnight vs RSI First 30 with succeed rate of Overnight Returns')

# Heatmap of RSI Overnight Dec/RSI Last 30 Dec with succeed rate of Overnight Returns
z1 = df.groupby(['RSI_Overnight Dec', 'RSI_Last 30 Dec'])['Overnight Returns'].apply(pt_succeed_rate)
sns.heatmap(z1.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True, fmt='.2%')
plt.xlabel('RSI_Last 30 Dec')
plt.ylabel('RSI_Overnight Dec')
plt.title('RSI Overnight vs RSI Last 30 with succeed rate of Overnight Returns')

# Heatmap of RSI First 30 Dec/RSI Last 30 Dec with succeed rate of Overnight Returns
z3 = df.groupby(['RSI_First 30 Dec', 'RSI_Last 30 Dec'])['Overnight Returns'].apply(pt_succeed_rate)
sns.heatmap(z3.unstack(), linewidths=1, cmap='RdYlGn', xticklabels=True, yticklabels=True, robust=True, annot=True, fmt='.2%')
plt.xlabel('RSI_Last 30 Dec')
plt.ylabel('RSI_First 30 Dec')
plt.title('RSI First 30 vs RSI Last 30 with succeed rate of Overnight Returns')