from quantopian.pipeline import Pipeline
from quantopian.pipeline import CustomFactor
from quantopian.research import run_pipeline
from quantopian.pipeline.data import morningstar
from quantopian.pipeline.data.builtin import USEquityPricing
import datetime as dt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import scipy.stats as stats
import scipy
from statsmodels import regression
import seaborn as sns
import talib as ta
from quantopian.pipeline.data.quandl import yahoo_index_vix
class MarketCap(CustomFactor):
    """Market capitalization: close price times shares outstanding.

    Reads row 0 of a 3-day window, matching the other factors in this file.
    """
    inputs = [USEquityPricing.close, morningstar.valuation.shares_outstanding]
    window_length = 3

    def compute(self, today, assets, out, close, shares):
        price = close[0]
        outstanding = shares[0]
        out[:] = price * outstanding
class EV_to_EPS(CustomFactor):
    """Ratio of enterprise value to basic earnings per share.

    Defined alongside the other factors; not added to the pipeline below.
    """
    inputs = [morningstar.valuation.enterprise_value, morningstar.earnings_report.basic_eps]
    window_length = 3

    def compute(self, today, assets, out, enterprise_value, basic_eps):
        ev = enterprise_value[0]
        eps = basic_eps[0]
        out[:] = ev / eps
class Price_to_Earnings(CustomFactor):
    """Price-to-earnings ratio: close price divided by basic EPS.

    Defined alongside the other factors; not added to the pipeline below.
    """
    inputs = [USEquityPricing.close, morningstar.earnings_report.basic_eps]
    window_length = 3

    def compute(self, today, assets, out, close, basic_eps):
        latest_close = close[0]
        latest_eps = basic_eps[0]
        out[:] = latest_close / latest_eps
class Factor(CustomFactor):
    """Price-to-book ratio: close divided by common equity per share."""
    inputs = [USEquityPricing.close, morningstar.balance_sheet.common_stock_equity, morningstar.valuation.shares_outstanding]
    window_length = 3

    def compute(self, today, assets, out, close, common_equity, shares):
        # book value per share = common equity / shares outstanding
        book_per_share = common_equity[0] / shares[0]
        out[:] = close[0] / book_per_share
class F_Returns(CustomFactor):
    # Forward returns factor: simple return between the two open prices in
    # the window (newest open over oldest open, minus one).
    inputs = [USEquityPricing.open]
    # NOTE(review): the Notes block below this class argues window_length
    # must be 3 for true 1-day-forward alignment, but 2 is used here —
    # confirm which alignment is intended.
    window_length = 2

    def compute(self, today, assets, out, open):
        out[:] = open[-1]/open[0]-1
### Notes #######################
"""
We want to compute 1-day forward returns.
On day X, before trading, we need the last 2 open prices:
the return is measured from the open of X-2 to the open of X-1.
Before trading on day X, the X-2 row is 3rd-to-last, so window_length
should be 3 (note: F_Returns above currently uses window_length = 2).
"""
### Pipeline Parameters #########
factor = Factor()
f_returns = F_Returns()
# BUG FIX: `mkt_cap` was referenced below (liquidity screen and rank) but
# never instantiated, which raised NameError at import time.
mkt_cap = MarketCap()
# Stock liquidity screen: keep only names with market cap >= $1B.
liquidity = mkt_cap >= 1000000000
screen = (liquidity)
# NaNs are not included, so len() ~= max()
factor_rank = mkt_cap.rank(method='min', ascending=False, mask=screen)  # Smaller the better
# NOTE(review): the rank is computed on market cap, not on `factor`
# (price-to-book), even though the pipeline column is named 'factor' —
# confirm this is intended.
pipe = Pipeline()
pipe.add(factor_rank, 'factor')
# 'z' prefix presumably keeps the returns column sorted last, which the
# analysis below relies on via data.columns[:-1].
pipe.add(f_returns, 'zf_returns')
pipe.set_screen(screen)
######### Simulation Parameters #########
# Simulation window over which the pipeline is evaluated.
start_date = '01/1/2014'
end_date = '04/18/2016'
data = run_pipeline(pipe, start_date=start_date, end_date=end_date)
# Result frames: one row per pipeline date except the final one (it has no
# stored spread), one column per factor (the trailing returns column is
# excluded).
trading_days = data.index.levels[0]
beta = pd.DataFrame(index=trading_days[:-1], columns=data.columns[:-1])
actual = pd.DataFrame(index=trading_days[:-1], columns=data.columns[:-1])
### Bucketed factor-return analysis ###
# For each day and each factor column (every column except the trailing
# 'zf_returns'):
#   1. normalize that day's ranks to (0, 1] so they can be bucketed,
#   2. average next-day returns within 20 equal-width rank buckets,
#   3. fit a no-intercept OLS line through the bucket means and store the
#      scaled slope in `beta`,
#   4. store the top-vs-bottom quintile (4-bucket) spread in `actual`.
#
# BUG FIX: the loop previously ran over ALL days despite the comment
# "except the last one", writing to a date that `beta`/`actual` do not
# index; it now iterates dates[:-1]. The 20 copy-pasted bucket blocks are
# replaced by a loop over equal-width edges. Edges are built as k/20.0 so
# each boundary is the exact same double as the original literals
# (.05, .10, ...), preserving boundary bucketing bit-for-bit.
N_BUCKETS = 20
bucket_edges = np.arange(N_BUCKETS + 1) / float(N_BUCKETS)
for day in data.index.levels[0][:-1]:
    for col in data.columns[:-1]:
        # Convert ranks to percentages so we can bucket.
        # NaNs are skipped by max(), so the denominator is the largest rank.
        ranks = data[col].loc[day]
        data[col].loc[day] = ranks.values / max(ranks)
        pct = data[col].loc[day]
        day_returns = data['zf_returns'].loc[day]
        # Mean forward return per bucket; dropna() before mean() as before.
        buckets = np.zeros(N_BUCKETS)
        for k in range(N_BUCKETS):
            in_bucket = (pct > bucket_edges[k]) & (pct <= bucket_edges[k + 1])
            buckets[k] = day_returns[in_bucket].dropna().mean()
        # Slope of bucket-mean return vs bucket index (no intercept, as in
        # the original); scaled by the bucket count.
        x = np.arange(N_BUCKETS)
        model = regression.linear_model.OLS(buckets, x).fit()
        beta[col].loc[day] = model.params[0] * N_BUCKETS
        # Top-4 minus bottom-4 bucket mean: realized quintile spread.
        actual[col].loc[day] = np.mean(buckets[-4:]) - np.mean(buckets[:4])
# Cumulative spread-return curves for every factor column.
plt.plot(np.cumprod(actual[actual.columns] + 1))
plt.legend(actual.columns)
# Smooth the cumulative 'factor' curve with talib's default-period EMA.
factor_ema = ta.EMA(np.cumprod(actual['factor'].values.astype(float) + 1))
cum_returns = np.cumprod(actual['factor'].values + 1)
actual_returns = actual['factor'].values
plt.plot(factor_ema)
plt.plot(cum_returns)
# Keep only the daily spreads that follow a day where the cumulative curve
# closed above its EMA (signal is lagged by one day).
above_ema = cum_returns > factor_ema
returns = actual_returns[1:][above_ema[:-1]]
# Sharpe-like ratio of the filtered daily spreads (bare expression so the
# notebook displays it).
np.mean(returns) / np.std(returns)