import numpy as np
import pandas as pd
from scipy import stats
from pytz import timezone
import datetime
import math
import time
from pykalman import KalmanFilter
from pandas.stats.api import ols
from sklearn.decomposition import FastICA, PCA
from sklearn import cluster
from sklearn import covariance
from statsmodels.stats.stattools import jarque_bera
from statsmodels.tsa.stattools import adfuller
import functools
from statsmodels.tsa.stattools import coint
import random
import itertools
from statsmodels.stats.moment_helpers import cov2corr, corr2cov, se_cov
from cvxopt import matrix
import cvxopt
cvxopt.solvers.options['show_progress'] = False
import matplotlib.pyplot as plt
import seaborn as sns
today = pd.Timestamp("2014-01-01")
start = pd.Timestamp("2006-01-01")
tickers = [
# BONDS
"TLT", # extended-duration USA treasury # replace with EDV in actual portfolio
# "BNDX", # international bonds short history! cannot bootstrap, have to plug in by hand after
# US EQUITY SECTORS
# "VPU", # utilities
# "VAW", # materials
# "VCR", # consumer discretionary
"VDC", # consumer staples
# "VDE", # energy
# "VFH", # financials
# "VGT", # information technology
# "VHT", # healthcare
# "VIS", # industrials
# "VOX", # telecom
# GLOBAL EQUITY
# "VPL", # pacific rim equity
# "VWO", # emerging markets equity
# "VGK", # europe equity
# ALTS
# "IAU", # gold trust
# "VNQ", # USA reit
# "DBC", # Commodities
]
sids = [ symbols(t) for t in tickers ]
closes = get_pricing(sids, start_date=start, end_date=today, frequency='daily', fields='price')
returns = np.log(closes).diff().fillna(0)
VolHalfLife = 5*4*3 # 12 weeks aka three months
def foothill_std(returns):
a = pd.ewmstd(returns, halflife=VolHalfLife, adjust=True).dropna()
return a
def floor_corr(corr):
corr[corr<0] = 0
return corr
stds = foothill_std(returns)
standardized_returns = (returns / stds).dropna()
standardized_corrs = pd.expanding_corr(standardized_returns, min_periods=VolHalfLife*2.0) # makes a Panel
standardized_corrs.dropna(inplace=True)
# adapted from https://wellecks.wordpress.com/2014/03/23/portfolio-optimization-with-python
def foothill_constrained_minvar(corr, std):
cov = corr2cov(corr, std)
n = len(corr.columns)
# assume equal returns
vu = np.ones(n)
# get into canonical form for cvxopt
P = matrix(cov)
q = matrix(np.zeros((n, 1)))
# inequality constraints Gx <= h
# captures the constraint (x >= 0)
G = matrix(-np.identity(n))
h = matrix(np.zeros((n,1)))
# equality constraint Ax = b; captures the constraint sum(x) == 1
A = matrix(1.0, (1,n))
b = matrix(1.0)
sol = cvxopt.solvers.qp(P, q, G, h, A, b)
return pd.Series(sol['x'], index=corr.columns)
standardized_returns.cumsum().plot(colormap='gist_rainbow')
def calc_allocations(corrs, stds, f):
allocations = [ f(floor_corr(corrs[t]), stds.loc[t]) for t in standardized_corrs.axes[0] ]
dfa = pd.DataFrame(allocations, index=standardized_corrs.axes[0])
return dfa
dfa = calc_allocations(standardized_corrs, stds, foothill_constrained_minvar)
dfa.plot(kind="area", title="constrained single-period optimizations",colormap='gist_rainbow')
BootstrapSampleSize = 6*4*5 # roughly six months of returns per sample
BootstrapSamplesPer = 100
randstate = np.random.RandomState()
def bootstrap_single_sample(returns):
std = returns.std()
# minor lookahead bias here, but we're in the middle of a bootstrap, so it's not the real future
standardized_returns = returns / std
standardized_corr = floor_corr(standardized_returns.corr())
x = foothill_constrained_minvar(standardized_corr, std)
return x
def sample_returns(returns, n):
rdf = pd.DataFrame(returns)
df = rdf.sample(n, replace=False, random_state = randstate)
return df
def bootstrapped_allocation(returns, sample_size, sample_count):
raw_allocs = [ bootstrap_single_sample(sample_returns(returns, sample_size)) for i in range(0,sample_count)]
allocs = pd.DataFrame(raw_allocs)
return allocs.mean()
# may as well start this on the standardized returns, re-standardizing won't hurt
window_ends = list(range(BootstrapSampleSize,len(standardized_returns),BootstrapSampleSize/4))
raw_bootstrap_allocs = [ bootstrapped_allocation(standardized_returns.iloc[0:x],
BootstrapSampleSize,
BootstrapSamplesPer)
for x in window_ends ]
index = [ standardized_returns.index[x] for x in window_ends ]
bootstrap_allocs = pd.DataFrame(raw_bootstrap_allocs, index=index)
bootstrap_allocs.plot(kind="area", title="constrained bootstrapped optimizations", colormap='gist_rainbow')
bootstrap_allocs.iloc[-1]
# we really shouldn't be bothering with allocations under 2%, so lets floor them
massaged_allocs = bootstrap_allocs.copy()
massaged_allocs[massaged_allocs<0.02] = 0.02
# and renormalize
massaged_allocs = massaged_allocs / massaged_allocs.sum(axis=1)
massaged_allocs.plot(kind="area", title="constrained bootstrapped optimizations", colormap='gist_rainbow')
massaged_allocs.iloc[-1]
# that actually looked pretty reasonable! a little heavy on the gold, but perhaps that is my bias!
# How does the portfolio perform?
port = massaged_allocs.copy()
port_rets = (port.resample("1B",fill_method='ffill') * returns).fillna(0).sum(axis=1)
port_rets.cumsum().plot()