Notebook
In [9]:
import numpy as np
import pandas as pd
from scipy import stats
from pytz import timezone
import datetime
import math
import time
from pykalman import KalmanFilter
from pandas.stats.api import ols
from sklearn.decomposition import FastICA, PCA
from sklearn import cluster
from sklearn import covariance
from statsmodels.stats.stattools import jarque_bera
from statsmodels.tsa.stattools import adfuller 
import functools
from statsmodels.tsa.stattools import coint
import random
import itertools
from statsmodels.stats.moment_helpers import cov2corr, corr2cov, se_cov
from cvxopt import matrix
import cvxopt
cvxopt.solvers.options['show_progress'] = False

import matplotlib.pyplot as plt
import seaborn as sns
today = pd.Timestamp("2014-01-01")
start = pd.Timestamp("2006-01-01")

tickers = [
    # BONDS
    "TLT",  # extended-duration USA treasury # replace with EDV in actual portfolio
#    "BNDX", # international bonds short history!  cannot bootstrap, have to plug in by hand after
    # US EQUITY SECTORS
#    "VPU", # utilities
#    "VAW", # materials
#    "VCR", # consumer discretionary
    "VDC", # consumer staples
#    "VDE", # energy
#    "VFH", # financials
#    "VGT", # information technology
#    "VHT", # healthcare
#    "VIS", # industrials
#    "VOX", # telecom
    # GLOBAL EQUITY
#    "VPL", # pacific rim equity
#    "VWO", # emerging markets equity
#    "VGK", # europe equity
    # ALTS
#    "IAU", # gold trust
#    "VNQ", # USA reit
#    "DBC", # Commodities
    ]
sids = [ symbols(t) for t in tickers ]
closes = get_pricing(sids, start_date=start, end_date=today, frequency='daily', fields='price')
returns = np.log(closes).diff().fillna(0)
In [10]:
VolHalfLife = 5*4*3 # 12 weeks aka three months

def foothill_std(returns):
    a = pd.ewmstd(returns, halflife=VolHalfLife, adjust=True).dropna()
    return a

def floor_corr(corr):
    corr[corr<0] = 0
    return corr

stds = foothill_std(returns)
standardized_returns = (returns / stds).dropna()
standardized_corrs = pd.expanding_corr(standardized_returns, min_periods=VolHalfLife*2.0) # makes a Panel
standardized_corrs.dropna(inplace=True)

# adapted from https://wellecks.wordpress.com/2014/03/23/portfolio-optimization-with-python
def foothill_constrained_minvar(corr, std):
    cov = corr2cov(corr, std)
    n = len(corr.columns)
    # assume equal returns
    vu = np.ones(n)
    # get into canonical form for cvxopt
    P = matrix(cov)
    q = matrix(np.zeros((n, 1)))
    # inequality constraints Gx <= h
    # captures the constraint (x >= 0)
    G = matrix(-np.identity(n))
    h = matrix(np.zeros((n,1)))
    # equality constraint Ax = b; captures the constraint sum(x) == 1
    A = matrix(1.0, (1,n))
    b = matrix(1.0)
    sol = cvxopt.solvers.qp(P, q, G, h, A, b)
    return pd.Series(sol['x'], index=corr.columns)

standardized_returns.cumsum().plot(colormap='gist_rainbow')
Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd19e380350>
In [11]:
def calc_allocations(corrs, stds, f):
    allocations = [ f(floor_corr(corrs[t]), stds.loc[t]) for t in standardized_corrs.axes[0] ]
    dfa = pd.DataFrame(allocations, index=standardized_corrs.axes[0])
    return dfa

dfa = calc_allocations(standardized_corrs, stds, foothill_constrained_minvar)
In [12]:
dfa.plot(kind="area", title="constrained single-period optimizations",colormap='gist_rainbow')
Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd19e590250>
In [13]:
BootstrapSampleSize = 6*4*5 # roughly six months of returns per sample
BootstrapSamplesPer = 100

randstate = np.random.RandomState()

def bootstrap_single_sample(returns):
    std = returns.std()
    # minor lookahead bias here, but we're in the middle of a bootstrap, so it's not the real future
    standardized_returns = returns / std 
    standardized_corr = floor_corr(standardized_returns.corr())
    x = foothill_constrained_minvar(standardized_corr, std)
    return x

def sample_returns(returns, n):
    rdf = pd.DataFrame(returns)
    df = rdf.sample(n, replace=False, random_state = randstate)
    return df

def bootstrapped_allocation(returns, sample_size, sample_count):
    raw_allocs = [ bootstrap_single_sample(sample_returns(returns, sample_size)) for i in range(0,sample_count)]
    allocs = pd.DataFrame(raw_allocs)
    return allocs.mean()

# may as well start this on the standardized returns, re-standardizing won't hurt
window_ends = list(range(BootstrapSampleSize,len(standardized_returns),BootstrapSampleSize/4))

raw_bootstrap_allocs = [ bootstrapped_allocation(standardized_returns.iloc[0:x], 
                                                 BootstrapSampleSize, 
                                                 BootstrapSamplesPer)
                         for x in window_ends ]
In [14]:
index = [ standardized_returns.index[x] for x in window_ends ]
bootstrap_allocs = pd.DataFrame(raw_bootstrap_allocs, index=index)
bootstrap_allocs.plot(kind="area", title="constrained bootstrapped optimizations", colormap='gist_rainbow')
bootstrap_allocs.iloc[-1]
Out[14]:
Equity(23921 [TLT])    0.490056
Equity(25903 [VDC])    0.509944
Name: 2013-12-30 00:00:00+00:00, dtype: float64
In [15]:
# we really shouldn't be bothering with allocations under 2%, so lets floor them
massaged_allocs = bootstrap_allocs.copy()
massaged_allocs[massaged_allocs<0.02] = 0.02
# and renormalize
massaged_allocs = massaged_allocs / massaged_allocs.sum(axis=1)
massaged_allocs.plot(kind="area", title="constrained bootstrapped optimizations", colormap='gist_rainbow')
massaged_allocs.iloc[-1]
Out[15]:
Equity(23921 [TLT])    0.490056
Equity(25903 [VDC])    0.509944
Name: 2013-12-30 00:00:00+00:00, dtype: float64
In [16]:
# that actually looked pretty reasonable!  a little heavy on the gold, but perhaps that is my bias!
# How does the portfolio perform?
port = massaged_allocs.copy()
port_rets = (port.resample("1B",fill_method='ffill') * returns).fillna(0).sum(axis=1)
port_rets.cumsum().plot()
Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd19e2d74d0>
In [ ]: