Notebook
In [20]:
from collections import deque
In [5]:
start = '2015-09-01'
end = '2016-04-20'
asset = get_pricing('MSFT', fields='price', start_date=start, end_date=end)
dates = asset.index
In [59]:
def getBottoms(prices, n=1):
    q = deque()
    bottoms = []
    for idx, p in enumerate(prices):
        if len(q) == 0:
            q.append(p)
        elif len(q) < n:
            if p < q[-1]:
                q.append(p)
            else:
                q.clear()
        elif len(q) == n:
            if p < q[-1]:
                q.append(p)
                bottoms.append((idx,p))
            else:
                q.clear()
        elif len(q) > n:
            if len(q) == 2*n+1:
                q.clear()
                q.append(p)
            else:
                if p > q[-1]:
                    q.append(p)
                else:
                    q.clear()
                    del bottoms[-1]
    return bottoms
        
In [58]:
def getTops(prices, n=1):
    q = deque()
    tops = []
    for idx, p in enumerate(prices):
        if len(q) == 0:
            q.append(p)
        elif len(q) < n:
            if p > q[-1]:
                q.append(p)
            else:
                q.clear()
        elif len(q) == n:
            if p > q[-1]:
                q.append(p)
                tops.append((idx,p))
            else:
                q.clear()
        elif len(q) > n:
            if len(q) == 2*n+1:
                q.clear()
                q.append(p)
            else:
                if p < q[-1]:
                    q.append(p)
                else:
                    q.clear()
                    del tops[-1]
    return tops
In [54]:
import numpy as np
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt

def plotTurningPoints(price_series, n=1):
    ''' Returns Tops and Bottoms of the inputed price series as a tuple
    '''
    # quick and dirty way to get bottoms and tops without calculating 2nd derivative
    bottoms = getBottoms(price_series,n)
    tops = getTops(price_series,n)

    _, ax = plt.subplots(1, sharex=True)
    
    ax.plot(price_series)
    ticks = ax.get_xticks()
    ax.set_xticklabels([price_series.index[i].date() for i in ticks[:-1]]) # Label x-axis with dates
    ax.set_title('Share Price with Turning Points')
    ax.plot([b[0] for b in bottoms], price_series[[b[0] for b in bottoms]], 'gD')
    ax.plot([t[0] for t in tops], price_series[[t[0] for t in tops]], 'rD')

    return (bottoms, tops)
In [60]:
bottoms, tops = plotTurningPoints(asset, 1)
In [139]:
asset.values.shape
Out[139]:
(412,)
In [26]:
max = asset[bottoms].max()
min = asset[bottoms].min()
In [28]:
max, min
Out[28]:
(55.7, 40.71)
In [29]:
from sklearn.cluster import DBSCAN
In [45]:
# Compute DBSCAN
X = asset[bottoms]
db = DBSCAN(eps=2, min_samples=2).fit(X.values.T)
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_

# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
print('Estimated number of clusters: %d' % n_clusters_)
Estimated number of clusters: 0
In [87]:
from sklearn.neighbors import KernelDensity
In [95]:
def fit_KDE(X,num_bins=10):
    bins = np.linspace(X.min(),X.max(),num_bins)
    X_plot = np.linspace(min, max, 1000)[:,np.newaxis]
    fig, ax = plt.subplots(2,2, sharex=True, sharey=True)
    #fig.subplots_adjust(hspace=0.05, wspace=0.05)

    # histogram 1
    ax[0, 0].hist(X[:, 0], bins=bins, fc='#AAAAFF', normed=True)
    ax[0, 0].set_title("Histogram")

    # histogram 2
    ax[0, 1].hist(X[:, 0], bins=bins + 0.75, fc='#AAAAFF', normed=True)
    ax[0, 1].set_title("Histogram, bins shifted")

    # tophat KDE
    kde = KernelDensity(kernel='tophat', bandwidth=0.75).fit(X)
    log_dens = kde.score_samples(X_plot)
    #ax[1,0].plot(X_plot, np.exp(log_dens))
    #ax[1, 0].fill(X_plot[:, 0], np.exp(log_dens), fc='#AAAAFF')
    ax[1, 0].fill_between(X_plot[:,0], np.exp(log_dens), facecolor='#AAAAFF')
    ax[1, 0].set_title("Tophat Kernel Density")

    # Gaussian KDE
    kde = KernelDensity(kernel='gaussian', bandwidth=0.75).fit(X)
    log_dens = kde.score_samples(X_plot)
    ax[1, 1].fill_between(X_plot[:, 0], np.exp(log_dens), facecolor='#AAAAFF')
    ax[1, 1].set_title("Gaussian Kernel Density")
    return kde
In [96]:
X_bot = asset[bottoms][:,np.newaxis]
X_top = asset[tops][:,np.newaxis]

bottom_kde = fit_KDE(X_bot)
top_kde = fit_KDE(X_top)
In [106]:
bottom_dist = np.exp(bottom_kde.score_samples(np.linspace(40, 60, 1000)[:,np.newaxis]))
plt.plot(bottom_dist)
Out[106]:
[<matplotlib.lines.Line2D at 0x7f0834ac1c50>]
In [140]:
#bottom_dist.values
x = np.arange(0,len(bottom_dist),15)
x.shape == bottom_dist[::15].shape
np.array(bottom_dist).shape
#len(x) == shape(bottom_dist[::15])
#bottoms, tops = TurningPoints(bottom_dist, step_size=15, plot=False)
Out[140]:
(1000,)
In [63]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
from sklearn.neighbors import KernelDensity


#----------------------------------------------------------------------
# Plot the progression of histograms to kernels
np.random.seed(1)
N = 20
X = np.concatenate((np.random.normal(0, 1, 0.3 * N),
                    np.random.normal(5, 1, 0.7 * N)))[:, np.newaxis]
X_plot = np.linspace(-5, 10, 1000)[:, np.newaxis]
bins = np.linspace(-5, 10, 10)

fig, ax = plt.subplots(2, 2, sharex=True, sharey=True)
fig.subplots_adjust(hspace=0.05, wspace=0.05)

# histogram 1
ax[0, 0].hist(X[:, 0], bins=bins, fc='#AAAAFF', normed=True)
ax[0, 0].text(-3.5, 0.31, "Histogram")

# histogram 2
ax[0, 1].hist(X[:, 0], bins=bins + 0.75, fc='#AAAAFF', normed=True)
ax[0, 1].text(-3.5, 0.31, "Histogram, bins shifted")

# tophat KDE
kde = KernelDensity(kernel='tophat', bandwidth=0.75).fit(X)
log_dens = kde.score_samples(X_plot)
ax[1, 0].fill(X_plot[:, 0], np.exp(log_dens), fc='#AAAAFF')
ax[1, 0].text(-3.5, 0.31, "Tophat Kernel Density")

# Gaussian KDE
kde = KernelDensity(kernel='gaussian', bandwidth=0.75).fit(X)
log_dens = kde.score_samples(X_plot)
ax[1, 1].fill(X_plot[:, 0], np.exp(log_dens), fc='#AAAAFF')
ax[1, 1].text(-3.5, 0.31, "Gaussian Kernel Density")

for axi in ax.ravel():
    axi.plot(X[:, 0], np.zeros(X.shape[0]) - 0.01, '+k')
    axi.set_xlim(-4, 9)
    axi.set_ylim(-0.02, 0.34)

for axi in ax[:, 0]:
    axi.set_ylabel('Normalized Density')

for axi in ax[1, :]:
    axi.set_xlabel('x')

#----------------------------------------------------------------------
# Plot all available kernels
X_plot = np.linspace(-6, 6, 1000)[:, None]
X_src = np.zeros((1, 1))

fig, ax = plt.subplots(2, 3, sharex=True, sharey=True)
fig.subplots_adjust(left=0.05, right=0.95, hspace=0.05, wspace=0.05)


def format_func(x, loc):
    if x == 0:
        return '0'
    elif x == 1:
        return 'h'
    elif x == -1:
        return '-h'
    else:
        return '%ih' % x

for i, kernel in enumerate(['gaussian', 'tophat', 'epanechnikov',
                            'exponential', 'linear', 'cosine']):
    axi = ax.ravel()[i]
    log_dens = KernelDensity(kernel=kernel).fit(X_src).score_samples(X_plot)
    axi.fill(X_plot[:, 0], np.exp(log_dens), '-k', fc='#AAAAFF')
    axi.text(-2.6, 0.95, kernel)

    axi.xaxis.set_major_formatter(plt.FuncFormatter(format_func))
    axi.xaxis.set_major_locator(plt.MultipleLocator(1))
    axi.yaxis.set_major_locator(plt.NullLocator())

    axi.set_ylim(0, 1.05)
    axi.set_xlim(-2.9, 2.9)

ax[0, 1].set_title('Available Kernels')

#----------------------------------------------------------------------
# Plot a 1D density example
N = 100
np.random.seed(1)
X = np.concatenate((np.random.normal(0, 1, 0.3 * N),
                    np.random.normal(5, 1, 0.7 * N)))[:, np.newaxis]

X_plot = np.linspace(-5, 10, 1000)[:, np.newaxis]

true_dens = (0.3 * norm(0, 1).pdf(X_plot[:, 0])
             + 0.7 * norm(5, 1).pdf(X_plot[:, 0]))

fig, ax = plt.subplots()
ax.fill(X_plot[:, 0], true_dens, fc='black', alpha=0.2,
        label='input distribution')

for kernel in ['gaussian', 'tophat', 'epanechnikov']:
    kde = KernelDensity(kernel=kernel, bandwidth=0.5).fit(X)
    log_dens = kde.score_samples(X_plot)
    ax.plot(X_plot[:, 0], np.exp(log_dens), '-',
            label="kernel = '{0}'".format(kernel))

ax.text(6, 0.38, "N={0} points".format(N))

ax.legend(loc='upper left')
ax.plot(X[:, 0], -0.005 - 0.01 * np.random.random(X.shape[0]), '+k')

ax.set_xlim(-4, 9)
ax.set_ylim(-0.02, 0.4)
plt.show()
In [ ]: