from quantopian.research import get_pricing
#from quantopian.pipeline.filters import Q1500US
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
from quantopian.pipeline.factors import RollingSpearmanOfReturns, CustomFactor, SimpleMovingAverage, Latest, EWMA, EWMSTD, Returns, ExponentialWeightedMovingAverage, RSI, AverageDollarVolume
#from quantopian.pipeline.filters.morningstar import Q500US, Q1500US
#import math
from quantopian.pipeline.data.builtin import USEquityPricing
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
from statsmodels import regression
import statsmodels.api as sm
def make_pipeline():
    """Build the pipeline used to scan for assets correlated with a target.

    The pipeline exposes a single column, 'Rolling Correlations', holding
    RollingSpearmanOfReturns against ``symbols(15622)`` (returns_length=10,
    correlation_length=30).

    The screen keeps an asset only when all of the following hold:
      * its 22-period average close lies in [20.0, 60.0],
      * its 22-period average volume is at least 1,000,000,
      * its rolling correlation is not NaN.

    Returns:
        The configured Pipeline object.
    """
    # Smoothed price/volume, used to drop penny stocks and illiquid names.
    avg_close = SimpleMovingAverage(
        inputs=[USEquityPricing.close], window_length=22
    )
    avg_volume = SimpleMovingAverage(
        inputs=[USEquityPricing.volume], window_length=22
    )

    spearman_vs_target = RollingSpearmanOfReturns(
        target=symbols(15622),
        returns_length=10,
        correlation_length=30,
    )

    below_price_cap = avg_close <= 60.0
    above_price_floor = avg_close >= 20.0
    liquid_enough = avg_volume >= 1000000
    universe = (
        below_price_cap
        & above_price_floor
        & liquid_enough
        & spearman_vs_target.notnan()
    )

    return Pipeline(
        columns={'Rolling Correlations': spearman_vs_target},
        screen=universe,
    )
# Run the screening pipeline over the study window and rank the survivors
# by their rolling Spearman correlation with the target asset.
my_pipe = make_pipeline()
start_date = '2017-02-01'
end_date = '2017-03-17'
result = run_pipeline(my_pipe, start_date, end_date)

# Drop rows whose correlation is exactly 1.0 (presumably the target asset
# correlated with itself -- confirm). Plain boolean indexing replaces the
# deprecated `.ix` indexer.
result2 = result[result['Rolling Correlations'] != 1.0]

# `DataFrame.sort` was removed from pandas; `sort_values` is the replacement.
r3 = result2.sort_values('Rolling Correlations', ascending=False)
print(r3.head(10))

# `.max()` already yields a scalar, so there is nothing to call
# `.get_values()` or `.head()` on; keep the scalar and print it directly.
x = r3['Rolling Correlations'].max()
print(x)

# Show the ticker of the top-ranked row. Assumes the pipeline output is
# indexed by (date, asset) so the asset is the second index level -- confirm.
# Guarded so an empty screen result does not raise IndexError.
if len(r3) > 0:
    print(r3.index[0][1].symbol)

# Start of the pricing window; the end is shared with the pipeline run above.
s_d = '2016-06-15'

# Price history for the two tickers over the same window.
benchmark = get_pricing('ANF', fields='price', start_date=s_d, end_date=end_date)
hedge_hist = get_pricing('XRT', fields='price', start_date=s_d, end_date=end_date)

# Turn prices into period-over-period returns. The first entry of
# pct_change() is NaN (there is no prior price), so it is sliced off.
r_b = benchmark.pct_change().iloc[1:]
r_a = hedge_hist.pct_change().iloc[1:]

# Regression inputs as bare arrays (timestamps dropped):
# X is the ANF return series, Y is the XRT return series.
X = r_b.values
Y = r_a.values

# Only the y-axis label is set here; no series is actually drawn.
plt.ylabel("Daily Return")

def linreg(x, y):
    """Fit ``y = alpha + beta * x`` by ordinary least squares.

    Args:
        x: Values of the explanatory variable.
        y: Values of the dependent variable, aligned with ``x``.

    Returns:
        A ``(alpha, beta)`` tuple: the first two fitted parameters, i.e. the
        intercept followed by the slope on ``x``.
    """
    # Add a constant column so the model can also fit an intercept (alpha).
    design = sm.add_constant(x)
    model = regression.linear_model.OLS(y, design).fit()
    # The original sliced the constant back off the design matrix into an
    # unused local; that statement had no effect on the result and raised
    # for pandas inputs, so it is gone. Reading the parameters through an
    # array keeps the lookup positional regardless of the container type.
    params = np.asarray(model.params)
    return params[0], params[1]

# Regress the Y returns on the X returns and report intercept and slope.
# Single-argument parenthesised print works on both Python 2 and Python 3.
alpha, beta = linreg(X, Y)
print('alpha: ' + str(alpha))
print('beta: ' + str(beta))

# Rank correlation between the same two return series.
print(stats.spearmanr(X, Y))
# Recorded output of the print statements above:
# alpha: 0.000673326962457
# beta: 0.24580515016
# SpearmanrResult(correlation=0.81334873542884845, pvalue=4.2364145200323183e-46)
