from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline.data import morningstar
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.factors import Returns
from quantopian.pipeline.factors import SimpleMovingAverage
from quantopian.pipeline.factors import Latest
from quantopian.pipeline.factors import CustomFactor, AverageDollarVolume, RSI
from quantopian.pipeline.data.psychsignal import stocktwits
from quantopian.pipeline.data import USEquityPricing
from quantopian.pipeline.experimental import QTradableStocksUS
from quantopian.pipeline import CustomFactor
from quantopian.pipeline.factors.morningstar import MarketCap
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline.data.psychsignal import aggregated_twitter_withretweets_stocktwits
from quantopian.research import symbols
from quantopian.pipeline.data.zacks import EarningsSurprises
from collections import OrderedDict
import alphalens as al
from quantopian.research import prices
import pandas as pd
import numpy as np
import talib
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy import stats
from statsmodels.tsa.stattools import adfuller
from time import time
from sklearn import linear_model, decomposition, ensemble, preprocessing, isotonic, metrics
def nanfill(arr, axis=1):
    """Forward-fill the NaNs of a 2-D float array in place and return it.

    Every NaN is overwritten with the most recent non-NaN value that
    precedes it along ``axis``.  NaNs that have no earlier valid value
    along that axis are left as NaN.

    Args:
        arr: 2-D floating-point NumPy array.  It is modified in place.
        axis: Axis to fill along.  The default, 1, fills left-to-right
            within each row (the historical behaviour of this function);
            0 fills top-to-bottom within each column.

    Returns:
        The same ``arr`` object that was passed in, after filling.
    """
    # View of ``arr`` with the fill axis last; writes go through to ``arr``.
    rows = np.swapaxes(arr, axis, 1)
    missing = np.isnan(rows)

    # For each cell, the position of the latest valid entry at or before it
    # in its row (0 when no valid entry has been seen yet).
    last_valid = np.where(~missing, np.arange(missing.shape[1]), 0)
    last_valid = np.maximum.accumulate(last_valid, axis=1)

    # Copy the value at that position into every NaN cell.
    rows[missing] = rows[np.nonzero(missing)[0], last_valid[missing]]
    return arr
# Short alias for the Morningstar cash-flow-statement dataset.
cfs = morningstar.cash_flow_statement


def make_factors():
    """Build the capex-to-free-cash-flow factor classes for the pipeline.

    Returns:
        dict mapping a column name to a ``CustomFactor`` subclass (the
        class itself, not an instance; the caller instantiates it).
    """

    class Capex_To_Cashflows(CustomFactor):
        """Most recent capital expenditure over most recent free cash flow."""

        inputs = [cfs.capital_expenditure, cfs.free_cash_flow]
        window_length = 10

        def compute(self, today, assets, out, capital_expenditure, free_cash_flow):
            # The ``* 4.`` appears on both sides of the division and cancels
            # out; it is kept so the expression matches the original.
            out[:] = (capital_expenditure[-1] * 4.) / (free_cash_flow[-1] * 4.)

    class Capex_To_Cashflows_forwardfill(CustomFactor):
        """Same ratio, using each asset's latest non-NaN value in the window."""

        inputs = [cfs.capital_expenditure, cfs.free_cash_flow]
        window_length = 10

        def compute(self, today, assets, out, capital_expenditure, free_cash_flow):
            # Pipeline hands compute() windows shaped (window_length, n_assets),
            # i.e. time runs along axis 0, while nanfill() fills along axis 1.
            # Calling nanfill() on the window directly would fill a missing
            # value from the neighbouring *asset*, not from the asset's own
            # history.  Transpose so time is axis 1, and work on a copy so
            # the pipeline-owned input window is never mutated.
            latest_capex = nanfill(capital_expenditure.T.copy())[:, -1]
            latest_fcf = nanfill(free_cash_flow.T.copy())[:, -1]
            out[:] = (latest_capex * 4.) / (latest_fcf * 4.)

    all_factors = {
        'Capex to Cashflows': Capex_To_Cashflows,
        'Capex_To_Cashflows_forwardfill': Capex_To_Cashflows_forwardfill,
    }
    return all_factors
def make_ml_pipeline(factors, window_length=240):
    """Assemble a Pipeline with one column per factor.

    Args:
        factors: Mapping of column name to a factor class (or any callable
            accepting a ``window_length`` keyword and returning a pipeline
            term).
        window_length: Window length passed to every factor when it is
            instantiated.  Defaults to 240, the value previously hard-coded.

    Returns:
        A ``Pipeline`` screened by ``QTradableStocksUS()`` whose columns are
        the instantiated factors, in the iteration order of ``factors``.
    """
    factors_pipe = OrderedDict()
    # .items() exists on both Python 2 and Python 3 dicts; .iteritems() is
    # Python 2 only and raises AttributeError on Python 3.
    for name, f in factors.items():
        factors_pipe[name] = f(window_length=window_length)
    return Pipeline(screen=QTradableStocksUS(), columns=factors_pipe)
# Build the pipeline from the factor classes returned by make_factors().
ml_pipe=make_ml_pipeline(make_factors())
# Wall-clock timestamps are taken immediately before and after the pipeline
# run; nothing in this section reports the elapsed time.
start_timer = time()
start = pd.Timestamp("2016-02-26") # Can't choose a much longer time-period or we run out of RAM
end = pd.Timestamp("2016-03-01")
results = run_pipeline(ml_pipe, start_date=start, end_date=end)
end_timer = time()
# NOTE(review): the three lines below are bare expressions whose values are
# discarded when run as a script; presumably they were separate notebook
# cells used to eyeball consecutive 50-row slices of the results.
results.iloc[50:100]
results.iloc[100:150]
results.iloc[150:200,:]