Hey,
I want to backtest some ML strategies and came across quantopian/zipline. I saw Simple Machine Learning Example post in quantopian which I want to port in zipline. I have used sentdex tutorials as a guiding tool for writing this. But its giving this error.
ValueError: Expected 2D array, got 1D array instead:
array=[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.
The other question is how can I add my externally trained model to this code?
This is my whole code
import pandas as pd
from collections import OrderedDict
import pytz
from zipline.api import order, record, symbol, set_benchmark, order_target_percent, get_open_orders
import zipline
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.ensemble import RandomForestClassifier
from collections import deque
import numpy as np
full_file_path = "AAPL.csv"
data = OrderedDict()
data['AAPL'] = pd.read_csv(full_file_path, index_col=0, parse_dates=['date'])
data['AAPL'] = data['AAPL'][["open","high","low","close","volume"]]
print(data['AAPL'].head())
panel=pd.Panel(data)
panel.minor_axis =["open","high","low","close","volume"]
panel.major_axis = panel.major_axis.tz_localize(pytz.utc)
def initialize(context):
context.window_length=10
context.classifier=RandomForestClassifier()
context.recent_prices=deque(maxlen=context.window_length+2)
context.X=deque(maxlen=500)
context.Y=deque(maxlen=500)
context.predi=0
set_benchmark(symbol("AAPL"))
def handle_data(context, data):
context.recent_prices.append(data['AAPL'].price)
if len(context.recent_prices)==context.window_length+2:
changes=np.diff(context.recent_prices)>0
context.X.append(changes[:-1])
context.Y.append(changes[-1])
if len(context.Y)>=100:
context.classifier.fit(context.X,context.Y)
context.predi=context.classifier.predict(changes[1:])
order_target_percent('AAPL',context.predi)
record(prediction=int(context.predi))
perf = zipline.run_algorithm(start=datetime(2011, 1, 5, 0, 0, 0, 0, pytz.utc),
end=datetime(2012, 3, 1, 0, 0, 0, 0, pytz.utc),
initialize=initialize,
capital_base=100000,
handle_data=handle_data,
data=panel)
import matplotlib.pyplot as plt
from matplotlib import style
style.use("ggplot")
perf.portfolio_value.pct_change().fillna(0).add(1).cumprod().sub(1).plot(label='portfolio')
perf.AAPL.pct_change().fillna(0).add(1).cumprod().sub(1).plot(label='benchmark')
plt.legend(loc=2)
plt.show()