Hello there,
I am completely new to Quantopian and I am thinking of doing some machine learning on the dataset.
Below is the custom factor and the pipeline I've created, but it seems that whenever I run it, it always maxes out the research memory.
Did I make a mistake somewhere? Or is the period I would like to query simply too long?
Thanks!
from quantopian.pipeline import CustomFactor
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.data.morningstar import company_reference

import numpy as np
import pandas as pd
import talib
# Custom factor for calculating % return over the lookback window.
class pct_return(CustomFactor):
    """Percent change in close price across the factor's lookback window.

    For each asset, computes (newest_close - oldest_close) / oldest_close
    over the ``window_length`` most recent bars.

    NOTE: ``window_length`` must be >= 2 to be meaningful. With
    ``window_length=1`` the newest and oldest rows are the same bar
    (``close[-1] is close[0]``) and the output is always 0.
    """

    # Default inputs
    inputs = [USEquityPricing.close]
    # Sensible default lookback: two bars -> one-bar return.
    # Callers may still override via pct_return(window_length=N).
    window_length = 2

    def compute(self, today, asset_ids, out, close):
        # ``close`` has shape (window_length, n_assets): row 0 is the
        # oldest bar, row -1 the newest. The *1.0 forces float division
        # (guards against integer division under Python 2).
        out[:] = (close[-1] - close[0]) * 1.0 / close[0]
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
# Pipeline instantiation & definition.
# Builds the base dataset used to generate ML target labels:
# latest symbol, close, volume, and trailing percent return per asset.
def basedata_pipeline():
    """Return a Pipeline exposing symbol, close, volume and percent return.

    Takes no parameters; the output columns are fixed.

    Bug fix: ``pct_return`` was previously constructed with
    ``window_length=1``, which makes ``close[-1]`` and ``close[0]`` the
    same row, so the factor silently returned 0 for every asset. A
    window of 2 bars yields the one-day return.
    """
    # ---equity id---
    symbol = company_reference.primary_symbol.latest
    # ---equity pricing factors---
    close = USEquityPricing.close.latest
    volume = USEquityPricing.volume.latest
    # ---percentage return--- (needs >= 2 bars to be non-degenerate)
    pct_rt = pct_return(window_length=2)
    return Pipeline(
        columns={
            # id
            'symbol': symbol,
            # equity pricing data fields
            'close': close,
            'volume': volume,
            # percentage return
            'pct_return': pct_rt,
        }
    )
# Run the pipeline one calendar year at a time instead of in a single
# 2010-2012 call. A single run_pipeline over the full span materializes
# the entire 3-year result (plus intermediate factor buffers) at once,
# which is what exhausts research memory. Pipeline warms each factor's
# lookback window per run, so the concatenated output is identical to
# one big run.
_yearly_spans = [
    ('2010-01-01', '2010-12-31'),
    ('2011-01-01', '2011-12-31'),
    ('2012-01-01', '2012-12-31'),
]
result = pd.concat(
    run_pipeline(basedata_pipeline(), start, end)
    for start, end in _yearly_spans
)