Notebook
In [20]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd
from quantopian.pipeline import Pipeline, CustomFactor
from quantopian.pipeline.data import Fundamentals as msf
from quantopian.research import run_pipeline
from quantopian.pipeline.filters import QTradableStocksUS
In [35]:
class FactorScaler(CustomFactor):
    window_length = 1
    window_safe = True
    scaler = MinMaxScaler(feature_range=(0, 1), copy=True)
    def compute(self, today, assets, out, factor):
        out[:] = scaler.fit_transform(factor[-1,:])
In [31]:
universe = QTradableStocksUS()

factor1 = msf.book_value_yield.latest
In [28]:
factor1_scaled = FactorScaler(inputs=[factor1.notnull()], mask=universe )
In [29]:
pipe = Pipeline(screen=universe, columns={'factor1_scaled': factor1_scaled})
In [30]:
start = pd.Timestamp("2017-01-05")
end = pd.Timestamp("2017-02-01")
results = run_pipeline(pipe, start_date=start, end_date=end)

/venvs/py35/lib/python3.5/site-packages/sklearn/utils/validation.py:498: UserWarning: MinMaxScaler assumes floating point values as input, got bool
  "got %s" % (estimator, X.dtype))
/venvs/py35/lib/python3.5/site-packages/sklearn/preprocessing/data.py:233: DeprecationWarning: numpy boolean subtract, the `-` operator, is deprecated, use the bitwise_xor, the `^` operator, or the logical_xor function instead.
  data_range = np.max(X, axis=0) - data_min
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-30-52ac7451e8b7> in <module>()
      1 start = pd.Timestamp("2017-01-05")
      2 end = pd.Timestamp("2017-02-01")
----> 3 results = run_pipeline(pipe, start_date=start, end_date=end)

/build/src/qexec_repo/qexec/research/api.py in run_pipeline(pipeline, start_date, end_date, show_progress, chunksize)
    486             show_progress=show_progress,
    487             pipeline_engine=pipeline_engine,
--> 488             holdout_manager=holdout_manager,
    489         )
    490     # The docstring is defined at module scope to get correct indentation.

/build/src/qexec_repo/qexec/research/_api.py in inner_run_pipeline(pipeline, start_date, end_date, show_progress, chunksize, pipeline_engine, holdout_manager)
    740         adjusted_end_date,
    741         chunksize=chunksize,
--> 742         hooks=hooks,
    743     )
    744 

/build/src/qexec_repo/zipline_repo/zipline/pipeline/engine.py in run_chunked_pipeline(self, pipeline, start_date, end_date, chunksize, hooks)
    343         run_pipeline = partial(self._run_pipeline_impl, pipeline, hooks=hooks)
    344         with hooks.running_pipeline(pipeline, start_date, end_date):
--> 345             chunks = [run_pipeline(s, e) for s, e in ranges]
    346 
    347         if len(chunks) == 1:

/build/src/qexec_repo/zipline_repo/zipline/pipeline/engine.py in <listcomp>(.0)
    343         run_pipeline = partial(self._run_pipeline_impl, pipeline, hooks=hooks)
    344         with hooks.running_pipeline(pipeline, start_date, end_date):
--> 345             chunks = [run_pipeline(s, e) for s, e in ranges]
    346 
    347         if len(chunks) == 1:

/build/src/qexec_repo/zipline_repo/zipline/pipeline/engine.py in _run_pipeline_impl(self, pipeline, start_date, end_date, hooks)
    440                 refcounts=refcounts,
    441                 execution_order=execution_order,
--> 442                 hooks=hooks,
    443             )
    444 

/build/src/qexec_repo/zipline_repo/zipline/pipeline/engine.py in compute_chunk(self, graph, dates, sids, workspace, refcounts, execution_order, hooks)
    711                         mask_dates,
    712                         sids,
--> 713                         mask,
    714                     )
    715                 if term.ndim == 2:

/build/src/qexec_repo/zipline_repo/zipline/pipeline/mixins.py in _compute(self, windows, dates, assets, mask)
    219                 inputs = format_inputs(windows, inputs_mask)
    220 
--> 221                 compute(date, masked_assets, out_row, *inputs, **params)
    222                 out[idx][out_mask] = out_row
    223         return out

<ipython-input-21-bc44f88d7310> in compute(self, today, assets, out, factor)
      4     scaler = MinMaxScaler()
      5     def compute(self, today, assets, out, factor):
----> 6         out[:] = scaler.fit_transform(factor[-1,:])

/venvs/py35/lib/python3.5/site-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params)
    431         if y is None:
    432             # fit method of arity 1 (unsupervised transformation)
--> 433             return self.fit(X, **fit_params).transform(X)
    434         else:
    435             # fit method of arity 2 (supervised transformation)

/venvs/py35/lib/python3.5/site-packages/sklearn/preprocessing/data.py in transform(self, X)
    254 
    255         X = check_array(X, copy=self.copy, ensure_2d=False)
--> 256         X *= self.scale_
    257         X += self.min_
    258         return X

TypeError: Cannot cast ufunc multiply output from dtype('float64') to dtype('bool') with casting rule 'same_kind'
In [36]:
scaler = MinMaxScaler(feature_range=(0, 1), copy=True)
factor1 = msf.book_value_yield.latest
In [37]:
factor1_scaled = scaler.fit_transform(factor1)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-37-3da59c8d4d1d> in <module>()
----> 1 factor1_scaled = scaler.fit_transform(factor1)

/venvs/py35/lib/python3.5/site-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params)
    431         if y is None:
    432             # fit method of arity 1 (unsupervised transformation)
--> 433             return self.fit(X, **fit_params).transform(X)
    434         else:
    435             # fit method of arity 2 (supervised transformation)

/venvs/py35/lib/python3.5/site-packages/sklearn/preprocessing/data.py in fit(self, X, y)
    224             used for later scaling along the features axis.
    225         """
--> 226         X = check_array(X, copy=self.copy, ensure_2d=False)
    227         warn_if_not_float(X, estimator=self)
    228         feature_range = self.feature_range

/venvs/py35/lib/python3.5/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features)
    345         # make sure we actually converted to numeric:
    346         if dtype_numeric and array.dtype.kind == "O":
--> 347             array = array.astype(np.float64)
    348         if not allow_nd and array.ndim >= 3:
    349             raise ValueError("Found array with dim %d. Expected <= 2" %

ValueError: setting an array element with a sequence.
In [ ]:
 
In [ ]: