from quantopian.pipeline.data import Fundamentals
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.factors import Latest
from quantopian.pipeline.factors import CustomFactor
from quantopian.pipeline.experimental import QTradableStocksUS
from collections import OrderedDict
import pandas as pd
import numpy as np
from sklearn import preprocessing
class Nullfilling_minmaxscaled(CustomFactor):
window_length=1
def compute(self, today, assets, out, values):
values=np.nan_to_num(values)
scaling=preprocessing.MinMaxScaler()
values[-1]=scaling.fit_transform(values[-1])
out[:] = values[-1]
class Nullindicator_minmaxscaled(CustomFactor):
window_length=1
def compute(self, today, assets, out, values):
values=np.where(np.isnan(values),1.0,0.0)
scaling=preprocessing.MinMaxScaler()
values[-1]=scaling.fit_transform(values[-1])
out[:] = values[-1]
class Nullfilling_standardscaled(CustomFactor):
window_length=1
def compute(self, today, assets, out, values):
values=np.nan_to_num(values)
scaling=preprocessing.StandardScaler()
values[-1]=scaling.fit_transform(values[-1])
out[:] = values[-1]
class Nullindicator_standardscaled(CustomFactor):
window_length=1
def compute(self, today, assets, out, values):
values=np.where(np.isnan(values),1.0,0.0)
scaling=preprocessing.StandardScaler()
values[-1]=scaling.fit_transform(values[-1])
out[:] = values[-1]
class Nullindicator(CustomFactor):
window_length=1
def compute(self, today, assets, out, values):
values=np.where(np.isnan(values),1.0,0.0)
out[:] = values[-1]
def make_factors(mask):
def enterprise_value_original():
return Fundamentals.enterprise_value.latest
def enterprise_value_minmaxscaled():
return Nullfilling_minmaxscaled([Fundamentals.enterprise_value], mask=mask)
def enterprise_value_standardscaled():
return Nullfilling_standardscaled([Fundamentals.enterprise_value], mask=mask)
def enterprise_value_nulls():
return Nullindicator([Fundamentals.enterprise_value], mask=mask)
def enterprise_value_nulls_minmaxscaled():
return Nullindicator_minmaxscaled([Fundamentals.enterprise_value], mask=mask)
def enterprise_value_nulls_standardscaled():
return Nullindicator_standardscaled([Fundamentals.enterprise_value], mask=mask)
all_factors={
'enterprise_value_original':enterprise_value_original,
'enterprise_value_minmaxscaled':enterprise_value_minmaxscaled,
'enterprise_value_standardscaled':enterprise_value_standardscaled,
'enterprise_value_nulls':enterprise_value_nulls,
'enterprise_value_nulls_minmaxscaled':enterprise_value_nulls_minmaxscaled,
'enterprise_value_nulls_standardscaled':enterprise_value_nulls_standardscaled
}
return all_factors
def make_ml_pipeline(factors, universe):
factors_pipe = OrderedDict()
for name, f in factors.iteritems():
factors_pipe[name] = f()
pipe = Pipeline(screen=universe, columns=factors_pipe)
return pipe
# Add the same mask as the screen to ensure the same 'universe' for each
ml_pipe=make_ml_pipeline(make_factors(QTradableStocksUS()), QTradableStocksUS())
start = pd.Timestamp("2015-01-01") # Can't choose a much longer time-period or we run out of RAM
end = pd.Timestamp("2016-03-01")
results = run_pipeline(ml_pipe, start_date=start, end_date=end)
results[["enterprise_value_minmaxscaled","enterprise_value_nulls_minmaxscaled"]][results.index.get_level_values(0)==results.index.get_level_values(0).unique()[1]].min()
When min max scaling the minimum value should always be 0 - <font color=red> Why is enterprise value minmaxscaled not 0? </font>
results[["enterprise_value_minmaxscaled","enterprise_value_nulls_minmaxscaled"]][results.index.get_level_values(0)==results.index.get_level_values(0).unique()[1]].max()
As expected 1 for enterprise_value_minmaxscaled. 0 for enterprise_value_nulls_minmaxscaled, as no imputation of nulls was performed, therefore all values are 0. fine
results[["enterprise_value_standardscaled","enterprise_value_nulls_standardscaled"]][results.index.get_level_values(0)==results.index.get_level_values(0).unique()[1]].mean()
When standard scaling mean value should be 0 for enterprise_value_standardscaled. <font color=red> Why is enterprise value standardscaled not 0?</font> As for enterprise_value_nulls_standardscaled, the calculation should not be possible due to the fact that no value was imputed and therefore the column should only have 0 values inside. Therefore, standard deviation should be 0. Division by 0 is impossible. <font color=red> Why is the mean -0.757141?</font>
results[["enterprise_value_standardscaled","enterprise_value_nulls_standardscaled"]][results.index.get_level_values(0)==results.index.get_level_values(0).unique()[1]].std()
When standard scaling standard deviation value should be 1 for enterprise_value_standardscaled. <font color=red> Why is enterprise value standardscaled not 1?</font> As for enterprise_value_nulls_standardscaled, the calculation should not be possible due to the fact that no value was imputed and therefore the column should only have 0 values inside. Therefore, standard deviation should be 0. Division by 0 is impossible. <font color=red> Why is the standard deviation 2.665169e-15?</font>