Quantopian's community platform is shutting down. Please read this post for more information and download your code.
Back to Community
Future data holes in research

Hi,

I have noticed that there are several futures with missing data in the research environment. For example, for

cat_list=continuous_future('VX', offset=0, roll='volume', adjustment='mul')
dataE = history(cat_list,
fields='close_price',
frequency='daily',
start=dateI,
end=dateF)
there is no data when requesting it, but in the past it used to work (about one year ago; there were some missing symbols even back then), while for others it works. Now there seem to be many missing futures, for example SP and ER.

Did anything change in the data set?

Thanks
J

2 responses

If I run this query in research for the list of 61 continuous contracts which are still traded (72 minus the 11 that are no longer traded), I can only get data back to '2017-07-01',
but it should be possible to go back at least to 2010 (according to the graphs here: https://www.quantopian.com/posts/continuous-future-data-lifespans). In any case, how can the error be avoided? I can use dropna to clean the data after it is returned, but at the moment the error prevents me from doing anything. Using dateI=2017-06-01 I get the error below:

dateI='2017-07-01'
dateF='2018-03-05'

dataFC= history(
symbol_list[0:61],
fields='close_price',
frequency='daily',
start=dateI,
end=dateF,
handle_missing ='ignore'
)

NoDataForSidTraceback (most recent call last)
in ()
22 frequency='daily',
23 start=dateI,
---> 24 end=dateF
25 )
26

/build/src/qexec_repo/qexec/research/api.py in history(symbols, fields, start, end, frequency, symbol_reference_date, handle_missing, start_offset) 537 findata_dir=findata_dir,
538 user_id=user_id,
--> 539 start_offset=start_offset,
540 )
541

/build/src/qexec_repo/qexec/research/_api.pyc in inner_history(symbols, fields, frequency, start, end, symbol_reference_date, handle_missing, asset_data_dir, findata_dir, user_id, start_offset) 691 findata_dir=findata_dir,
692 user_id=user_id,
--> 693 start_offset=start_offset,
694 )
695

/build/src/qexec_repo/qexec/research/_api.pyc in inner_get_pricing(symbols, start_date, end_date, symbol_reference_date, frequency, fields, handle_missing, asset_data_dir, findata_dir, user_id, start_offset) 555
556 asset_specific_data = _get_pricing_internal(
--> 557 data_portal, assets, pinched_start, pinched_end, frequency, fields,
558 )
559

/build/src/qexec_repo/qexec/research/trades.pyc in _get_pricing_internal(data_portal, assets, start_date, end_date, frequency, fields) 109 start_date=ensure_timestamp(start_date),
110 end_date=ensure_timestamp(end_date),
--> 111 data_frequency=frequency,
112 )
113

/build/src/qexec_repo/qexec/research/trades.pyc in ohlcv_panel_from_source(data_portal, sids, start_date, end_date, data_frequency, fields) 183 freq,
184 _field,
--> 185 data_frequency,
186 )
187 # For date_indexes which extend past the last available dt, reindex

/build/src/qexec_repo/zipline_repo/zipline/data/data_portal.pyc in get_history_window(self, assets, end_dt, bar_count, frequency, field, data_frequency, ffill) 958 else:
959 df = self._get_history_daily_window(assets, end_dt, bar_count,
--> 960 field, data_frequency)
961 elif frequency == "1m":
962 if field == "price":

/build/src/qexec_repo/zipline_repo/zipline/data/data_portal.pyc in _get_history_daily_window(self, assets, end_dt, bar_count, field_to_use, data_frequency) 802
803 data = self._get_history_daily_window_data(
--> 804 assets, days_for_window, end_dt, field_to_use, data_frequency
805 )
806 return pd.DataFrame(

/build/src/qexec_repo/zipline_repo/zipline/data/data_portal.pyc in _get_history_daily_window_data(self, assets, days_for_window, end_dt, field_to_use, data_frequency) 825 field_to_use,
826 days_for_window,
--> 827 extra_slot=False
828 )
829 else:

/build/src/qexec_repo/zipline_repo/zipline/data/data_portal.pyc in _get_daily_window_data(self, assets, field, days_in_window, extra_slot) 1108 days_in_window,
1109 field,
-> 1110 extra_slot)
1111 if extra_slot:
1112 return_array[:len(return_array) - 1, :] = data

/build/src/qexec_repo/zipline_repo/zipline/data/history_loader.pyc in history(self, assets, dts, field, is_perspective_after) 548 dts,
549 field,
--> 550 is_perspective_after)
551 end_ix = self._calendar.searchsorted(dts[-1])
552

/build/src/qexec_repo/zipline_repo/zipline/data/history_loader.pyc in _ensure_sliding_windows(self, assets, dts, field, is_perspective_after) 430 adj_dts = prefetch_dts
431 prefetch_len = len(prefetch_dts)
--> 432 array = self._array(prefetch_dts, needed_assets, field)
433
434 if field == 'sid':

/build/src/qexec_repo/zipline_repo/zipline/data/history_loader.pyc in _array(self, dts, assets, field) 572 dts[0],
573 dts[-1],
--> 574 assets,
575 )[0]
576

/build/src/qexec_repo/zipline_repo/zipline/data/dispatch_bar_reader.pyc in load_raw_arrays(self, fields, start_dt, end_dt, sids) 118 end_dt,
119 sid_groups[t])
--> 120 for t in asset_types if sid_groups[t]}
121
122 results = []

/build/src/qexec_repo/zipline_repo/zipline/data/dispatch_bar_reader.pyc in ((t,)) 118 end_dt,
119 sid_groups[t])
--> 120 for t in asset_types if sid_groups[t]}
121
122 results = []

/build/src/qexec_repo/zipline_repo/zipline/data/continuous_future_reader.pyc in load_raw_arrays(self, columns, start_date, end_date, assets) 37 start_date,
38 end_date,
---> 39 asset.offset
40 )
41

/build/src/qexec_repo/zipline_repo/zipline/assets/roll_finder.pyc in get_rolls(self, root_symbol, start, end, offset) 124 if prev < prev_c.contract.auto_close_date:
125 break
--> 126 if back != self._active_contract(oc, front, back, prev):
127 # TODO: Instead of listing each contract with its roll date
128 # as tuples, create a series which maps every day to the

/build/src/qexec_repo/zipline_repo/zipline/assets/roll_finder.pyc in _active_contract(self, oc, front, back, dt) 214 return front
215
--> 216 front_vol = get_value(front, prev, 'volume')
217 back_vol = get_value(back, prev, 'volume')
218 if back_vol > front_vol:

/build/src/qexec_repo/zipline_repo/zipline/data/resample.pyc in get_value(self, sid, session, colname) 626 # This was developed to complete interface, but has not been tuned
627 # for real world use.
--> 628 return self._get_resampled([colname], session, session, [sid])[0][0][0]
629
630 @lazyval

/build/src/qexec_repo/zipline_repo/zipline/data/resample.pyc in _get_resampled(self, columns, start_session, end_session, assets) 577 range_open,
578 range_close,
--> 579 assets,
580 )
581

/build/src/qexec_repo/zipline_repo/zipline/data/minute_bars.pyc in load_raw_arrays(self, fields, start_dt, end_dt, sids) 1272
1273 for i, sid in enumerate(sids):
-> 1274 carray = self._open_minute_file(field, sid)
1275 values = carray[start_idx:end_idx + 1]
1276 if indices_to_exclude is not None:

/build/src/qexec_repo/zipline_repo/zipline/data/minute_bars.pyc in _open_minute_file(self, field, sid) 1080 )
1081 except IOError:
-> 1082 raise NoDataForSid('No minute data for sid {}.'.format(sid))
1083
1084 return carray

NoDataForSid: No minute data for sid 1072201706.

For dateI='2017-07-01' the query works, just to confirm that the list of continuous futures is correct.