content_image
content_image
#### Using Quantopian's 'get_pricing()', we create a pandas data frame below
### The 'spot_prices' variable will be the variable storing the data frame.
## The parameters neccesary to return the frame are described below
# After creating the variable, check to ensure you received the data you requested.
spot_prices = get_pricing(
['SOYB','USO','TLT','BSV','PSA',
'SPY','XIV','IBM','MCD','JNJ','XOM'], # A list of equity symbols to include
fields='close_price', # the 'field' we want to utilize
start_date='2017-01-01', # Starting date of the data
end_date = '2017-03-31', # Ending date of the data
frequency='daily', # Frequency of the data
)
spot_prices.head(3) # use the '.head()' method of pandas to view the first 3 rows of the data frame
content_image
print "Number of Rows:",len(spot_prices)
#Tom is my favorite Panda. He gets it...
spot_prices.tail(3) # Check to insure the last row matches with the end_date requested
spot_delta = spot_prices.pct_change(periods=1, axis=0) # Get the percent change between closings
spot_delta = spot_delta.dropna() # drop the nan (there will be one at the end)
spot_delta.plot() # quick code for plotting a data frame
References To Determine Futures Data To Utilize:
Lifespan of Data Available - https://www.quantopian.com/posts/continuous-future-data-lifespans#590a60804de40b0a781ddbc0
Jamie's Notebook (w/ contract symbols and month symbols) - https://www.quantopian.com/posts/futures-data-now-available-in-research
CME's expiration calendar - http://www.cmegroup.com/tools-information/calendars/expiration-calendar/
from quantopian.research.experimental import history # Dependency needed to pull in futures data
# See References above to determine which futures you want to use
sy_contracts = symbols(['SYK17', 'SYN17'])
# Pandas frame for futures is below, same work as we did with equities.
# (Excuse the lack of logic of the variable name, I was copying Jamie's code)
sy_consecutive_contract_price = history(
sy_contracts, # Instead of defining the symbols within this argument, I just called the list dd
fields='price',
frequency='daily',
start_date='2017-01-01',
end_date='2017-03-31'
)
sy_consecutive_contract_price.head()
bean_delta = sy_consecutive_contract_price.pct_change(periods=1, axis=0)
bean_delta = bean_delta.dropna()
bean_delta.plot(title='Soy Futures')
# Merging pandas can be a challenge, key with this method is to match up starting and ending dates of both frames
data = spot_prices.join(sy_consecutive_contract_price)
data.head(3)
print data.columns
data.columns = ['beans_spot','oil_spot','lt_rate_spot','st_rate_spot','storage_spot',
'SPY','XIV','IBM','MCD','JNJ','XOM',
'beans_may','beans_july']
print data.columns
import datetime
data['May_Delivery'] = datetime.datetime(2017,5,12) # May expiration date per CME
data['July_Delivery'] = datetime.datetime(2017,7,14) # July expiration date per CME
data['may_dtd'] = data.May_Delivery - data.index # Subtract to get days to delivery
data['july_dtd'] = data.July_Delivery - data.index # Subtract to get days to delivery
data['may_dtd'] = data['may_dtd'].astype(int) / 86400000000000 # Convert to days as an integer
data['july_dtd'] = data['july_dtd'].astype(int) / 86400000000000 # Convert to days as an integer
del data['May_Delivery'] # delete
del data['July_Delivery'] # delete
data.head() # know your pandas
data['dCoC_may'] = (data['beans_spot'] - data['beans_may']) / data['may_dtd']
data['dCoC_july'] = (data['beans_spot'] - data['beans_july']) / data['july_dtd']
Earlier plots used the '.plot()' method on a a pandas frame. Below is a five step method for the most important aspects of building a chart with matplotlib
# Step 1 - import the library (always use plt as the variable because it is easiest to find examples on the internet)
import matplotlib.pyplot as plt
# Step #2 - Select your X and Y data
# For the X values, we are using 'data.index' because that is the dates in our pandas frame
# For the Y values, we are using 'data.
# Addintionally, we add a label within each plot we want to see so we can map the color to the data
plt.plot(data.index,data['dCoC_may'],label="Beans For May Delivery")
plt.plot(data.index,data['dCoC_july'],label="Beans For July Delivery")
# Step #3 - add the legend, this will display the labels we added as the third argument above
plt.legend(loc='upper left')
# Step #4 - add a title to the chart, an X label, and a Y label
plt.title("Comparing Implied Cost of Carry Between May and July Soybeans")
plt.xlabel("Date")
plt.ylabel("Average Daily Implied Cost of Carry")
# Step #5 - Tell the python gods to show the chart on the screen
plt.show()
### Delete ME ????????
print ("Price of Spot Beans on Jan 3:",data['beans_spot'].iloc[0])
print ("Price of May Beans on Jan 3:",data['beans_may'].iloc[0])
print ('Beans shares bought on Jan 3rd ($10K value):',int(10000/18.93))
print ('May beans contracts sold on Jan 3rd ($10K value):',int(10000/10.045))
print ("Price of Spot Beans on March 31:",data['beans_spot'].iloc[-1])
print ("Price of May Beans on March 31:",data['beans_may'].iloc[-1])
print ('Spot Bean Trade Profit:',(10000-18.12*528)*-1)
print ('Futures Trade Profit:',10000-(995*9.448))
print ("Trade Profit:",599.23999999-432.6399999)
spot_prices = get_pricing(
['SOYB'],
fields='close_price',
start_date='2014-01-01',
end_date = '2014-03-31',
frequency='daily',
)
#spot_prices.head(3)
sy_contracts = symbols(['SYK14', 'SYN14'])
sy_consecutive_contract_price = history(
sy_contracts,
fields='price',
frequency='daily',
start_date='2014-01-01',
end_date='2014-03-31'
)
#sy_consecutive_contract_price.head()
data = spot_prices.join(sy_consecutive_contract_price)
#print (data.columns)
data.columns = ['beans_spot','beans_may','beans_july']
data['May_Delivery'] = datetime.datetime(2014,5,12) # fix with real date later
data['July_Delivery'] = datetime.datetime(2014,7,14) # fix with real date later
data['may_dtd'] = data.May_Delivery - data.index
data['july_dtd'] = data.July_Delivery - data.index
data['may_dtd'] = data['may_dtd'].astype(int) / 86400000000000
data['july_dtd'] = data['july_dtd'].astype(int) / 86400000000000
del data['May_Delivery']
del data['July_Delivery']
data['dCoC_may'] = (data['beans_spot'] - data['beans_may']) / data['may_dtd']
data['dCoC_july'] = (data['beans_spot'] - data['beans_july']) / data['july_dtd']
plt.plot(data.index,data['dCoC_may'],label="Beans For May Delivery")
plt.plot(data.index,data['dCoC_july'],label="Beans For July Delivery")
plt.legend(loc='upper left')
plt.title("Comparing Implied Cost of Carry Between May and July Soybeans")
plt.xlabel("Date")
plt.ylabel("Average Daily Implied Cost of Carry")
plt.show()
spot_prices = get_pricing(
['SOYB'],
fields='close_price',
start_date='2015-01-01',
end_date = '2015-03-31',
frequency='daily',
)
#spot_prices.head(3)
sy_contracts = symbols(['SYK15', 'SYN15'])
sy_consecutive_contract_price = history(
sy_contracts,
fields='price',
frequency='daily',
start_date='2015-01-01',
end_date='2015-03-31'
)
#sy_consecutive_contract_price.head()
data = spot_prices.join(sy_consecutive_contract_price)
#print (data.columns)
data.columns = ['beans_spot','beans_may','beans_july']
data['May_Delivery'] = datetime.datetime(2015,5,12) #fix later
data['July_Delivery'] = datetime.datetime(2015,7,14) # fix later
data['may_dtd'] = data.May_Delivery - data.index
data['july_dtd'] = data.July_Delivery - data.index
data['may_dtd'] = data['may_dtd'].astype(int) / 86400000000000
data['july_dtd'] = data['july_dtd'].astype(int) / 86400000000000
del data['May_Delivery']
del data['July_Delivery']
data['dCoC_may'] = (data['beans_spot'] - data['beans_may']) / data['may_dtd']
data['dCoC_july'] = (data['beans_spot'] - data['beans_july']) / data['july_dtd']
plt.plot(data.index,data['dCoC_may'],label="Beans For May Delivery")
plt.plot(data.index,data['dCoC_july'],label="Beans For July Delivery")
plt.legend(loc='upper left')
plt.title("Comparing Implied Cost of Carry Between May and July Soybeans")
plt.xlabel("Date")
plt.ylabel("Average Daily Implied Cost of Carry")
plt.show()
spot_prices = get_pricing(
['SOYB'],
fields='close_price',
start_date='2016-01-01',
end_date = '2016-03-31',
frequency='daily',
)
#spot_prices.head(3)
sy_contracts = symbols(['SYK16', 'SYN16'])
sy_consecutive_contract_price = history(
sy_contracts,
fields='price',
frequency='daily',
start_date='2016-01-01',
end_date='2016-03-31'
)
#sy_consecutive_contract_price.head()
data = spot_prices.join(sy_consecutive_contract_price)
#print (data.columns)
data.columns = ['beans_spot','beans_may','beans_july']
data['May_Delivery'] = datetime.datetime(2016,5,12) #fix later
data['July_Delivery'] = datetime.datetime(2016,7,14) # fix later
data['may_dtd'] = data.May_Delivery - data.index
data['july_dtd'] = data.July_Delivery - data.index
data['may_dtd'] = data['may_dtd'].astype(int) / 86400000000000
data['july_dtd'] = data['july_dtd'].astype(int) / 86400000000000
del data['May_Delivery']
del data['July_Delivery']
data['dCoC_may'] = (data['beans_spot'] - data['beans_may']) / data['may_dtd']
data['dCoC_july'] = (data['beans_spot'] - data['beans_july']) / data['july_dtd']
plt.plot(data.index,data['dCoC_may'],label="Beans For May Delivery")
plt.plot(data.index,data['dCoC_july'],label="Beans For July Delivery")
plt.legend(loc='upper left')
plt.title("Comparing Implied Cost of Carry Between May and July Soybeans")
plt.xlabel("Date")
plt.ylabel("Average Daily Implied Cost of Carry")
plt.show()
spot_prices = get_pricing(
['SOYB'],
fields='close_price',
start_date='2014-01-01',
end_date = '2017-03-31',
frequency='daily',
)
#spot_prices.head(3)
sy_contracts = symbols(['SYK17', 'SYN17','SYK16','SYN16',
'SYK15', 'SYN15','SYK14','SYN14'])
sy_consecutive_contract_price = history(
sy_contracts,
fields='price',
frequency='daily',
start_date='2014-01-01',
end_date='2017-03-31'
)
#sy_consecutive_contract_price.head()
data = spot_prices.join(sy_consecutive_contract_price)
data.columns = ['beans_spot','beans_may_17','beans_july_17','beans_may_16','beans_july_16',
'beans_may_15','beans_july_15','beans_may_14','beans_july_14']
data['May_Delivery_17'] = datetime.datetime(2017,5,12)
data['July_Delivery_17'] = datetime.datetime(2017,7,14)
data['may_dtd_17'] = data.May_Delivery_17 - data.index
data['july_dtd_17'] = data.July_Delivery_17 - data.index
data['may_dtd_17'] = data['may_dtd_17'].astype(int) / 86400000000000
data['july_dtd_17'] = data['july_dtd_17'].astype(int) / 86400000000000
del data['May_Delivery_17']
del data['July_Delivery_17']
data['dCoC_may_17'] = (data['beans_spot'] - data['beans_may_17']) / data['may_dtd_17']
data['dCoC_july_17'] = (data['beans_spot'] - data['beans_july_17']) / data['july_dtd_17']
data['May_Delivery_16'] = datetime.datetime(2016,5,12)
data['July_Delivery_16'] = datetime.datetime(2016,7,14)
data['may_dtd_16'] = data.May_Delivery_16 - data.index
data['july_dtd_16'] = data.July_Delivery_16 - data.index
data['may_dtd_16'] = data['may_dtd_16'].astype(int) / 86400000000000
data['july_dtd_16'] = data['july_dtd_16'].astype(int) / 86400000000000
del data['May_Delivery_16']
del data['July_Delivery_16']
data['dCoC_may_16'] = (data['beans_spot'] - data['beans_may_16']) / data['may_dtd_16']
data['dCoC_july_16'] = (data['beans_spot'] - data['beans_july_16']) / data['july_dtd_16']
data['May_Delivery_15'] = datetime.datetime(2015,5,12)
data['July_Delivery_15'] = datetime.datetime(2015,7,14)
data['may_dtd_15'] = data.May_Delivery_15 - data.index
data['july_dtd_15'] = data.July_Delivery_15 - data.index
data['may_dtd_15'] = data['may_dtd_15'].astype(int) / 86400000000000
data['july_dtd_15'] = data['july_dtd_15'].astype(int) / 86400000000000
del data['May_Delivery_15']
del data['July_Delivery_15']
data['dCoC_may_15'] = (data['beans_spot'] - data['beans_may_15']) / data['may_dtd_15']
data['dCoC_july_15'] = (data['beans_spot'] - data['beans_july_15']) / data['july_dtd_15']
data['May_Delivery_14'] = datetime.datetime(2014,5,12)
data['July_Delivery_14'] = datetime.datetime(2014,7,14)
data['may_dtd_14'] = data.May_Delivery_14 - data.index
data['july_dtd_14'] = data.July_Delivery_14 - data.index
data['may_dtd_14'] = data['may_dtd_14'].astype(int) / 86400000000000
data['july_dtd_14'] = data['july_dtd_14'].astype(int) / 86400000000000
del data['May_Delivery_14']
del data['July_Delivery_14']
data['dCoC_may_14'] = (data['beans_spot'] - data['beans_may_14']) / data['may_dtd_14']
data['dCoC_july_14'] = (data['beans_spot'] - data['beans_july_14']) / data['july_dtd_14']
##### Charting Again.....
# Step 2
plt.plot(data.index,data['dCoC_may_17'],label="Beans For May 17 Delivery")
plt.plot(data.index,data['dCoC_july_17'],label="Beans For July 17 Delivery")
plt.plot(data.index,data['dCoC_may_16'],label="Beans For May 16 Delivery")
plt.plot(data.index,data['dCoC_july_16'],label="Beans For July 16 Delivery")
plt.plot(data.index,data['dCoC_may_15'],label="Beans For May 15 Delivery")
plt.plot(data.index,data['dCoC_july_15'],label="Beans For July 15 Delivery")
plt.plot(data.index,data['dCoC_may_14'],label="Beans For May 14 Delivery")
plt.plot(data.index,data['dCoC_july_14'],label="Beans For July 14 Delivery")
# Step 3
plt.legend(loc='upper left')
# Step 4
plt.title("Comparing Implied Cost of Carry Between May and July Soybeans")
plt.xlabel("Date")
plt.ylabel("Average Daily Implied Cost of Carry")
# Something new
plt.ylim(0,1) # Adjust y limits if the chart if not looking good on that scale
# Step 5
plt.show()
content_image
content_image
# Build a frame with in sample data
########## 2011 ###########################################################################
spot_prices = get_pricing(
['SOYB','USO','TLT','BSV','PSA',
'SPY','XIV','IBM','MCD','JNJ','XOM'],
fields='close_price',
start_date='2011-01-01',
end_date = '2011-03-31',
frequency='daily',
)
#spot_prices.head(3)
sy_contracts = symbols(['SYK11', 'SYN11'])
sy_consecutive_contract_price = history(
sy_contracts,
fields='price',
frequency='daily',
start_date='2011-01-01',
end_date='2011-03-31'
)
#sy_consecutive_contract_price.head()
data = spot_prices.join(sy_consecutive_contract_price)
#print (data.columns)
#data.columns = ['beans_spot','beans_may','beans_july']
#data.columns = ['beans_spot','oil_spot','lt_rate_spot','st_rate_spot','storage_spot','beans_may','beans_july']
data.columns = ['beans_spot','oil_spot','lt_rate_spot','st_rate_spot','storage_spot',
'SPY','XIV','IBM','MCD','JNJ','XOM',
'beans_may','beans_july']
data['May_Delivery'] = datetime.datetime(2011,5,12) # fix with real date later
data['July_Delivery'] = datetime.datetime(2011,7,14) # fix with real date later
data['may_dtd'] = data.May_Delivery - data.index
data['july_dtd'] = data.July_Delivery - data.index
data['may_dtd'] = data['may_dtd'].astype(int) / 86400000000000
data['july_dtd'] = data['july_dtd'].astype(int) / 86400000000000
del data['May_Delivery']
del data['July_Delivery']
data['dCoC_may'] = (data['beans_spot'] - data['beans_may']) / data['may_dtd']
data['dCoC_july'] = (data['beans_spot'] - data['beans_july']) / data['july_dtd']
#####################################################################################################
all_data = data # Make a different frame
########## 2012 ###########################################################################
spot_prices = get_pricing(
['SOYB','USO','TLT','BSV','PSA',
'SPY','XIV','IBM','MCD','JNJ','XOM'],
fields='close_price',
start_date='2012-01-01',
end_date = '2012-03-31',
frequency='daily',
)
#spot_prices.head(3)
sy_contracts = symbols(['SYK12', 'SYN12'])
sy_consecutive_contract_price = history(
sy_contracts,
fields='price',
frequency='daily',
start_date='2012-01-01',
end_date='2012-03-31'
)
#sy_consecutive_contract_price.head()
data = spot_prices.join(sy_consecutive_contract_price)
#print (data.columns)
#data.columns = ['beans_spot','beans_may','beans_july']
#data.columns = ['beans_spot','oil_spot','lt_rate_spot','st_rate_spot','storage_spot','beans_may','beans_july']
data.columns = ['beans_spot','oil_spot','lt_rate_spot','st_rate_spot','storage_spot',
'SPY','XIV','IBM','MCD','JNJ','XOM',
'beans_may','beans_july']
data['May_Delivery'] = datetime.datetime(2012,5,12) # fix with real date later
data['July_Delivery'] = datetime.datetime(2012,7,14) # fix with real date later
data['may_dtd'] = data.May_Delivery - data.index
data['july_dtd'] = data.July_Delivery - data.index
data['may_dtd'] = data['may_dtd'].astype(int) / 86400000000000
data['july_dtd'] = data['july_dtd'].astype(int) / 86400000000000
del data['May_Delivery']
del data['July_Delivery']
data['dCoC_may'] = (data['beans_spot'] - data['beans_may']) / data['may_dtd']
data['dCoC_july'] = (data['beans_spot'] - data['beans_july']) / data['july_dtd']
#####################################################################################################
#all_data = data # Make a different frame
all_data = all_data.append(data)
########## 2013 ###########################################################################
spot_prices = get_pricing(
['SOYB','USO','TLT','BSV','PSA',
'SPY','XIV','IBM','MCD','JNJ','XOM'],
fields='close_price',
start_date='2013-01-01',
end_date = '2013-03-31',
frequency='daily',
)
#spot_prices.head(3)
sy_contracts = symbols(['SYK13', 'SYN13'])
sy_consecutive_contract_price = history(
sy_contracts,
fields='price',
frequency='daily',
start_date='2013-01-01',
end_date='2013-03-31'
)
#sy_consecutive_contract_price.head()
data = spot_prices.join(sy_consecutive_contract_price)
#print (data.columns)
#data.columns = ['beans_spot','beans_may','beans_july']
#data.columns = ['beans_spot','oil_spot','lt_rate_spot','st_rate_spot','storage_spot','beans_may','beans_july']
data.columns = ['beans_spot','oil_spot','lt_rate_spot','st_rate_spot','storage_spot',
'SPY','XIV','IBM','MCD','JNJ','XOM',
'beans_may','beans_july']
data['May_Delivery'] = datetime.datetime(2013,5,12) # fix with real date later
data['July_Delivery'] = datetime.datetime(2013,7,14) # fix with real date later
data['may_dtd'] = data.May_Delivery - data.index
data['july_dtd'] = data.July_Delivery - data.index
data['may_dtd'] = data['may_dtd'].astype(int) / 86400000000000
data['july_dtd'] = data['july_dtd'].astype(int) / 86400000000000
del data['May_Delivery']
del data['July_Delivery']
data['dCoC_may'] = (data['beans_spot'] - data['beans_may']) / data['may_dtd']
data['dCoC_july'] = (data['beans_spot'] - data['beans_july']) / data['july_dtd']
#####################################################################################################
#all_data = data # Make a different frame
all_data = all_data.append(data)
########## 2014 ###########################################################################
spot_prices = get_pricing(
['SOYB','USO','TLT','BSV','PSA',
'SPY','XIV','IBM','MCD','JNJ','XOM'],
fields='close_price',
start_date='2014-01-01',
end_date = '2014-03-31',
frequency='daily',
)
#spot_prices.head(3)
sy_contracts = symbols(['SYK14', 'SYN14'])
sy_consecutive_contract_price = history(
sy_contracts,
fields='price',
frequency='daily',
start_date='2014-01-01',
end_date='2014-03-31'
)
#sy_consecutive_contract_price.head()
data = spot_prices.join(sy_consecutive_contract_price)
#print (data.columns)
#data.columns = ['beans_spot','beans_may','beans_july']
#data.columns = ['beans_spot','oil_spot','lt_rate_spot','st_rate_spot','storage_spot','beans_may','beans_july']
data.columns = ['beans_spot','oil_spot','lt_rate_spot','st_rate_spot','storage_spot',
'SPY','XIV','IBM','MCD','JNJ','XOM',
'beans_may','beans_july']
data['May_Delivery'] = datetime.datetime(2014,5,12) # fix with real date later
data['July_Delivery'] = datetime.datetime(2014,7,14) # fix with real date later
data['may_dtd'] = data.May_Delivery - data.index
data['july_dtd'] = data.July_Delivery - data.index
data['may_dtd'] = data['may_dtd'].astype(int) / 86400000000000
data['july_dtd'] = data['july_dtd'].astype(int) / 86400000000000
del data['May_Delivery']
del data['July_Delivery']
data['dCoC_may'] = (data['beans_spot'] - data['beans_may']) / data['may_dtd']
data['dCoC_july'] = (data['beans_spot'] - data['beans_july']) / data['july_dtd']
#####################################################################################################
#all_data = data # Make a different frame
all_data = all_data.append(data)
########## 2015 ###########################################################################
spot_prices = get_pricing(
['SOYB','USO','TLT','BSV','PSA',
'SPY','XIV','IBM','MCD','JNJ','XOM'],
fields='close_price',
start_date='2015-01-01',
end_date = '2015-03-31',
frequency='daily',
)
#spot_prices.head(3)
sy_contracts = symbols(['SYK15', 'SYN15'])
sy_consecutive_contract_price = history(
sy_contracts,
fields='price',
frequency='daily',
start_date='2015-01-01',
end_date='2015-03-31'
)
#sy_consecutive_contract_price.head()
data = spot_prices.join(sy_consecutive_contract_price)
#print (data.columns)
#data.columns = ['beans_spot','beans_may','beans_july']
#data.columns = ['beans_spot','oil_spot','lt_rate_spot','st_rate_spot','storage_spot','beans_may','beans_july']
data.columns = ['beans_spot','oil_spot','lt_rate_spot','st_rate_spot','storage_spot',
'SPY','XIV','IBM','MCD','JNJ','XOM',
'beans_may','beans_july']
data['May_Delivery'] = datetime.datetime(2015,5,12) # fix with real date later
data['July_Delivery'] = datetime.datetime(2015,7,14) # fix with real date later
data['may_dtd'] = data.May_Delivery - data.index
data['july_dtd'] = data.July_Delivery - data.index
data['may_dtd'] = data['may_dtd'].astype(int) / 86400000000000
data['july_dtd'] = data['july_dtd'].astype(int) / 86400000000000
del data['May_Delivery']
del data['July_Delivery']
data['dCoC_may'] = (data['beans_spot'] - data['beans_may']) / data['may_dtd']
data['dCoC_july'] = (data['beans_spot'] - data['beans_july']) / data['july_dtd']
#####################################################################################################
####### Append 2015 to 2014 ########################
all_data = all_data.append(data)
#####################################################
########## 2016 ###########################################################################
spot_prices = get_pricing(
['SOYB','USO','TLT','BSV','PSA',
'SPY','XIV','IBM','MCD','JNJ','XOM'],
fields='close_price',
start_date='2016-01-01',
end_date = '2016-03-31',
frequency='daily',
)
#spot_prices.head(3)
sy_contracts = symbols(['SYK16', 'SYN16'])
sy_consecutive_contract_price = history(
sy_contracts,
fields='price',
frequency='daily',
start_date='2016-01-01',
end_date='2016-03-31'
)
#sy_consecutive_contract_price.head()
data = spot_prices.join(sy_consecutive_contract_price)
#print (data.columns)
#data.columns = ['beans_spot','beans_may','beans_july']
#data.columns = ['beans_spot','oil_spot','lt_rate_spot','st_rate_spot','storage_spot','beans_may','beans_july']
data.columns = ['beans_spot','oil_spot','lt_rate_spot','st_rate_spot','storage_spot',
'SPY','XIV','IBM','MCD','JNJ','XOM',
'beans_may','beans_july']
data['May_Delivery'] = datetime.datetime(2016,5,12) # fix with real date later
data['July_Delivery'] = datetime.datetime(2016,7,14) # fix with real date later
data['may_dtd'] = data.May_Delivery - data.index
data['july_dtd'] = data.July_Delivery - data.index
data['may_dtd'] = data['may_dtd'].astype(int) / 86400000000000
data['july_dtd'] = data['july_dtd'].astype(int) / 86400000000000
del data['May_Delivery']
del data['July_Delivery']
data['dCoC_may'] = (data['beans_spot'] - data['beans_may']) / data['may_dtd']
data['dCoC_july'] = (data['beans_spot'] - data['beans_july']) / data['july_dtd']
#####################################################################################################
####### Append 2016 to 2014 and 2015 ########################
all_data = all_data.append(data)
#####################################################
# play heads and tails to confirm
all_data.head(3)
all_data.tail(3)
content_image # Change font on 'NaN'
print "Some Nans in the ETF:",all_data.beans_spot.isnull().sum()
print ("Dropping First 70 Rows...")
all_data = all_data.dropna()
print "New Count of Nans in Data:",all_data.beans_spot.isnull().sum()
all_data.head(2)
all_data.tail(2)
content_image
all_data['beans_next_day'] = all_data['beans_spot'].shift(-1) # Move all data one row down
all_data = all_data.dropna() # delete last row
all_data.head(3) # Know your pandas
all_data.tail(3)
Reference Seong's Notebook - https://www.quantopian.com/posts/research-clonable-do-you-want-parameter-optimization-click-here-to-get-started-heat-maps-included
import numpy as np
import matplotlib.pyplot as pyplot
import pandas as pd
from collections import defaultdict
X_axis = [x for x in all_data.columns]
Y_axis = [x for x in all_data.columns]
Pearson = defaultdict(dict)
for feature_x in X_axis:
for feature_y in Y_axis:
Pearson_R = np.corrcoef(all_data[feature_x],all_data[feature_y])[0,1]
Pearson[feature_x][feature_y] = Pearson_R
Pearson = pd.DataFrame(Pearson)
Pearson.index.name = "Y Axis"
Pearson.columns.name = "X Axis"
def heat_map(df):
fig = pyplot.figure()
ax = fig.add_subplot(111)
axim = ax.imshow(df.values,cmap = pyplot.get_cmap('RdYlGn'), interpolation = 'nearest')
ax.set_xlabel(df.columns.name)
ax.set_xticks(np.arange(len(df.columns)))
ax.set_xticklabels(list(df.columns))
ax.set_ylabel(df.index.name)
ax.set_yticks(np.arange(len(df.index)))
ax.set_yticklabels(list(df.index))
ax.set_title("Pearson R Between All Data")
plt.xticks(rotation=90)
pyplot.colorbar(axim)
#: Plot our heatmap
heat_map(Pearson)
content_image
print "Know your Pandas!"
print "Length of Data Frame:",len(all_data)
# Checking which models can be imported
from sklearn.ensemble import ExtraTreesRegressor # Extra Trees Model #1
#from sklearn.neural_network import MLPClassifier # Not this one yet :(
from sklearn import linear_model # Lasso model #2
from sklearn.linear_model import SGDClassifier
from sklearn.neighbors.nearest_centroid import NearestCentroid
from sklearn.naive_bayes import GaussianNB
from sklearn import tree # Decision Tree Model #3
from sklearn.ensemble import RandomForestClassifier
from sklearn.isotonic import IsotonicRegression
from sklearn.ensemble import GradientBoostingRegressor # Gradient Boosting Regressor #4
all_data = all_data.dropna()
cols_to_use = ['beans_spot','beans_may','beans_july','may_dtd','july_dtd','dCoC_may','dCoC_july',
'oil_spot','lt_rate_spot','st_rate_spot','storage_spot',
'SPY','XIV','IBM','MCD','JNJ','XOM'] # These columns will be plugged into the model
print (len(all_data))
#1
#http://scikit-learn.org/stable/modules/generated/sklearn.tree.ExtraTreeRegressor.html
rfr = ExtraTreesRegressor(n_estimators=15, max_depth=4, n_jobs=-1, random_state=17, verbose=0)
model_1 = rfr.fit(all_data[cols_to_use], all_data['beans_next_day'].values)
all_data['trees_pred'] = model_1.predict(all_data[cols_to_use])
#2
# http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html
reg = linear_model.Lasso(alpha = 0.1)
model_2 = reg.fit(all_data[cols_to_use], all_data['beans_next_day'].values)
all_data['lasso_pred'] = model_2.predict(all_data[cols_to_use])
#3
# http://scikit-learn.org/stable/auto_examples/tree/plot_tree_regression.html
clf = tree.DecisionTreeRegressor()
model_3 = clf.fit(all_data[cols_to_use], all_data['beans_next_day'].values)
all_data['d_tree_pred'] = model_3.predict(all_data[cols_to_use])
#4
# http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html
gbr = GradientBoostingRegressor(n_estimators=25, max_depth=1)
model_4 = gbr.fit(all_data[cols_to_use], all_data['beans_next_day'].values)
all_data['gbr_pred'] = model_4.predict(all_data[cols_to_use])
all_data.head()
stack_cols = ['beans_spot','beans_may','beans_july','may_dtd','july_dtd','dCoC_may','dCoC_july',
'trees_pred','lasso_pred','d_tree_pred','gbr_pred',
'oil_spot','lt_rate_spot','st_rate_spot','storage_spot',
'SPY','XIV','IBM','MCD','JNJ','XOM']
stacked = ExtraTreesRegressor(n_estimators=15, max_depth=4, n_jobs=-1, random_state=17, verbose=0)
stacked_model = stacked.fit(all_data[stack_cols], all_data['beans_next_day'].values)
all_data['stack_pred'] = stacked_model.predict(all_data[stack_cols])
all_data.head(3)
#print stacked_model.feature_importances_ # Not working for extratrees
#print model_1.feature_importances_ # Not working for extratrees
#print model_2.feature_importances_ # Not an attribute for lasso
print model_3.feature_importances_ #.format('.65f') # Decision Tree Regressor
print model_4.feature_importances_ # Gradient Boosting Regressor
print ("Use these lists for bar charts")
### Reference - https://pythonspot.com/en/matplotlib-bar-chart/
objects = cols_to_use ##### ALTER THIS WITH YOUR COLUMNS
y_pos = np.arange(len(objects))
performance = [ 1.08052637e-01, 6.75205609e-02, 8.02818292e-01, 6.20421619e-04,
1.41083053e-03, 1.47561752e-03, 1.06339815e-02, 4.64172859e-04,
5.71266355e-04, 3.62098658e-04, 1.02916181e-03, 3.59705139e-04,
4.59235080e-04, 1.81326427e-03, 1.39243322e-03, 3.48512822e-04,
6.67808510e-04] ##### ALTER THIS WITH YOUR DATA
plt.bar(y_pos, performance, align='center', alpha=0.5)
plt.xticks(y_pos, objects)
#### ALTER DESCRIPTIONS ############
plt.ylabel('Numeric Importance To Model')
plt.title('Feature Importance - Decision Tree Regressor Model')
plt.xticks(rotation=90)
plt.show()
import matplotlib.pyplot as plt # plt.rcdefaults()
import numpy as np
#import matplotlib.pyplot as plt
objects =cols_to_use
y_pos = np.arange(len(objects))
#performance = [0.48, 0.04, 0.08, 0, 0, 0, 0, 0.12, 0, 0, 0.08, 0, 0,
# 0.08, 0.08, 0, 0.04]
performance = [ 0.28, 0.4, 0.04, 0., 0., 0., 0., 0.08, 0., 0., 0.04, 0., 0.,
0., 0.16, 0., 0., ]
plt.bar(y_pos, performance, align='center', alpha=0.5)
plt.xticks(y_pos, objects)
plt.ylabel('Numeric Importance To Model')
plt.title('Feature Importance - Gradient Boosting Regressor Model')
plt.xticks(rotation=90)
plt.show()
plt.scatter(all_data.index,all_data['beans_spot'],color='red',label="Beans Spot Price")
plt.scatter(all_data.index,all_data['stack_pred'],color='blue',label="Predicted Spot Price For Beans")
plt.legend(loc='upper left')
plt.title("Comparing Stacked Model Predictions Against Actual Bean Prices")
plt.xlabel("Date")
plt.ylabel("Price")
plt.show()
spot_prices = get_pricing(
['SOYB','USO','TLT','BSV','PSA',
'SPY','XIV','IBM','MCD','JNJ','XOM'],
fields='close_price',
start_date='2017-01-01',
end_date = '2017-03-31',
frequency='daily',
)
#spot_prices.head(3)
sy_contracts = symbols(['SYK17', 'SYN17'])
sy_consecutive_contract_price = history(
sy_contracts,
fields='price',
frequency='daily',
start_date='2017-01-01',
end_date='2017-03-31'
)
#sy_consecutive_contract_price.head()
data = spot_prices.join(sy_consecutive_contract_price)
#print (data.columns)
#data.columns = ['beans_spot','beans_may','beans_july']
#data.columns = ['beans_spot','oil_spot','lt_rate_spot','st_rate_spot','storage_spot','beans_may','beans_july']
data.columns = ['beans_spot','oil_spot','lt_rate_spot','st_rate_spot','storage_spot',
'SPY','XIV','IBM','MCD','JNJ','XOM',
'beans_may','beans_july']
data['May_Delivery'] = datetime.datetime(2017,5,12) # fix with real date later
data['July_Delivery'] = datetime.datetime(2017,7,14) # fix with real date later
data['may_dtd'] = data.May_Delivery - data.index
data['july_dtd'] = data.July_Delivery - data.index
data['may_dtd'] = data['may_dtd'].astype(int) / 86400000000000
data['july_dtd'] = data['july_dtd'].astype(int) / 86400000000000
del data['May_Delivery']
del data['July_Delivery']
data['dCoC_may'] = (data['beans_spot'] - data['beans_may']) / data['may_dtd']
data['dCoC_july'] = (data['beans_spot'] - data['beans_july']) / data['july_dtd']
# Start Predictions
data['trees_pred'] = model_1.predict(data[cols_to_use])
data['lasso_pred'] = model_2.predict(data[cols_to_use])
data['d_tree_pred'] = model_3.predict(data[cols_to_use])
data['gbr_pred'] = model_4.predict(data[cols_to_use])
# stacked Prediction
data['stack_pred'] = stacked_model.predict(data[stack_cols])
test_score = data
test_score['beans_next_day'] = test_score['beans_spot'].shift(-1)
test_score = test_score.dropna()
print "R2 of Model:",stacked_model.score(test_score[stack_cols],test_score['beans_next_day'])
#Returns the coefficient of determination R^2 of the prediction.
#The coefficient R^2 is defined as (1 - u/v), where u is the regression sum of squares ((y_true - y_pred) ** 2).sum() and v is the residual sum of squares ((y_true - y_true.mean()) ** 2).sum(). Best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse). A constant model that always predicts the expected value of y, disregarding the input features, would get a R^2 score of 0.0.
plt.scatter(data.index,data['beans_spot'],color='red',label="Beans Spot Price")
plt.scatter(data.index,data['stack_pred'],color='blue',label="Predicted Spot Price For Beans")
plt.legend(loc='upper left')
plt.title("Comparing Stacked Model Predictions Against Actual Bean Prices")
plt.xlabel("Date")
plt.ylabel("Price")
plt.show()
data['signal_delta'] = data['stack_pred'] - data['stack_pred'].shift(1)
data = data.dropna()
data.tail(2)
plt.plot(data.index,data['signal_delta'],color='red',label="Signal Delta")
#plt.scatter(data.index,data['stack_pred'],color='blue',label="Predicted Spot Price For Beans")
plt.legend(loc='upper left')
plt.title("Examining Large Deltas in Model Predictions")
plt.xlabel("Date")
plt.ylabel("Price")
plt.show()