Notebook

Talib example

Apply a talib function to multiple securities returned by the get_pricing method

In [2]:
# Import the whole talib library
import talib
In [3]:
# Date range for the example, defined once so later cells can reuse it
start_date, end_date = '2009-08-1', '2009-10-1'

# Security objects for the two tickers, again so they are easy to reference later on
aapl, ibm = symbols('AAPL'), symbols('IBM')
In [6]:
# Fetch daily high/low/open/close prices for AAPL with get_pricing.
# For a single security the result is a pandas DataFrame.
aapl_data = get_pricing(
    symbols=aapl,
    start_date=start_date,
    end_date=end_date,
    frequency='daily',
    fields=['high', 'low', 'open_price', 'close_price'],
)
In [7]:
# Display the first five rows of the resulting DataFrame
aapl_data.head(5)
Out[7]:
high low open_price close_price
2009-08-03 00:00:00+00:00 166.64 164.87 165.21 166.42
2009-08-04 00:00:00+00:00 165.57 164.21 164.93 165.50
2009-08-05 00:00:00+00:00 167.39 164.21 165.75 165.00
2009-08-06 00:00:00+00:00 166.51 163.68 165.58 163.91
2009-08-07 00:00:00+00:00 166.60 164.80 165.49 165.58
In [8]:
# Let's see what type each column is
type(aapl_data['high'])
Out[8]:
<class 'pandas.core.series.Series'>
In [20]:
# Good. Each column is a pandas Series, which the talib function accepts as input.
# Run CDLENGULFING on AAPL's open/high/low/close columns and show the result.
aapl_CDLENGULFING = talib.CDLENGULFING(
    open=aapl_data['open_price'],
    high=aapl_data['high'],
    low=aapl_data['low'],
    close=aapl_data['close_price'],
)
aapl_CDLENGULFING
Out[20]:
array([   0,    0,    0,    0,    0, -100,    0,  100,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,  100,    0,    0,    0,
          0,    0,    0,    0, -100,    0,    0,    0,    0,    0], dtype=int32)
In [21]:
# The talib result should be the same length as the inputs.
# First, let's check what type of object came back.
type(aapl_CDLENGULFING)
Out[21]:
<type 'numpy.ndarray'>
In [22]:
# Now check the length of the result (it should match the number of input rows)
len(aapl_CDLENGULFING)
Out[22]:
43
In [25]:
# Check the input length (number of rows in aapl_data) for comparison
len(aapl_data)
Out[25]:
43
In [27]:
# Good. It all checks out.
# Notice that the talib function returned a numpy array.
# To get the most recent (i.e. the last) value, simply index it with -1.
aapl_CDLENGULFING[-1]
Out[27]:
0
In [32]:
# Everything works as expected, but so far we only fetched a single security.
# Let's make exactly the same request for two securities at once.
aapl_ibm_data = get_pricing(
    symbols=[aapl, ibm],
    start_date=start_date,
    end_date=end_date,
    frequency='daily',
    fields=['high', 'low', 'open_price', 'close_price'],
)
In [33]:
# With two securities the result is a pandas Panel rather than a DataFrame.
# DataFrames are much easier to work with,
# so let's turn it into one using the to_frame() method and confirm both types.
aapl_ibm_data_df = aapl_ibm_data.to_frame()
display(type(aapl_ibm_data), type(aapl_ibm_data_df))
<class 'pandas.core.panel.Panel'>
<class 'pandas.core.frame.DataFrame'>
In [34]:
# Let's see what aapl_ibm_data_df looks like (first five rows)
aapl_ibm_data_df.head(5)
Out[34]:
high low open_price close_price
major minor
2009-08-03 00:00:00+00:00 Equity(24 [AAPL]) 166.640 164.870 165.210 166.420
Equity(3766 [IBM]) 119.403 118.139 118.328 119.323
2009-08-04 00:00:00+00:00 Equity(24 [AAPL]) 165.570 164.210 164.930 165.500
Equity(3766 [IBM]) 119.244 118.467 118.617 119.154
2009-08-05 00:00:00+00:00 Equity(24 [AAPL]) 167.390 164.210 165.750 165.000
In [35]:
# The xs() method pulls out one security (a cross-section of index level 1) at a time.
# Run the same talib function on AAPL's slice of the combined frame.
aapl_slice = aapl_ibm_data_df.xs(aapl, level=1)
talib.CDLENGULFING(
    open=aapl_slice['open_price'],
    high=aapl_slice['high'],
    low=aapl_slice['low'],
    close=aapl_slice['close_price'],
)
Out[35]:
array([   0,    0,    0,    0,    0, -100,    0,  100,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,  100,    0,    0,    0,
          0,    0,    0,    0, -100,    0,    0,    0,    0,    0], dtype=int32)
In [40]:
# Great. That is what we got before.
# However, we probably want to get all the results back at once, not one at a time.
# To do that we want to group by security and then apply our talib function to each group.
# One little problem...
# The function given to the apply method gets passed a dataframe, but our talib function
# takes the individual price columns. The simplest way around that is to make a small helper function.
def my_CDLENGULFING(df):
    """Return the latest CDLENGULFING value for one security's price rows.

    Intended for use with ``groupby(...).apply``, which hands each group to
    the function as a DataFrame, whereas ``talib.CDLENGULFING`` takes the four
    price columns as separate arguments.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows for a single security with ``open_price``, ``high``, ``low`` and
        ``close_price`` columns. Assumed to be in chronological order, so the
        last row is the most recent one.

    Returns
    -------
    The last element of the array returned by ``talib.CDLENGULFING``.
    """
    # Pass plain numpy arrays rather than Series. Some talib releases return a
    # Series when given Series inputs, and integer indexing on a label-indexed
    # Series is not reliably positional. With arrays in, an array comes back
    # and [-1] is always the last row.
    pattern = talib.CDLENGULFING(
        open=df.open_price.values,
        high=df.high.values,
        low=df.low.values,
        close=df.close_price.values,
    )
    return pattern[-1]
In [41]:
# Group the rows by security (index level 1) and run the helper once per group
per_security = aapl_ibm_data_df.groupby(level=1)
cdlengulfing_latest = per_security.apply(my_CDLENGULFING)
cdlengulfing_latest
Out[41]:
minor
Equity(24 [AAPL])     0
Equity(3766 [IBM])    0
dtype: int64
In [42]:
# The result above is a series indexed by security. The values are the latest CDLENGULFING
In [ ]: