from quantopian.pipeline.data.accern import alphaone_free as alphaone
from quantopian.pipeline.data.eventvestor import CEOChangeAnnouncements as cc
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
import pandas as pd
# Number of calendar days after a CEO change during which articles are
# considered (compared via a pd.Timedelta in the sentiment filter below).
SINCE_CHANGE = 5
# Number of trading days after an article for which we track price data.
SINCE_ARTICLE = 40
Let's set up a pipeline to get some data to work with. This takes a minute to run.
# Most recent CEO-change announcement date per equity; reused as the screen.
announcement_date = cc.asof_date.latest

# Columns we need downstream: Accern sentiment/impact, the announcement
# date, and the latest close price.
pipeline_columns = {
    'sentiment': alphaone.article_sentiment.latest,
    'impact_score': alphaone.impact_score.latest,
    'announcement_date': announcement_date,
    'price': USEquityPricing.close.latest,
}

# Only keep equities that have had a CEO change announcement at all.
pipe = Pipeline(columns=pipeline_columns, screen=announcement_date.notnull())
factor = run_pipeline(pipe, start_date='2012-08-26', end_date='2014-12-31')
factor.head(5)
Now we'll filter out the data points that aren't close enough to a CEO change.
# Work on a copy so we don't silently add columns to `factor` (the original
# aliased it, mutating `factor` as a side effect).
data = factor.copy()
# New column holding the first index level (the simulation date) stripped of
# its time-of-day, so it can be compared with the announcement_date column.
# NOTE: the original used map(lambda ...), which returns an iterator on
# Python 3 and breaks the column assignment; a list comprehension is safe
# on both Python 2 and 3.
data['asof_date'] = [pd.Timestamp(ts.date())
                     for ts in factor.index.get_level_values(0)]
# Drop rows observed too long after the last CEO change. The 1.6 multiplier
# appears to convert a trading-day horizon into an approximate calendar-day
# window (~252 trading days per 365 calendar days) — TODO confirm.
max_age = pd.Timedelta((SINCE_CHANGE + SINCE_ARTICLE) * 1.6, unit='d')
data = data[data['asof_date'] - data['announcement_date'] <= max_age]
data.head(5)
Here we make a new DataFrame with a row for each article published fewer than 5 days after a CEO change.
# Keep rows that (a) carry a sentiment score and (b) were observed within
# SINCE_CHANGE days of the most recent CEO change announcement.
has_sentiment = pd.notnull(data['sentiment'])
recent_change = (data['asof_date'] - data['announcement_date']
                 < pd.Timedelta(SINCE_CHANGE, unit='d'))
sentiment = data[has_sentiment & recent_change]
sentiment.head(10)
Now we append columns containing the relative price difference from the day of article publication.
# Applied row-wise to the `sentiment` DataFrame right below its definition.
def get_prices(s):
    """Return relative returns for an equity in the days after an article.

    `s` is one row of the `sentiment` DataFrame (a Series); pandas stores
    the row's MultiIndex label in `s.name`, so `s.name[1]` is the equity.
    Reads the module-level `data` DataFrame and SINCE_ARTICLE constant.

    Returns a Series of price changes relative to the event-day price,
    re-indexed 0..SINCE_ARTICLE to mean "trading days after the event".
    """
    # Drops all data before the article's publication date, using a slice
    # on the first (date) index level.
    data_after_article = data.loc[s['asof_date']:]
    # Isolates the data for the specific equity. The equity is stored in the
    # series name automatically by the .apply call below the definition.
    equity_data = data_after_article.loc(axis=0)[:, s.name[1]]
    # Gets the equity's prices for a specified number of days after the
    # event, by dropping pricing data that is too old.
    equity_pricing = equity_data['price'].head(SINCE_ARTICLE + 1)
    # Returns the equity's relative price differences from the day of the
    # event, with the series index replaced by integer day offsets.
    return ((equity_pricing - s['price']) / s['price']).reset_index(drop=True)
# Compute each row's forward return path, then attach those paths to the
# sentiment rows as new (integer-named) columns.
price_paths = sentiment.apply(get_prices, axis=1)
matches = pd.concat([sentiment, price_paths], axis=1)
matches.head(5)
print(len(matches))
import scipy.stats as stats
import matplotlib.pyplot as plt
import numpy as np
Here's a plot for rank correlation between the sentiment rating and the relative price change down the road. In the first 12 days, the stock price tends to move in the opposite direction. A good strategy might be to go long or short in a stock for 12 days after an article, going in the opposite direction of the sentiment.
# Spearman rank correlation between article sentiment and the relative
# return at each forward lag (index 0 is unused and stays 0).
scores = np.zeros(SINCE_ARTICLE + 1)
for lag in range(1, SINCE_ARTICLE + 1):
    rho, _ = stats.spearmanr(matches['sentiment'], matches[lag])
    scores[lag] = rho
plt.bar(range(1, SINCE_ARTICLE + 1), scores[1:])
plt.xlabel('Trading days afterward')
plt.xlim((1, SINCE_ARTICLE + 1))
plt.ylabel('Rank correlation between article sentiment and returns');
Here's the spread of 12-day returns, with equities sorted into baskets by sentiment score. This shows how the signal might be usable in a long-short strategy. It might be wise to exclude sentiment scores under -0.2 for a smooth spread.
# Sort equities into equal-width sentiment baskets over [-1, 1) and compare
# mean 12-day returns per basket.
num_baskets = 10
delta = 2. / num_baskets
avgs = np.zeros(num_baskets + 1)
for b in range(num_baskets):
    lower = -1 + b * delta
    upper = -1 + (b + 1) * delta
    in_basket = (matches['sentiment'] >= lower) & (matches['sentiment'] < upper)
    avgs[b] = matches[in_basket][12].mean()
# The half-open intervals above exclude a sentiment of exactly 1,
# so it gets its own final bar.
avgs[num_baskets] = matches[matches['sentiment'] == 1][12].mean()
plt.bar(range(num_baskets + 1), avgs)
plt.xticks(range(num_baskets + 1), np.around(np.arange(-1, 1.2, 0.2), decimals=1))
plt.xlabel('Article sentiment')
plt.ylabel('Mean 12-day returns');
We can try to incorporate Accern's impact score, but it doesn't make much difference.
# Blend sentiment with Accern's impact score into one signal, then repeat
# the per-lag Spearman rank correlation against forward returns.
multi_signal = matches['sentiment'] * matches['impact_score']
scores = np.zeros(SINCE_ARTICLE + 1)
pvalues = np.zeros(SINCE_ARTICLE + 1)
for lag in range(1, SINCE_ARTICLE + 1):
    rho, p = stats.spearmanr(multi_signal, matches[lag])
    scores[lag] = rho
    pvalues[lag] = p
plt.bar(range(1, SINCE_ARTICLE + 1), scores[1:])
plt.xlabel('Trading days afterward')
plt.xlim((1, SINCE_ARTICLE + 1))
plt.ylabel('Rank correlation between article sentiment times impact score and returns');
# Bucket equities by the combined signal. The basket edges below show the
# signal spans [-100, 100] (sentiment in [-1, 1] times an impact score,
# presumably 0-100 — TODO confirm against Accern docs).
num_baskets = 10
delta = 200. / num_baskets
avgs = np.zeros(num_baskets + 1)
for i in range(num_baskets):
    basket = matches[(multi_signal >= -100 + i * delta) &
                     (multi_signal < -100 + (i + 1) * delta)]
    avgs[i] = basket[12].mean()
# The half-open baskets exclude a signal of exactly 100; give it its own bar.
avgs[num_baskets] = matches[multi_signal == 100][12].mean()
plt.bar(range(num_baskets + 1), avgs)
# BUG FIX: the tick labels previously ran -1..1 (copied from the sentiment
# plot) even though the baskets span -100..100; label the actual basket edges.
plt.xticks(range(num_baskets + 1), np.arange(-100, 120, 20));
plt.xlabel('Article sentiment times impact score')
plt.ylabel('Mean 12-day returns');