from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
from quantopian.pipeline.factors import CustomFactor
from quantopian.pipeline.filters import Q1500US
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.data.psychsignal import stocktwits
import alphalens as al
import numpy as np
PsychSignal's StockTwits Trader Mood dataset analyzes traders' messages posted on StockTwits and provides a measure of bull/bear intensity for securities based on message sentiment.
In this notebook, we will construct a couple of pipeline factors based on this dataset and analyze them using Alphalens to determine if they can effectively predict returns. Afterwards, we will develop an algorithm based on the results of our analysis.
Psychsignal's factors used in this notebook:
- bull_minus_bear - subtracts the bearish intensity from the bullish intensity [BULL - BEAR] to provide an immediate net score.
- bull_scored_messages - total count of bullish sentiment messages scored by PsychSignal's algorithm.
- bear_scored_messages - total count of bearish sentiment messages scored by PsychSignal's algorithm.
The following custom factors calculate the average [BULL - BEAR] intensity over the past 3 days, and the average number of messages over a 30-day period. We will use [BULL - BEAR] intensity to rank securities based on trader mood, and we will only consider the top 1000 securities by average number of messages over that 30-day period.
class BullBearIntensity(CustomFactor):
    """
    NaN-aware mean of ``stocktwits.bull_minus_bear`` over a window of
    length 3.
    """
    window_length = 3
    inputs = [stocktwits.bull_minus_bear]

    def compute(self, today, assets, out, bull_minus_bear):
        # Average along axis 0 of the input window, skipping NaNs, and
        # store the result in the output buffer supplied by the caller.
        out[:] = np.nanmean(bull_minus_bear, axis=0)
class PsychSignalMessages(CustomFactor):
    """
    NaN-aware mean of the combined bull + bear scored message counts over
    a window of length 30. Used to rank each security by message coverage.
    """
    window_length = 30
    inputs = [stocktwits.bull_scored_messages, stocktwits.bear_scored_messages]

    def compute(self, today, assets, out, bull_msgs, bear_msgs):
        # Element-wise total of scored messages for every cell of the window.
        total_msgs = bull_msgs + bear_msgs
        # Average along axis 0, skipping NaNs, into the caller's buffer.
        out[:] = np.nanmean(total_msgs, axis=0)
First, we need to run our pipeline over the period of time that we want to analyze. We will look at a 1 year period, between 2014-01-01 and 2015-01-01.
# Run pipeline over 1 year period
def make_pipeline():
    """
    Create our pipeline.

    Columns:
        sentiment -- rank of the BullBearIntensity factor.
        sector    -- the Sector classifier.

    Screen:
        Q1500US members whose descending PsychSignalMessages rank is
        1000 or better.

    Returns the configured Pipeline object.
    """
    # ascending=False: the security with the highest average message
    # count receives the best (smallest) rank.
    message_rank = PsychSignalMessages().rank(ascending=False)

    # Use `<=` so that the security ranked exactly 1000 is kept. The
    # previous `1000 > message_rank` test admitted one security fewer
    # than the intended "top 1000".
    universe = Q1500US() & (message_rank <= 1000)

    sector = Sector()
    sentiment = BullBearIntensity().rank()

    return Pipeline(
        columns={
            'sentiment': sentiment,
            'sector': sector,
        },
        screen=universe,
    )
# NOTE: rows with a NaN value are deliberately left as they are. This cell
# used to call `results.fillna(value=0)` without keeping the returned copy,
# so `results` was never modified. Making that fill effective would hand
# unranked securities a sentiment rank of 0, which would presumably sort
# them below every ranked security and change the analysis that follows.
results = run_pipeline(make_pipeline(), '2014-01-01', '2015-01-01')
results.head(5)
Next, we need pricing data for securities that were present in our trading universe over our 1 year period
# Collect the distinct entries of the second level of the pipeline
# output's index (the assets).
asset_list = results.index.levels[1].unique()

# Fetch the 'price' field for those assets from 2014-01-01 through
# 2015-02-01, i.e. the pipeline year plus one additional month.
prices = get_pricing(
    asset_list,
    fields='price',
    start_date='2014-01-01',
    end_date='2015-02-01')
prices.head(5)
Alphalens allows us to group our assets by sector, so we will use Morningstar's sector map and the sector codes returned by our pipeline.
# Extract Sector mappings from pipeline output
sectors = results['sector']

# Build the sector code -> sector name map that is handed to Alphalens as
# group labels. Copy the mapping first: binding the name straight to
# Sector.SECTOR_NAMES and then assigning into it would add the
# missing-value entry to the mapping stored on the Sector class itself.
sector_names = dict(Sector.SECTOR_NAMES)
sector_names[Sector.missing_value] = 'None'
Now we will use Alphalens to get the forward returns of our factor for periods of 1, 5 and 10 holding days. Alphalens does not take commissions or slippage into account; it just gives us a rough idea of what the returns would have been if we had held a position in a given asset during the holding period.
# Combine the 'sentiment' column with the price data for holding periods of
# 1, 5 and 10, grouping assets by their sector code and labelling the
# groups with the sector names.
factor_data = al.utils.get_clean_factor_and_forward_returns(
    factor=results['sentiment'],
    prices=prices,
    periods=(1, 5, 10),
    groupby=sectors,
    groupby_labels=sector_names)
factor_data.head(5)
Notice that factor_data also includes a factor_quantile column, which buckets securities by their factor value on a given date.
Let's use Alphalens to compute mean returns by quantile, and plot the corresponding buckets. If our factor is a good predictor of returns, higher quantiles should have higher returns, and lower quantiles should have lower returns. This will help us build our Long/Short strategy later.
# Mean return (and its standard error) per factor quantile, without
# splitting the result by group.
mean_return_by_q, std_err_by_q = al.performance.mean_return_by_quantile(
    factor_data,
    by_group=False)

# Run every column through al.utils.rate_of_return before plotting the
# per-quantile bars. The trailing semicolon suppresses the notebook's echo
# of the returned value.
al.plotting.plot_quantile_returns_bar(
    mean_return_by_q.apply(al.utils.rate_of_return, axis=0));
This confirms our pipeline factor is a pretty good predictor of returns.
This plot can also give us a rough idea of what would be a good turnover frequency for our strategy. A 5 day holding period seems to be a good choice here since it has the highest returns for quantile 5, and decently low returns for quantile 1.
Let's now look at the returns over time by quantile for a 5 day holding period.
# Same per-quantile statistics as before, this time kept separate for each
# date (by_date=True) so they can be plotted over time.
mean_return_by_q_daily, std_err_by_q_daily = al.performance.mean_return_by_quantile(
    factor_data,
    by_date=True)

# Cumulative return of each quantile for period=5. The trailing semicolon
# suppresses the notebook's echo of the returned value.
al.plotting.plot_cumulative_returns_by_quantile(
    mean_return_by_q_daily,
    period=5);
We can see that returns for quantile 5 increase consistently. This is good, since it represents the return stream of the securities with the highest alpha values. Returns in quantile 1 show an uptrend between mid-May 2014 and late July 2014. This might have a negative effect on our strategy, so it would be interesting to see how our strategy behaves over that period.
Using what we have learned about our factor using Alphalens, let's build our strategy.