@Peter, I ran the algo with the code you had suggested and it works, but it still needs work overall. Allow me to explain what I am looking to do:
- For each security in the list, calculate its Sharpe ratio
- Build a list of all the Sharpe values once they are calculated
- Build a correlation matrix to determine which portfolio will have the smallest sum of correlations
- From the top 10 stocks by Sharpe ratio, pick the combination with the smallest sum of correlations as the 'best portfolio'
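To make the intent concrete, here is a rough sketch of steps 2 through 5 (illustration only: best_low_corr_portfolio is a made-up name, the closes argument is assumed to be a (days, n_symbols) array of closing prices in oldest-first order with a parallel symbols list, and it annualizes with sqrt(252) rather than whatever factor ends up being right):

import numpy as np
from itertools import combinations

def best_low_corr_portfolio(closes, symbols, top_n=10, size=4):
    """Pick `size` symbols out of the `top_n` with the highest Sharpe
    ratios such that the sum of pairwise return correlations is minimal."""
    # Daily returns from a (days, n_symbols) array of closing prices
    daily_ret = np.diff(closes, axis=0) / closes[:-1]
    # Sharpe ratio per symbol (assumed annualization factor of sqrt(252))
    sharpe = daily_ret.mean(axis=0) / daily_ret.std(axis=0) * np.sqrt(252)
    # Indices of the top_n symbols by Sharpe ratio, best first
    top = np.argsort(sharpe)[::-1][:top_n]
    # Correlation matrix of the daily returns of just those symbols
    corr = np.corrcoef(daily_ret[:, top].T)
    # Brute-force every possible portfolio of `size` symbols and keep the
    # one whose pairwise correlations sum to the smallest value
    best = min(combinations(range(top_n), size),
               key=lambda p: sum(corr[a, b] for a, b in combinations(p, 2)))
    return [symbols[top[i]] for i in best]

The full script below does the same thing, plus the CSV loading and the minimum-volume filter.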
Here is the code I have so far; the file handling works, and this is what I'd like to build on:
best_sharpe.py
""" Construct a portfolio of equities that have high sharpe ratios
and low correlation to each other. Script proceeds by:
1. Reading data from every .csv file in the execution directory
2. Calculating the sharpe ratios for each equity and sorting
3. Taking the top _n_ equities according to sharpe ratio
4. Computing a correlation matrix for those n equities
5. Selecting a portfolio that minimizes the sum of cross-correlations
"""
import numpy as np
from numpy import recfromcsv
from itertools import combinations
import os
# Where to find the csv files
csv_dir = "."
#csv_dir = os.environ['QSDATA'] + "/Yahoo"
# Require a minimum average daily volume
min_average_volume = 100000
# Portfolio size is the number of instruments included in our 'best portfolio'
portfolio_size = 4
# Stocks are sorted by sharpe ratio, then the top n stocks are analysed for cross-correlation
top_n_equities = 10
# Get an array of file names in the current directory ending with csv
files = [fi for fi in os.listdir(csv_dir) if fi.endswith(".csv")]
# Grab a second array with just the names, for convenience. These are used
# to name columns of data.
symbols = [os.path.splitext(fi)[0] for fi in files]
# Create a dictionary to get a symbols index from a symbol name.
symbol_to_symbols_index = {}
for i, symbol in enumerate(symbols):
    symbol_to_symbols_index[symbol] = i
# Load one file so we can find out how many days of data are in it.
firstfile = recfromcsv(csv_dir+"/"+files[0])
datalength = len(firstfile['close'])
# Creates a 'record array', which is like a spreadsheet with a header. The header is the
# symbol (filename without the csv extension), and the data is all floats.
closes = np.recarray((datalength,), dtype=[(symbol, 'float') for symbol in symbols])
volume = np.recarray((datalength,), dtype=[(symbol, 'float') for symbol in symbols])
# Do the same for daily returns, except one row smaller than closes.
daily_ret = np.recarray((datalength-1,), dtype=[(symbol, 'float') for symbol in symbols])
# Initialize some arrays for storing data
average_returns = np.zeros(len(files))
return_stdev = np.zeros(len(files))
sharpe_ratios = np.zeros(len(files))
cumulative_returns = np.recarray((datalength,), dtype=[(symbol, 'float') for symbol in symbols])
average_volume = np.zeros(len(files))
# This loops over every filename. 'i' is the index into the array, which we use to
# add data to the other data structures we initialized. That way the data for 'aapl' is
# at the same index in every data structure.
for i, file in enumerate(files):
    # Reads in the data from the file
    data = recfromcsv(csv_dir+"/"+file)
    # Skip it if there isn't enough data - simplifies everything else
    if len(data) != datalength:
        continue
    # Read the 'close' column of the data and reverse the numbers
    closes[symbols[i]] = data['close'][::-1]
    volume[symbols[i]] = data['volume'][::-1]
    # Get the closing price for the symbol - remember the columns are named by the symbol,
    # so symbols[i] is the index into closes. Tacking on [1:] means from the second element
    # to the end, and [:-1] means from the first element up to the second-last, so we're
    # subtracting day 1 from day 2, day 2 from day 3, and so on.
    daily_ret[symbols[i]] = (closes[symbols[i]][1:]-closes[symbols[i]][:-1])/closes[symbols[i]][:-1]
    # Now that we have the daily returns in %, calculate the relevant stats.
    average_returns[i] = np.mean(daily_ret[symbols[i]])
    return_stdev[i] = np.std(daily_ret[symbols[i]])
    sharpe_ratios[i] = (average_returns[i] / return_stdev[i]) * np.sqrt(datalength)
    average_volume[i] = np.mean(volume[symbols[i]])
# Now we have all ratios for all equities. The next line doesn't sort them by sharpe, but it
# gives us the indexes of the sharpe_ratios array in order. That is, an array [5, 3, 9, 2, 0] would
# return [4, 3, 1, 0, 2].
#sorted_sharpe_indices = np.argsort(sharpe_ratios)[::-1][0:top_n_equities]
#
# Instead of using an array slice to get the top_n_equities, loop through
# the array manually to apply additional criteria.
sorted_sharpe_indices_complete = np.argsort(sharpe_ratios)[::-1]
sorted_sharpe_indices = []
for si in sorted_sharpe_indices_complete:
    if average_volume[si] < min_average_volume:
        continue
    sorted_sharpe_indices.append(si)
    if len(sorted_sharpe_indices) >= top_n_equities:
        break
# Next we create a datastructure to hold the daily returns of the top n equities
cov_data = np.zeros((datalength-1, top_n_equities))
# The sorted_sharpe_indices list has the indices, in order, of the top n sharpe ratios. Grab
# the daily returns for those stocks and put them in our cov_data array (cov is short for
# covariance)
for i, symbol_index in enumerate(sorted_sharpe_indices):
    cov_data[:,i] = daily_ret[symbols[symbol_index]]
# Now make a correlation matrix for the top n equities
cormat = np.corrcoef(cov_data.transpose())
# Create all possible combinations of the n top equities for the given portfolio size.
portfolios = list(combinations(range(0, top_n_equities), portfolio_size))
# For each possible combination of the top n equities, add up all the pairwise correlations
# between the instruments in that portfolio
total_corr = [sum([cormat[x[0]][x[1]] for x in combinations(p, 2)]) for p in portfolios]
# Find the portfolio with the smallest sum of correlations, and convert that back into
# the instrument names via a lookup in the symbols array
best_index = total_corr.index(np.nanmin(total_corr))
best_portfolio = [symbols[sorted_sharpe_indices[i]] for i in portfolios[best_index]]
print(best_portfolio)
for symbol in best_portfolio:
    print("symbol={} average_return={} ret_stddev={} sharpe={} avg_vol={}".format(
        symbol,
        average_returns[symbol_to_symbols_index[symbol]],
        return_stdev[symbol_to_symbols_index[symbol]],
        sharpe_ratios[symbol_to_symbols_index[symbol]],
        average_volume[symbol_to_symbols_index[symbol]]))
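For what it's worth, the brute-force search should be cheap at these settings: there are only C(10, 4) = 210 candidate portfolios, and each one needs C(4, 2) = 6 correlation lookups, so itertools.combinations seems fine here. A quick sanity check (math.comb needs Python 3.8+):

from math import comb

comb(10, 4)   # 210 candidate portfolios of size 4 drawn from the top 10
comb(4, 2)    # 6 pairwise correlations summed per portfolio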