update for PyCharm files, and don't add quiz files

ericsomdahl · Oct 18, 2014 · 7b0281a · 7b0281a
1 parent 235d250
commit 7b0281a
Show file tree

Hide file tree

Showing 20 changed files with 1,228 additions and 0 deletions.
diff --git a/HW1/optimizer.py b/HW1/optimizer.py
@@ -0,0 +1,236 @@
+'''
+Created on Sep 28, 2014
+
+HW1: Find an optimal buy-and-hold portfolio for an arbitrary time period in the past.
+Based on code copied from Tutorial 1
+
+@author: Eric Somdahl
+@contact: [email protected]
+@summary: HW1
+'''
+
+# QSTK Imports
+import QSTK.qstkutil.qsdateutil as du
+import QSTK.qstkutil.tsutil as tsu
+import QSTK.qstkutil.DataAccess as da
+
+# Third Party Imports
+import datetime as dt
+import pandas as pd
+import numpy
+import matplotlib.pyplot as plt
+
+# Report the pandas version when the module is loaded (Python 2 print statement).
+print "Pandas Version", pd.__version__
+
+
+def init_data(dt_start, dt_end, ls_symbols):
+    """
+    Fetch gap-filled closing prices for the given symbols and date range.
+
+    dt_start, dt_end -- range bounds handed to du.getNYSEdays
+    ls_symbols       -- symbols requested from the 'Yahoo' DataAccess object
+
+    Returns a dictionary keyed by data item (only 'close' here) whose values
+    are the objects returned by DataAccess.get_data after NaN filling.
+    """
+    # Closing prices are stamped at 16:00, so ask for that time of day.
+    ldt_trading_days = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))
+
+    # Request every needed key in a single get_data call.
+    c_source = da.DataAccess('Yahoo')
+    ls_keys = ['close']
+    ldf_raw = c_source.get_data(ldt_trading_days, ls_symbols, ls_keys)
+    d_data = dict(zip(ls_keys, ldf_raw))
+
+    # Patch NaNs: carry values forward, then backward, then fall back to 1.0.
+    for s_key in ls_keys:
+        df_filled = d_data[s_key].fillna(method='ffill')
+        df_filled = df_filled.fillna(method='bfill')
+        d_data[s_key] = df_filled.fillna(1.0)
+
+    return d_data
+
+
+def calc_basic_sim_info(dt_start, dt_end, ls_symbols, lf_allocations):
+    """
+    Compute prices, daily portfolio value and daily returns for a
+    buy-and-hold portfolio.
+
+    Returns a tuple (na_price, na_daily_cumulative_val, na_rets):
+      na_price                -- ndarray of close prices from init_data
+      na_daily_cumulative_val -- per-day sum of the allocation-weighted,
+                                 first-day-normalized prices
+      na_rets                 -- a copy of that series after tsu.returnize0
+                                 has been applied to it
+    """
+    na_price = init_data(dt_start, dt_end, ls_symbols)['close'].values
+
+    # Scale each column by its first-row price, then weight by allocation.
+    na_weighted = (na_price / na_price[0, :]) * lf_allocations
+
+    # Summing across each row gives the portfolio value for that day.
+    na_daily_cumulative_val = numpy.apply_along_axis(numpy.sum, 1, na_weighted)
+
+    # returnize0 rewrites its ndarray argument, so hand it a copy and keep
+    # the cumulative values intact.
+    na_rets = na_daily_cumulative_val.copy()
+    tsu.returnize0(na_rets)
+
+    return na_price, na_daily_cumulative_val, na_rets
+
+
+def compare_vs_sp500(dt_start, dt_end, ls_symbols, lf_allocations):
+    """
+    Plot the portfolio's daily cumulative value against SPY over the same
+    date range and save the chart as 'hw1.pdf'.
+    """
+    # Element 1 of the simulation tuple is the daily cumulative value.
+    na_portfolio_val = calc_basic_sim_info(dt_start, dt_end, ls_symbols, lf_allocations)[1]
+    na_spy_val = calc_basic_sim_info(dt_start, dt_end, ['SPY'], [1.0])[1]
+
+    plt.clf()
+    fig = plt.figure()
+    fig.add_subplot(111)
+
+    # SPY is drawn first (faded) so the legend entries line up with the lines.
+    plt.plot(na_spy_val, alpha=0.4)
+    plt.plot(na_portfolio_val)
+    plt.legend(['SPY', 'Portfolio'])
+
+    plt.ylabel('Cumulative Returns')
+    plt.xlabel('Trading Day')
+    fig.autofmt_xdate(rotation=45)
+    plt.savefig('hw1.pdf', format='pdf')
+
+
+def simulate(dt_start, dt_end, ls_symbols, lf_allocations):
+    """
+    Evaluate one allocation of a buy-and-hold portfolio.
+
+    Returns a tuple
+    (f_daily_std_deviation, f_avg_daily_return, f_sharpe_ratio,
+     f_cumulative_return), where the cumulative return is the last entry of
+    the daily cumulative value series produced by calc_basic_sim_info.
+    """
+    na_price, na_daily_cumulative_val, na_rets = calc_basic_sim_info(
+        dt_start, dt_end, ls_symbols, lf_allocations)
+
+    f_avg_daily_return = numpy.mean(na_rets)
+    f_daily_std_deviation = numpy.std(na_rets)
+
+    # The number of price rows is used as the trading-day count instead of a
+    # fixed 252, because the date range may differ between invocations.
+    i_trading_days = na_price.shape[0]
+    f_sharpe_ratio = numpy.sqrt(i_trading_days) * f_avg_daily_return / f_daily_std_deviation
+
+    f_cumulative_return = na_daily_cumulative_val[-1]
+
+    return f_daily_std_deviation, f_avg_daily_return, f_sharpe_ratio, f_cumulative_return
+
+
+def main():
+    """
+    Search every legal allocation of the hard-coded symbols for the one with
+    the highest Sharpe ratio over 2010, print its statistics and chart it
+    against SPY.
+
+    If no allocation yields a usable (non-NaN) Sharpe ratio, a message is
+    printed and no chart is produced.
+    """
+    # List of symbols
+    ls_symbols = ['AXP', 'HPQ', 'IBM', 'HNZ']
+
+    # Start and End date of the charts
+    dt_start = dt.datetime(2010, 1, 1)
+    dt_end = dt.datetime(2010, 12, 31)
+
+    # Start with "nothing found" rather than a Sharpe ratio of 0, so that a
+    # period in which every allocation has a non-positive Sharpe ratio still
+    # selects the least-bad allocation instead of leaving it as None.
+    lf_best_allocation = None
+    best_output = None
+
+    # range over all possible allocation permutations
+    for an_allocation in get_possible_allocations(ls_symbols):
+        vol, daily_ret, sharpe, cum_ret = simulate(dt_start, dt_end, ls_symbols, an_allocation)
+
+        # A NaN ratio (e.g. zero volatility with zero mean) cannot be ranked.
+        if numpy.isnan(sharpe):
+            continue
+
+        if best_output is None or sharpe > best_output[2]:
+            best_output = (vol, daily_ret, sharpe, cum_ret)
+            lf_best_allocation = an_allocation
+
+    if best_output is None:
+        print "No allocation produced a usable Sharpe ratio; nothing to report"
+        return
+
+    print "Start Date: {0:%B %d, %Y}".format(dt_start)
+    print "End Date: {0:%B %d, %Y}".format(dt_end)
+    print "Symbols: {0}".format(ls_symbols)
+    print "Optimal Allocations: {0}".format(lf_best_allocation)
+    print "Sharpe Ratio: {0}".format(best_output[2])
+    print "Volatility (stdev of daily returns): {0}".format(best_output[0])
+    print "Average Daily Return: {0}".format(best_output[1])
+    print "Cumulative Return: {0}".format(best_output[3])
+
+    compare_vs_sp500(dt_start, dt_end, ls_symbols, lf_best_allocation)
+
+
+def get_possible_allocations(ls_symbols):
+    """
+    Enumerate every allocation, in steps of 0.1, that sums to exactly 1.
+
+    ls_symbols -- list of symbols; only its length is used.
+
+    Returns a list of 1-D float ndarrays, one weight per symbol, in
+    lexicographic order (last symbol varies fastest). An empty symbol list
+    yields an empty result.
+
+    The candidates are enumerated as integer tenths and tested with an exact
+    integer sum. Summing the float weights and comparing with == 1 silently
+    rejects legal allocations such as (0.7, 0.2, 0.1), whose float sum is
+    0.9999999999999999.
+    """
+    import itertools
+
+    # Weights are multiples of 1/i_steps, i.e. 0.0, 0.1, ..., 1.0.
+    i_steps = 10
+
+    # accumulator for legal allocations
+    lf_legal_allocations = []
+
+    for ti_tenths in itertools.product(range(i_steps + 1), repeat=len(ls_symbols)):
+        # adds up to 1? (checked on integers, so no rounding error)
+        if sum(ti_tenths) == i_steps:
+            lf_legal_allocations.append(numpy.array(ti_tenths) / float(i_steps))
+
+    return lf_legal_allocations
+
+
+def cartesian(arrays, out=None):
+    """
+    Generate a cartesian product of input arrays.
+
+    Parameters
+    ----------
+    arrays : list of array-like
+        1-D arrays to form the cartesian product of.
+    out : ndarray
+        Array to place the cartesian product in.
+
+    Returns
+    -------
+    out : ndarray
+        2-D array of shape (M, len(arrays)) containing cartesian products
+        formed of input arrays.
+
+    Notes
+    -----
+    When `out` is not supplied, the result takes the dtype of the first
+    input array.
+
+    Examples
+    --------
+    >>> cartesian(([1, 2, 3], [4, 5], [6, 7]))
+    array([[1, 4, 6],
+           [1, 4, 7],
+           [1, 5, 6],
+           [1, 5, 7],
+           [2, 4, 6],
+           [2, 4, 7],
+           [2, 5, 6],
+           [2, 5, 7],
+           [3, 4, 6],
+           [3, 4, 7],
+           [3, 5, 6],
+           [3, 5, 7]])
+
+    """
+
+    arrays = [numpy.asarray(x) for x in arrays]
+    dtype = arrays[0].dtype
+
+    n = numpy.prod([x.size for x in arrays])
+    if out is None:
+        out = numpy.zeros([n, len(arrays)], dtype=dtype)
+
+    # Floor division keeps m an integer under true division as well
+    # (Python 3 or `from __future__ import division`); a float m cannot be
+    # used as a repeat count or slice bound.
+    m = n // arrays[0].size
+    out[:, 0] = numpy.repeat(arrays[0], m)
+    if arrays[1:]:
+        # Fill the first block of the remaining columns recursively, then
+        # copy that block once for every further value of the first array.
+        cartesian(arrays[1:], out=out[0:m, 1:])
+        for j in range(1, arrays[0].size):
+            out[j * m:(j + 1) * m, 1:] = out[0:m, 1:]
+    return out
+
+
+def is_valid_allocation(lf_allocation, f_tolerance=1e-9):
+    """
+    Return True when the weights in lf_allocation sum to 1.
+
+    lf_allocation -- sequence of float weights
+    f_tolerance   -- largest absolute deviation from 1.0 still accepted
+
+    The comparison uses a tolerance because float sums of decimal fractions
+    are inexact: 0.7 + 0.2 + 0.1 evaluates to 0.9999999999999999, which an
+    exact == 1.0 test would reject.
+    """
+    return bool(abs(numpy.sum(lf_allocation) - 1.0) <= f_tolerance)
+
+
+# Run the optimizer only when this file is executed as a script.
+if __name__ == '__main__':
+    main()
diff --git a/HW2/2008Study.pdf b/HW2/2008Study.pdf
diff --git a/HW2/2008Study2.pdf b/HW2/2008Study2.pdf
diff --git a/HW2/2012Study.pdf b/HW2/2012Study.pdf
diff --git a/HW2/2012Study2.pdf b/HW2/2012Study2.pdf
diff --git a/HW2/hw2.py b/HW2/hw2.py
@@ -0,0 +1,83 @@
+__author__ = 'eric'
+
+import pandas as pd
+import numpy as np
+import math
+import copy
+import QSTK.qstkutil.qsdateutil as du
+import datetime as dt
+import QSTK.qstkutil.DataAccess as da
+import QSTK.qstkutil.tsutil as tsu
+import QSTK.qstkstudy.EventProfiler as ep
+
+# Module-level 'Yahoo' data source, used by create_study() and main() below.
+dataObj = da.DataAccess('Yahoo')
+
+
+def find_events(ls_symbols, d_data, f_threshold=8.0):
+    ''' Finding the event dataframe
+
+    ls_symbols  -- symbols (columns of the close frame) to scan
+    d_data      -- dictionary holding the 'actual_close' frame
+    f_threshold -- price level whose downward crossing marks an event
+
+    Returns a frame shaped like d_data['actual_close'] that holds 1 at every
+    (timestamp, symbol) where an event occurred and NaN everywhere else.
+    '''
+    df_close = d_data['actual_close']
+
+    print "Finding Events"
+
+    # Creating an all-NaN dataframe with the same index and columns
+    df_events = copy.deepcopy(df_close)
+    df_events = df_events * np.NAN
+
+    # Time stamps for the event range
+    ldt_timestamps = df_close.index
+
+    for s_sym in ls_symbols:
+        # Look the column up once per symbol instead of twice per timestamp.
+        ser_close = df_close[s_sym]
+
+        for i in range(1, len(ldt_timestamps)):
+            # Prices for this timestamp and the one before it
+            f_symprice_today = ser_close.ix[ldt_timestamps[i]]
+            f_symprice_yest = ser_close.ix[ldt_timestamps[i - 1]]
+
+            # Event is found if on 2 consecutive closes the price went from
+            # greater than or equal to f_threshold to less than f_threshold
+            if f_symprice_yest >= f_threshold and f_symprice_today < f_threshold:
+                df_events[s_sym].ix[ldt_timestamps[i]] = 1
+
+    return df_events
+
+
+def create_study(ls_symbols, ldt_timestamps, s_study_name):
+    """
+    Load close data for the symbols, locate events with find_events and run
+    the QSTK event profiler, which is given s_study_name as its output file.
+
+    ls_symbols     -- symbols to request from the module-level dataObj
+    ldt_timestamps -- timestamps to request data for
+    s_study_name   -- passed to ep.eventprofiler as s_filename
+    """
+    print "Grabbing data to perform {0}".format(s_study_name)
+    ls_keys = ['close', 'actual_close']
+    ldf_raw = dataObj.get_data(ldt_timestamps, ls_symbols, ls_keys)
+
+    print "Got data for study {0}".format(s_study_name)
+    d_data = dict(zip(ls_keys, ldf_raw))
+
+    # Patch NaNs: carry values forward, then backward, then fall back to 1.0.
+    for s_key in ls_keys:
+        df_filled = d_data[s_key].fillna(method='ffill')
+        df_filled = df_filled.fillna(method='bfill')
+        d_data[s_key] = df_filled.fillna(1.0)
+
+    df_events = find_events(ls_symbols, d_data)
+
+    print "Creating Study"
+    ep.eventprofiler(
+        df_events,
+        d_data,
+        i_lookback=20,
+        i_lookforward=20,
+        s_filename=s_study_name,
+        b_market_neutral=True,
+        b_errorbars=True,
+        s_market_sym='SPY')
+
+
+def main():
+    """
+    Run the event study over 2008-2009 for the 'sp5002012' symbol list
+    (plus SPY), writing '2012Study2.pdf'.
+
+    The 'sp5002008' list is still loaded, but its study is currently
+    disabled.
+    """
+    ldt_timestamps = du.getNYSEdays(
+        dt.datetime(2008, 1, 1),
+        dt.datetime(2009, 12, 31),
+        dt.timedelta(hours=16))
+
+    # SPY is appended to each list; create_study names it as the market symbol.
+    ls_symbols_2012 = dataObj.get_symbols_from_list('sp5002012')
+    ls_symbols_2012.append('SPY')
+
+    ls_symbols_2008 = dataObj.get_symbols_from_list('sp5002008')
+    ls_symbols_2008.append('SPY')
+
+    # 2008 study disabled:
+    # create_study(ls_symbols_2008, ldt_timestamps, '2008Study2.pdf')
+    create_study(ls_symbols_2012, ldt_timestamps, '2012Study2.pdf')
+
+
+# Run the event study only when this file is executed as a script.
+if __name__ == '__main__':
+    main()
diff --git a/HW3/analyze/__init__.py b/HW3/analyze/__init__.py
@@ -0,0 +1,3 @@
+__author__ = 'eric'
+import analyze
+import port_input
diff --git a/HW3/analyze/analyze.py b/HW3/analyze/analyze.py
@@ -0,0 +1,72 @@
+__author__ = 'eric'
+
+from port_input import PortfolioInput
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+def look_at(portfolio_input):
+    """
+    Print performance statistics for a portfolio and its benchmark and save
+    a chart of both value series as 'hw3.pdf'.
+
+    portfolio_input -- object providing get_start_end_dates(), symbol,
+                       na_raw_input and df_analysis_data (with the
+                       '*_returns', '*_normalized' and '*_values' entries
+                       for 'portfolio' and 'benchmark')
+    """
+    i_trading_days = 252
+
+    def summarize(na_returns):
+        # Standard deviation, mean and Sharpe ratio of one return series.
+        f_stddev = np.std(na_returns)
+        f_avg = np.mean(na_returns)
+        f_sharpe = (np.sqrt(i_trading_days) * f_avg) / f_stddev
+        return f_stddev, f_avg, f_sharpe
+
+    dt_start, dt_end = portfolio_input.get_start_end_dates()
+    s_date_format = "%Y-%m-%d %H:%M:%S"
+
+    d_analysis = portfolio_input.df_analysis_data
+    f_bench_stddev, f_bench_avg, f_bench_sharpe = summarize(d_analysis['benchmark_returns'])
+    f_fund_stddev, f_fund_avg, f_fund_sharpe = summarize(d_analysis['portfolio_returns'])
+
+    # Total return is the last entry of each normalized series.
+    f_bench_total = portfolio_input.df_analysis_data['benchmark_normalized'][-1]
+    f_fund_total = portfolio_input.df_analysis_data['portfolio_normalized'][-1]
+
+    # Chart the two value series; the benchmark is drawn faded.
+    plt.clf()
+    fig = plt.figure()
+    fig.add_subplot(111)
+    plt.plot(portfolio_input.df_analysis_data['portfolio_values'])
+    plt.plot(portfolio_input.df_analysis_data['benchmark_values'], alpha=0.4)
+    plt.legend(['Portfolio', portfolio_input.symbol])
+    plt.ylabel('Cumulative Returns')
+    plt.xlabel('Trading Day')
+    fig.autofmt_xdate(rotation=45)
+    plt.savefig('hw3.pdf', format='pdf')
+
+    print "The final value of the portfolio using the sample file is -- {0:s}".format(portfolio_input.na_raw_input[0][-1])
+    print "Details of the Performance of the portfolio :"
+    print "Date Range: {0:s} to {1:s}".format(dt_start.strftime(s_date_format), dt_end.strftime(s_date_format))
+    print "\nSharpe Ratio of Fund: {0:f}\nSharpe Ratio of {1:s}: {2:f}".format(
+        f_fund_sharpe, portfolio_input.symbol, f_bench_sharpe)
+    print "\nTotal Return of Fund: {0:f}\nTotal Return of {1:s}: {2:f}".format(
+        f_fund_total, portfolio_input.symbol, f_bench_total)
+    print "\nStandard Deviation of Fund: {0:f}\nStandard Deviation of {1:s}: {2:f}".format(
+        f_fund_stddev, portfolio_input.symbol, f_bench_stddev)
+    print "\nAverage Daily Return of Fund: {0:f}\nAverage Daily Return of {1:s}: {2:f}".format(
+        f_fund_avg, portfolio_input.symbol, f_bench_avg)
+
+
+def main(input_args):
+    """
+    Build the portfolio input from the parsed command-line arguments and
+    analyze it.
+
+    input_args -- object with `values_csv` (path of the portfolio values
+                  CSV) and `benchmark` (benchmark symbol) attributes
+
+    A relative `values_csv` is resolved against the directory containing
+    this file. os.path.join is used so that an absolute `values_csv` is
+    honoured as given instead of being glued onto the script directory.
+    """
+    import os
+    s_script_dir = os.path.dirname(os.path.realpath(__file__))
+    values_csv = os.path.join(s_script_dir, input_args.values_csv)
+    portfolio_input = PortfolioInput(values_csv, input_args.benchmark)
+    look_at(portfolio_input)
+
+
+# Command-line entry point: parse the two positional arguments and hand the
+# resulting namespace to main().
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser(
+        description='Analyze a portfolios returns and compare it against a specified benchmark')
+    # Both arguments are positional and therefore required.
+    parser.add_argument('values_csv', help='(input) CSV file specifying the daily value of the portfolio')
+    parser.add_argument('benchmark', help='symbol of the benchmark to use in comparison')
+
+    args = parser.parse_args()
+
+    main(args)
diff --git a/HW3/analyze/hw3.pdf b/HW3/analyze/hw3.pdf