diff --git a/Examples/Basic/numpy-tutorial.py b/Examples/Basic/numpy-tutorial.py
index dcadb3c55..d7b7b7e72 100644
--- a/Examples/Basic/numpy-tutorial.py
+++ b/Examples/Basic/numpy-tutorial.py
@@ -35,33 +35,33 @@
#
# ## Lets get started!

-print "Importing numpy"
+print("Importing numpy")
import numpy as np
## This loads the numpy library and lets us refer to it by the shorthand "np",
## which is the convention used in the numpy documentation and in many
## online tutorials/examples

-print "Creating arrays"
+print("Creating arrays")
## Now lets make an array to play around with. You can make numpy arrays in
## a number of ways,
## Filled with zeros:
zeroArray = np.zeros( (2,3) ) # [[ 0. 0. 0.]
-print zeroArray # [ 0. 0. 0.]]
+print(zeroArray) # [ 0. 0. 0.]]

## Or ones:
oneArray = np.ones( (2,3) ) # [[ 1. 1. 1.]
-print oneArray # [ 1. 1. 1.]]
+print(oneArray) # [ 1. 1. 1.]]

## Or filled with junk:
emptyArray = np.empty( (2,3) )
-print emptyArray
+print(emptyArray)

## Note, emptyArray might look random, but it's just uninitialized which means
## you shouldn't count on it having any particular data in it, even random
## data! If you do want random data you can use random():
randomArray = np.random.random( (2,3) )
-print randomArray
+print(randomArray)

## If you're following along and trying these commands out, you should have
## noticed that making randomArray took a lot longer than emptyArray. That's
@@ -74,29 +74,29 @@
[4,5,6]]
myArray = np.array(foo) # [[1 2 3]
-print myArray # [4 5 6]]
+print(myArray) # [4 5 6]]

-print "Reshaping arrays"
+print("Reshaping arrays")
## Of course, if you're typing out a range for a larger matrix, it's easier to
## use arange(...):
rangeArray = np.arange(6,12).reshape( (2,3) ) # [[ 6 7 8]
-print rangeArray # [ 9 10 11]]
+print(rangeArray) # [ 9 10 11]]

## there's two things going on here. First, the arange(...) function returns a
## 1D array similar to what you'd get from using the built-in python function
## range(...) with the same arguments, except it returns a numpy array
## instead of a list.
-print np.arange(6,12) # [ 6 7 8 9 10 11 12]
+print(np.arange(6,12)) # [ 6 7 8 9 10 11]

## the reshape method takes the data in an existing array, and stuffs it into
## an array with the given shape and returns it.
-print rangeArray.reshape( (3,2) ) # [[ 6 7]
+print(rangeArray.reshape( (3,2) )) # [[ 6 7]
 # [ 8 9]
 # [10 11]]

#The original array doesn't change though.
-print rangeArray # [[ 6 7 8]
+print(rangeArray) # [ 9 10 11]

## When you use reshape(...) the total number of things in the array must stay
@@ -106,69 +106,69 @@
squareArray = np.arange(1,10).reshape( (3,3) ) #this is fine, 9 elements

-print "Accessing array elements"
+print("Accessing array elements")
## Accessing an array is also pretty straight forward. You access a specific
## spot in the table by referring to its row and column inside square braces
## after the array:
-print rangeArray[0,1] #7
+print(rangeArray[0,1]) #7

## Note that row and column numbers start from 0, not 1! Numpy also lets you
## refer to ranges inside an array:
-print rangeArray[0,0:2] #[6 7]
-print squareArray[0:2,0:2] #[[1 2] # the top left corner of squareArray
+print(rangeArray[0,0:2]) #[6 7]
+print(squareArray[0:2,0:2]) #[[1 2] # the top left corner of squareArray
 # [4 5]]

## These ranges work just like slices and python lists. n:m:t specifies a range
## that starts at n, and stops before m, in steps of size t.
If any of these ## are left off, they're assumed to be the start, the end+1, and 1 respectively -print squareArray[:,0:3:2] #[[1 3] #skip the middle column +print(squareArray[:,0:3:2]) #[[1 3] #skip the middle column # [4 6] # [7 9]] ## Also like python lists, you can assign values to specific positions, or ## ranges of values to slices -squareArray[0,:] = np.array(range(1,4)) #set the first row to 1,2,3 +squareArray[0,:] = np.array(list(range(1,4))) #set the first row to 1,2,3 squareArray[1,1] = 0 # set the middle spot to zero squareArray[2,:] = 1 # set the last row to ones -print squareArray # [[1 2 3] +print(squareArray) # [[1 2 3] # [4 0 6] # [1 1 1]] ## Something new to numpy arrays is indexing using an array of indices: fibIndices = np.array( [1, 1, 2, 3] ) randomRow = np.random.random( (10,1) ) # an array of 10 random numbers -print randomRow -print randomRow[fibIndices] # the first, first, second and third element of +print(randomRow) +print(randomRow[fibIndices]) # the first, first, second and third element of # randomRow ## You can also use an array of true/false values to index: boolIndices = np.array( [[ True, False, True], [False, True, False], [ True, False, True]] ) -print squareArray[boolIndices] # a 1D array with the selected values +print(squareArray[boolIndices]) # a 1D array with the selected values # [1 3 0 1 1] ## It gets a little more complicated with 2D (and higher) arrays. You need ## two index arrays for a 2D array: rows = np.array( [[0,0],[2,2]] ) #get the corners of our square array cols = np.array( [[0,2],[0,2]] ) -print squareArray[rows,cols] #[[1 3] +print(squareArray[rows,cols]) #[[1 3] # [1 1]] boolRows = np.array( [False, True, False] ) # just the middle row boolCols = np.array( [True, False, True] ) # Not the middle column -print squareArray[boolRows,boolCols] # [4 6] +print(squareArray[boolRows,boolCols]) # [4 6] -print "Operations on arrays" +print("Operations on arrays") ## One useful trick is to create a boolean matrix based on some test and use ## that as an index in order to get the elements of a matrix that pass the ## test: sqAverage = np.average(squareArray) # average(...) returns the average of all # the elements in the given array betterThanAverage = squareArray > sqAverage -print betterThanAverage #[[False False True] +print(betterThanAverage) #[[False False True] # [ True False True] # [False False False]] -print squareArray[betterThanAverage] #[3 4 6] +print(squareArray[betterThanAverage]) #[3 4 6] ## Indexing like this can also be used to assign values to elements of the ## array. This is particularly useful if you want to filter an array, say by @@ -188,24 +188,24 @@ # truncate them down to integers. clampedSqArray[ (squareArray-sqAverage) > sqStdDev ] = sqAverage+sqStdDev clampedSqArray[ (squareArray-sqAverage) < -sqStdDev ] = sqAverage-sqStdDev -print clampedSqArray # [[ 1. 2. 3. ] +print(clampedSqArray) # [[ 1. 2. 3. ] # [ 3.90272394 0.31949828 3.90272394] # [ 1. 1. 1. ]] ## Multiplying and dividing arrays by numbers does what you'd expect. 
It ## multiples/divides element-wise -print squareArray * 2 # [[ 2 4 6] +print(squareArray * 2) # [[ 2 4 6] # [ 8 0 12] # [ 2 2 2]] ## Addition works similarly: -print squareArray + np.ones( (3,3) ) #[[2 3 4] +print(squareArray + np.ones( (3,3) )) #[[2 3 4] # [5 1 7] # [2 2 2]] ## Multiplying two arrays together (of the same size) is also element wise -print squareArray * np.arange(1,10).reshape( (3,3) ) #[[ 1 4 9] +print(squareArray * np.arange(1,10).reshape( (3,3) )) #[[ 1 4 9] # [16 0 36] # [ 7 8 9]] @@ -213,7 +213,7 @@ ## from linear algebra: matA = np.array( [[1,2],[3,4]] ) matB = np.array( [[5,6],[7,8]] ) -print np.dot(matA,matB) #[[19 22] +print(np.dot(matA,matB)) #[[19 22] # [43 50]] ## And thats it! There's a lot more to the numpy library, and there are a few diff --git a/Examples/Basic/pandas-tutorial.py b/Examples/Basic/pandas-tutorial.py index d687eb7ce..7d536d035 100644 --- a/Examples/Basic/pandas-tutorial.py +++ b/Examples/Basic/pandas-tutorial.py @@ -20,67 +20,67 @@ for i in range(1, 6): ldt_timestamps.append(dt.datetime(2011, 1, i, 16)) -print "The index we created has the following dates : " -print ldt_timestamps -print +print("The index we created has the following dates : ") +print(ldt_timestamps) +print() -## TimeSeries -ts_single_value = pd.TimeSeries(0.0, index=ldt_timestamps) -print "A timeseries initialized to one single value : " +## Series +ts_single_value = pd.Series(0.0, index=ldt_timestamps) +print("A timeseries initialized to one single value : ") na_vals = np.arange(len(ldt_timestamps)) -print "Dummy initialized array : " -print na_vals -print +print("Dummy initialized array : ") +print(na_vals) +print() -ts_array = pd.TimeSeries(na_vals, index=ldt_timestamps) -print "A timeseries initialized using a numpy array : " -print ts_array -print +ts_array = pd.Series(na_vals, index=ldt_timestamps) +print("A timeseries initialized using a numpy array : ") +print(ts_array) +print() -print "Reading the timeseries for a particular date" -print "Date : ", ldt_timestamps[1] -print "Value : ", ts_array[ldt_timestamps[1]] -print +print("Reading the timeseries for a particular date") +print("Date : ", ldt_timestamps[1]) +print("Value : ", ts_array[ldt_timestamps[1]]) +print() -print "Initializing a list of symbols : " +print("Initializing a list of symbols : ") ls_symbols = ['AAPL', 'GOOG', 'MSFT', 'IBM'] -print ls_symbols -print +print(ls_symbols) +print() -print "Initializing a dataframe with one value : " +print("Initializing a dataframe with one value : ") df_single = pd.DataFrame(index=ldt_timestamps, columns=ls_symbols) df_single = df_single.fillna(0.0) -print df_single -print +print(df_single) +print() -print "Initializing a dataframe with a numpy array : " +print("Initializing a dataframe with a numpy array : ") na_vals_2 = np.random.randn(len(ldt_timestamps), len(ls_symbols)) df_vals = pd.DataFrame(na_vals_2, index=ldt_timestamps, columns=ls_symbols) -print df_vals -print +print(df_vals) +print() -print "Access the timeseries of a particular symbol : " -print df_vals[ls_symbols[1]] -print +print("Access the timeseries of a particular symbol : ") +print(df_vals[ls_symbols[1]]) +print() -print "Access the timeseries of a particular date : " -print df_vals.ix[ldt_timestamps[1]] -print +print("Access the timeseries of a particular date : ") +print(df_vals.ix[ldt_timestamps[1]]) +print() -print "Access the value for a specific symbol on a specific date: " -print df_vals[ls_symbols[1]].ix[ldt_timestamps[1]] -print +print("Access the value for a specific symbol on a 
specific date: ") +print(df_vals[ls_symbols[1]].ix[ldt_timestamps[1]]) +print() -print "Reindexing the dataframe" +print("Reindexing the dataframe") ldt_new_dates = [dt.datetime(2011, 1, 3, 16), dt.datetime(2011, 1, 5, 16), dt.datetime(2011, 1, 7, 16)] ls_new_symbols = ['AAPL', 'IBM', 'XOM'] df_new = df_vals.reindex(index=ldt_new_dates, columns=ls_new_symbols) -print df_new -print "Observe that reindex carried over whatever values it could find and set the rest to NAN" -print +print(df_new) +print("Observe that reindex carried over whatever values it could find and set the rest to NAN") +print() -print "For pandas rolling statistics please refer : http://pandas.pydata.org/pandas-docs/dev/computation.html#moving-rolling-statistics-moments" +print("For pandas rolling statistics please refer : http://pandas.pydata.org/pandas-docs/dev/computation.html#moving-rolling-statistics-moments") diff --git a/Examples/Basic/tutorial1.py b/Examples/Basic/tutorial1.py index 8e183d0a7..a5b1c96eb 100644 --- a/Examples/Basic/tutorial1.py +++ b/Examples/Basic/tutorial1.py @@ -21,7 +21,7 @@ import matplotlib.pyplot as plt import pandas as pd -print "Pandas Version", pd.__version__ +print("Pandas Version", pd.__version__) def main(): @@ -49,7 +49,7 @@ def main(): # Reading the data, now d_data is a dictionary with the keys above. # Timestamps and symbols are the ones that were specified before. ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys) - d_data = dict(zip(ls_keys, ldf_data)) + d_data = dict(list(zip(ls_keys, ldf_data))) # Filling the data for NAN for s_key in ls_keys: diff --git a/Examples/Basic/tutorial2.py b/Examples/Basic/tutorial2.py index d8018f19c..c38b627a2 100755 --- a/Examples/Basic/tutorial2.py +++ b/Examples/Basic/tutorial2.py @@ -33,11 +33,11 @@ def main(): ls_symbols = ['$SPX', 'XOM', 'GOOG', 'GLD'] # Printing the first 5 rows - print "First 5 rows of Price Data:" - print na_price[:5, :] - print - print "First 5 rows of Dates:" - print na_dates[:5, :] + print("First 5 rows of Price Data:") + print(na_price[:5, :]) + print() + print("First 5 rows of Dates:") + print(na_dates[:5, :]) # Creating the timestamps from dates read ldt_timestamps = [] diff --git a/Examples/Basic/tutorial3.py b/Examples/Basic/tutorial3.py index 422fce947..08d3f6427 100644 --- a/Examples/Basic/tutorial3.py +++ b/Examples/Basic/tutorial3.py @@ -1,107 +1,107 @@ -''' -(c) 2011, 2012 Georgia Tech Research Corporation -This source code is released under the New BSD license. Please see -http://wiki.quantsoftware.org/index.php?title=QSTK_License -for license details. - -Created on January, 24, 2013 - -@author: Sourabh Bajaj -@contact: sourabhbajaj@gatech.edu -@summary: Example tutorial code. 
-''' - -# QSTK Imports -import QSTK.qstkutil.qsdateutil as du -import QSTK.qstkutil.tsutil as tsu -import QSTK.qstkutil.DataAccess as da - -# Third Party Imports -import datetime as dt -import matplotlib.pyplot as plt -import pandas as pd -import numpy as np - - -def main(): - ''' Main Function''' - # Reading the portfolio - na_portfolio = np.loadtxt('tutorial3portfolio.csv', dtype='S5,f4', - delimiter=',', comments="#", skiprows=1) - print na_portfolio - - # Sorting the portfolio by symbol name - na_portfolio = sorted(na_portfolio, key=lambda x: x[0]) - print na_portfolio - - # Create two list for symbol names and allocation - ls_port_syms = [] - lf_port_alloc = [] - for port in na_portfolio: - ls_port_syms.append(port[0]) - lf_port_alloc.append(port[1]) - - # Creating an object of the dataaccess class with Yahoo as the source. - c_dataobj = da.DataAccess('Yahoo') - ls_all_syms = c_dataobj.get_all_symbols() - # Bad symbols are symbols present in portfolio but not in all syms - ls_bad_syms = list(set(ls_port_syms) - set(ls_all_syms)) - - if len(ls_bad_syms) != 0: - print "Portfolio contains bad symbols : ", ls_bad_syms - - for s_sym in ls_bad_syms: - i_index = ls_port_syms.index(s_sym) - ls_port_syms.pop(i_index) - lf_port_alloc.pop(i_index) - - # Reading the historical data. - dt_end = dt.datetime(2011, 1, 1) - dt_start = dt_end - dt.timedelta(days=1095) # Three years - # We need closing prices so the timestamp should be hours=16. - dt_timeofday = dt.timedelta(hours=16) - - # Get a list of trading days between the start and the end. - ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday) - - # Keys to be read from the data, it is good to read everything in one go. - ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close'] - - # Reading the data, now d_data is a dictionary with the keys above. - # Timestamps and symbols are the ones that were specified before. - ldf_data = c_dataobj.get_data(ldt_timestamps, ls_port_syms, ls_keys) - d_data = dict(zip(ls_keys, ldf_data)) - - # Copying close price into separate dataframe to find rets - df_rets = d_data['close'].copy() - # Filling the data. - df_rets = df_rets.fillna(method='ffill') - df_rets = df_rets.fillna(method='bfill') - df_rets = df_rets.fillna(1.0) - - # Numpy matrix of filled data values - na_rets = df_rets.values - # returnize0 works on ndarray and not dataframes. - tsu.returnize0(na_rets) - - # Estimate portfolio returns - na_portrets = np.sum(na_rets * lf_port_alloc, axis=1) - na_port_total = np.cumprod(na_portrets + 1) - na_component_total = np.cumprod(na_rets + 1, axis=0) - - # Plotting the results - plt.clf() - fig = plt.figure() - fig.add_subplot(111) - plt.plot(ldt_timestamps, na_component_total, alpha=0.4) - plt.plot(ldt_timestamps, na_port_total) - ls_names = ls_port_syms - ls_names.append('Portfolio') - plt.legend(ls_names) - plt.ylabel('Cumulative Returns') - plt.xlabel('Date') - fig.autofmt_xdate(rotation=45) - plt.savefig('tutorial3.pdf', format='pdf') - -if __name__ == '__main__': - main() +''' +(c) 2011, 2012 Georgia Tech Research Corporation +This source code is released under the New BSD license. Please see +http://wiki.quantsoftware.org/index.php?title=QSTK_License +for license details. + +Created on January, 24, 2013 + +@author: Sourabh Bajaj +@contact: sourabhbajaj@gatech.edu +@summary: Example tutorial code. 
+''' + +# QSTK Imports +import QSTK.qstkutil.qsdateutil as du +import QSTK.qstkutil.tsutil as tsu +import QSTK.qstkutil.DataAccess as da + +# Third Party Imports +import datetime as dt +import matplotlib.pyplot as plt +import pandas as pd +import numpy as np + + +def main(): + ''' Main Function''' + # Reading the portfolio + na_portfolio = np.loadtxt('tutorial3portfolio.csv', dtype='S5,f4', + delimiter=',', comments="#", skiprows=1) + print(na_portfolio) + + # Sorting the portfolio by symbol name + na_portfolio = sorted(na_portfolio, key=lambda x: x[0]) + print(na_portfolio) + + # Create two list for symbol names and allocation + ls_port_syms = [] + lf_port_alloc = [] + for port in na_portfolio: + ls_port_syms.append(port[0]) + lf_port_alloc.append(port[1]) + + # Creating an object of the dataaccess class with Yahoo as the source. + c_dataobj = da.DataAccess('Yahoo') + ls_all_syms = c_dataobj.get_all_symbols() + # Bad symbols are symbols present in portfolio but not in all syms + ls_bad_syms = list(set(ls_port_syms) - set(ls_all_syms)) + + if len(ls_bad_syms) != 0: + print("Portfolio contains bad symbols : ", ls_bad_syms) + + for s_sym in ls_bad_syms: + i_index = ls_port_syms.index(s_sym) + ls_port_syms.pop(i_index) + lf_port_alloc.pop(i_index) + + # Reading the historical data. + dt_end = dt.datetime(2011, 1, 1) + dt_start = dt_end - dt.timedelta(days=1095) # Three years + # We need closing prices so the timestamp should be hours=16. + dt_timeofday = dt.timedelta(hours=16) + + # Get a list of trading days between the start and the end. + ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday) + + # Keys to be read from the data, it is good to read everything in one go. + ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close'] + + # Reading the data, now d_data is a dictionary with the keys above. + # Timestamps and symbols are the ones that were specified before. + ldf_data = c_dataobj.get_data(ldt_timestamps, ls_port_syms, ls_keys) + d_data = dict(list(zip(ls_keys, ldf_data))) + + # Copying close price into separate dataframe to find rets + df_rets = d_data['close'].copy() + # Filling the data. + df_rets = df_rets.fillna(method='ffill') + df_rets = df_rets.fillna(method='bfill') + df_rets = df_rets.fillna(1.0) + + # Numpy matrix of filled data values + na_rets = df_rets.values + # returnize0 works on ndarray and not dataframes. + tsu.returnize0(na_rets) + + # Estimate portfolio returns + na_portrets = np.sum(na_rets * lf_port_alloc, axis=1) + na_port_total = np.cumprod(na_portrets + 1) + na_component_total = np.cumprod(na_rets + 1, axis=0) + + # Plotting the results + plt.clf() + fig = plt.figure() + fig.add_subplot(111) + plt.plot(ldt_timestamps, na_component_total, alpha=0.4) + plt.plot(ldt_timestamps, na_port_total) + ls_names = ls_port_syms + ls_names.append('Portfolio') + plt.legend(ls_names) + plt.ylabel('Cumulative Returns') + plt.xlabel('Date') + fig.autofmt_xdate(rotation=45) + plt.savefig('tutorial3.pdf', format='pdf') + +if __name__ == '__main__': + main() diff --git a/Examples/Basic/tutorial4.py b/Examples/Basic/tutorial4.py index c9b165d34..b4ecff48f 100644 --- a/Examples/Basic/tutorial4.py +++ b/Examples/Basic/tutorial4.py @@ -1,78 +1,78 @@ -''' -(c) 2011, 2012 Georgia Tech Research Corporation -This source code is released under the New BSD license. Please see -http://wiki.quantsoftware.org/index.php?title=QSTK_License -for license details. 
- -Created on January, 24, 2013 - -@author: Sourabh Bajaj -@contact: sourabhbajaj@gatech.edu -@summary: Example tutorial code. -''' - -# QSTK Imports -import QSTK.qstkutil.qsdateutil as du -import QSTK.qstkutil.tsutil as tsu -import QSTK.qstkutil.DataAccess as da - -# Third Party Imports -import datetime as dt -import matplotlib.pyplot as plt -import pandas as pd -import numpy as np -import cPickle - - -def main(): - ''' Main Function''' - - # Start and End date of the charts - dt_start = dt.datetime(2004, 1, 1) - dt_end = dt.datetime(2009, 12, 31) - - # We need closing prices so the timestamp should be hours=16. - dt_timeofday = dt.timedelta(hours=16) - - # Get a list of trading days between the start and the end. - ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday) - - # Creating an object of the dataaccess class with Yahoo as the source. - c_dataobj = da.DataAccess('Yahoo') - - # List of symbols - First 20 - ls_symbols = c_dataobj.get_symbols_from_list('sp5002012') - ls_symbols = ls_symbols[:20] - ls_symbols.append('_CASH') - - # Creating the first allocation row - na_vals = np.random.randint(0, 1000, len(ls_symbols)) - # Normalize the row - Typecasting as everything is int. - na_vals = na_vals / float(sum(na_vals)) - # Reshape to a 2D matrix to append into dataframe. - na_vals = na_vals.reshape(1, -1) - - # Creating Allocation DataFrames - df_alloc = pd.DataFrame(na_vals, index=[ldt_timestamps[0]], - columns=ls_symbols) - - dt_last_date = ldt_timestamps[0] - # Looping through all dates and creating monthly allocations - for dt_date in ldt_timestamps[1:]: - if dt_last_date.month != dt_date.month: - # Create allocation - na_vals = np.random.randint(0, 1000, len(ls_symbols)) - na_vals = na_vals / float(sum(na_vals)) - na_vals = na_vals.reshape(1, -1) - # Append to the dataframe - df_new_row = pd.DataFrame(na_vals, index=[dt_date], - columns=ls_symbols) - df_alloc = df_alloc.append(df_new_row) - dt_last_date = dt_date - - # Create the outpul pickle file for the dataframe. - output = open('allocation.pkl', 'wb') - cPickle.dump(df_alloc, output) - -if __name__ == '__main__': - main() +''' +(c) 2011, 2012 Georgia Tech Research Corporation +This source code is released under the New BSD license. Please see +http://wiki.quantsoftware.org/index.php?title=QSTK_License +for license details. + +Created on January, 24, 2013 + +@author: Sourabh Bajaj +@contact: sourabhbajaj@gatech.edu +@summary: Example tutorial code. +''' + +# QSTK Imports +import QSTK.qstkutil.qsdateutil as du +import QSTK.qstkutil.tsutil as tsu +import QSTK.qstkutil.DataAccess as da + +# Third Party Imports +import datetime as dt +import matplotlib.pyplot as plt +import pandas as pd +import numpy as np +import pickle + + +def main(): + ''' Main Function''' + + # Start and End date of the charts + dt_start = dt.datetime(2004, 1, 1) + dt_end = dt.datetime(2009, 12, 31) + + # We need closing prices so the timestamp should be hours=16. + dt_timeofday = dt.timedelta(hours=16) + + # Get a list of trading days between the start and the end. + ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday) + + # Creating an object of the dataaccess class with Yahoo as the source. + c_dataobj = da.DataAccess('Yahoo') + + # List of symbols - First 20 + ls_symbols = c_dataobj.get_symbols_from_list('sp5002012') + ls_symbols = ls_symbols[:20] + ls_symbols.append('_CASH') + + # Creating the first allocation row + na_vals = np.random.randint(0, 1000, len(ls_symbols)) + # Normalize the row - Typecasting as everything is int. 
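+    # Note: under Python 3 the / operator is true division, so the float()
+    # cast on the next line is no longer strictly required; it is kept to
+    # match the behaviour of the original Python 2 code.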
+    na_vals = na_vals / float(sum(na_vals))
+    # Reshape to a 2D matrix to append into dataframe.
+    na_vals = na_vals.reshape(1, -1)
+
+    # Creating Allocation DataFrames
+    df_alloc = pd.DataFrame(na_vals, index=[ldt_timestamps[0]],
+                        columns=ls_symbols)
+
+    dt_last_date = ldt_timestamps[0]
+    # Looping through all dates and creating monthly allocations
+    for dt_date in ldt_timestamps[1:]:
+        if dt_last_date.month != dt_date.month:
+            # Create allocation
+            na_vals = np.random.randint(0, 1000, len(ls_symbols))
+            na_vals = na_vals / float(sum(na_vals))
+            na_vals = na_vals.reshape(1, -1)
+            # Append to the dataframe
+            df_new_row = pd.DataFrame(na_vals, index=[dt_date],
+                                      columns=ls_symbols)
+            df_alloc = df_alloc.append(df_new_row)
+            dt_last_date = dt_date
+
+    # Create the output pickle file for the dataframe.
+    output = open('allocation.pkl', 'wb')
+    pickle.dump(df_alloc, output)
+
+if __name__ == '__main__':
+    main()
diff --git a/Examples/Basic/tutorial5.py b/Examples/Basic/tutorial5.py
index 50bec0827..26554c5c9 100644
--- a/Examples/Basic/tutorial5.py
+++ b/Examples/Basic/tutorial5.py
@@ -80,12 +80,12 @@ def main():
                     f_slippage=0.0005, f_minimumcommision=5.0,
                     f_commision_share=0.0035, i_target_leverage=1,
                     f_rate_borrow=3.5, log="transaction.csv")
-    print "Simulated Fund Time Series : "
-    print ts_funds
-    print "Transaction Costs : "
-    print "Commissions : ", f_commission
-    print "Slippage : ", f_slippage
-    print "Borrowing Cost : ", f_borrow_cost
+    print("Simulated Fund Time Series : ")
+    print(ts_funds)
+    print("Transaction Costs : ")
+    print("Commissions : ", f_commission)
+    print("Slippage : ", f_slippage)
+    print("Borrowing Cost : ", f_borrow_cost)

 if __name__ == '__main__':
     main()
diff --git a/Examples/DataAccess/setexample.py b/Examples/DataAccess/setexample.py
index 26ded7c97..c9859b0a7 100644
--- a/Examples/DataAccess/setexample.py
+++ b/Examples/DataAccess/setexample.py
@@ -1,65 +1,65 @@
-'''
-(c) 2011, 2012 Georgia Tech Research Corporation
-This source code is released under the New BSD license. Please see
-http://wiki.quantsoftware.org/index.php?title=QSTK_License
-for license details.
-
-Created on January, 24, 2013
-
-@author: Sourabh Bajaj
-@contact: sourabhbajaj@gatech.edu
-@summary: An example to show how dataAccess works.
-''' - -# QSTK Imports -import QSTK.qstkutil.qsdateutil as du -import QSTK.qstkutil.tsutil as tsu -import QSTK.qstkutil.DataAccess as da - -# Third Party Imports -import datetime as dt -import matplotlib.pyplot as plt -import pandas as pd - - -def main(): - ''' Main Function''' - # Creating an object of DataAccess Class - c_dataobj = da.DataAccess('Yahoo') - - # Getting a list of symbols from Lists - # Lists : S&P5002012, S&P5002008, Index - ls_symbols = c_dataobj.get_symbols_from_list('sp5002012') - print "Symbols from the list : ", ls_symbols - - # All symbols possible - ls_all_syms = c_dataobj.get_all_symbols() - print "All symbols : ", ls_all_syms - - ls_syms_toread = ['AAPL', 'GOOG'] - - # List of TimeStamps to read - ldt_timestamps = [] - ldt_timestamps.append(dt.datetime(2010, 10, 14, 16)) - ldt_timestamps.append(dt.datetime(2010, 10, 15, 16)) - ldt_timestamps.append(dt.datetime(2010, 11, 21, 16)) - ldt_timestamps.append(dt.datetime(2010, 11, 22, 16)) - ldt_timestamps.append(dt.datetime(2010, 11, 23, 16)) - ldt_timestamps.append(dt.datetime(2010, 11, 24, 16)) - ldt_timestamps.append(dt.datetime(2010, 11, 25, 16)) - ldt_timestamps.append(dt.datetime(2010, 11, 26, 16)) - ldt_timestamps.append(dt.datetime(2010, 11, 27, 10)) - ldt_timestamps.append(dt.datetime(2010, 11, 27, 16)) - ldt_timestamps.append(dt.datetime(2020, 11, 27, 16)) - ldt_timestamps.append(dt.datetime(2020, 11, 27, 18)) - - # Reading the data - # By default it'll read data from the default data provided, - # But a path can be provided using either an environment variable or - # as a prarameter. - df_close = c_dataobj.get_data(ldt_timestamps, ls_syms_toread, "close") - print df_close - - -if __name__ == '__main__': - main() +''' +(c) 2011, 2012 Georgia Tech Research Corporation +This source code is released under the New BSD license. Please see +http://wiki.quantsoftware.org/index.php?title=QSTK_License +for license details. + +Created on January, 24, 2013 + +@author: Sourabh Bajaj +@contact: sourabhbajaj@gatech.edu +@summary: An example to show how dataAccess works. 
+'''
+
+# QSTK Imports
+import QSTK.qstkutil.qsdateutil as du
+import QSTK.qstkutil.tsutil as tsu
+import QSTK.qstkutil.DataAccess as da
+
+# Third Party Imports
+import datetime as dt
+import matplotlib.pyplot as plt
+import pandas as pd
+
+
+def main():
+    ''' Main Function'''
+    # Creating an object of DataAccess Class
+    c_dataobj = da.DataAccess('Yahoo')
+
+    # Getting a list of symbols from Lists
+    # Lists : S&P5002012, S&P5002008, Index
+    ls_symbols = c_dataobj.get_symbols_from_list('sp5002012')
+    print("Symbols from the list : ", ls_symbols)
+
+    # All symbols possible
+    ls_all_syms = c_dataobj.get_all_symbols()
+    print("All symbols : ", ls_all_syms)
+
+    ls_syms_toread = ['AAPL', 'GOOG']
+
+    # List of TimeStamps to read
+    ldt_timestamps = []
+    ldt_timestamps.append(dt.datetime(2010, 10, 14, 16))
+    ldt_timestamps.append(dt.datetime(2010, 10, 15, 16))
+    ldt_timestamps.append(dt.datetime(2010, 11, 21, 16))
+    ldt_timestamps.append(dt.datetime(2010, 11, 22, 16))
+    ldt_timestamps.append(dt.datetime(2010, 11, 23, 16))
+    ldt_timestamps.append(dt.datetime(2010, 11, 24, 16))
+    ldt_timestamps.append(dt.datetime(2010, 11, 25, 16))
+    ldt_timestamps.append(dt.datetime(2010, 11, 26, 16))
+    ldt_timestamps.append(dt.datetime(2010, 11, 27, 10))
+    ldt_timestamps.append(dt.datetime(2010, 11, 27, 16))
+    ldt_timestamps.append(dt.datetime(2020, 11, 27, 16))
+    ldt_timestamps.append(dt.datetime(2020, 11, 27, 18))
+
+    # Reading the data
+    # By default it'll read data from the default data provided,
+    # But a path can be provided using either an environment variable or
+    # as a parameter.
+    df_close = c_dataobj.get_data(ldt_timestamps, ls_syms_toread, "close")
+    print(df_close)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/Examples/EventProfiler/tutorial.py b/Examples/EventProfiler/tutorial.py
index 4f639e558..1c3959f5c 100644
--- a/Examples/EventProfiler/tutorial.py
+++ b/Examples/EventProfiler/tutorial.py
@@ -44,7 +44,7 @@ def find_events(ls_symbols, d_data):
     df_close = d_data['close']
     ts_market = df_close['SPY']

-    print "Finding Events"
+    print("Finding Events")

     # Creating an empty dataframe
     df_events = copy.deepcopy(df_close)
@@ -82,7 +82,7 @@ def find_events(ls_symbols, d_data):
     ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
     ldf_data = dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
-    d_data = dict(zip(ls_keys, ldf_data))
+    d_data = dict(list(zip(ls_keys, ldf_data)))

     for s_key in ls_keys:
         d_data[s_key] = d_data[s_key].fillna(method='ffill')
@@ -90,7 +90,7 @@ def find_events(ls_symbols, d_data):
         d_data[s_key] = d_data[s_key].fillna(1.0)

     df_events = find_events(ls_symbols, d_data)
-    print "Creating Study"
+    print("Creating Study")
     ep.eventprofiler(df_events, d_data, i_lookback=20, i_lookforward=20,
                 s_filename='MyEventStudy.pdf', b_market_neutral=True,
                 b_errorbars=True, s_market_sym='SPY')
diff --git a/Examples/FeatureSearch/code.py b/Examples/FeatureSearch/code.py
index 0153f179d..b4cbdb3b7 100644
--- a/Examples/FeatureSearch/code.py
+++ b/Examples/FeatureSearch/code.py
@@ -37,7 +37,7 @@
     lsSym = list(set(lsSymTrain).union(set(lsSymTest)))

-    dtStart = dt.datetime(2008,01,01)
+    dtStart = dt.datetime(2008,1,1)
     dtEnd = dt.datetime(2010,12,31)

     norObj = da.DataAccess('Norgate')
@@ -59,8 +59,8 @@
     for temp in ldfDataTest:
         temp.fillna(method="ffill").fillna(method="bfill")

-    dDataTrain = dict(zip(lsKeys, ldfDataTrain))
-    dDataTest = dict(zip(lsKeys, ldfDataTest))
+    dDataTrain = dict(list(zip(lsKeys, ldfDataTrain)))
+    dDataTest = dict(list(zip(lsKeys, ldfDataTest)))

    ''' Imported
functions from qstkfeat.features, NOTE: last function is classification '''
    lfcFeatures = [ featMA, featMA, featMA, featMA, featMA, featMA, \
@@ -98,9 +98,9 @@
     ldfFeaturesTest = ftu.applyFeatures( dDataTest, lfcFeatures, ldArgs, '$SPX')

     ''' Pick Test and Training Points '''
-    dtStartTrain = dt.datetime(2008,01,01)
+    dtStartTrain = dt.datetime(2008,1,1)
     dtEndTrain = dt.datetime(2009,12,31)
-    dtStartTest = dt.datetime(2010,01,01)
+    dtStartTest = dt.datetime(2010,1,1)
     dtEndTest = dt.datetime(2010,12,31)

     ''' Stack all information into one Numpy array '''
@@ -113,7 +113,7 @@
     ftu.normQuery( naFeatTest[:,:-1], ltWeights )

-    lFeatures = range(0,len(lfcFeatures)-1)
+    lFeatures = list(range(0,len(lfcFeatures)-1))
     classLabelIndex = len(lfcFeatures) - 1

     funccall = sys.argv[1] + '(naFeatTrain,naFeatTest,lFeatures,classLabelIndex)'
diff --git a/Examples/Features/featuretest.py b/Examples/Features/featuretest.py
index de20d4446..3f565b4df 100644
--- a/Examples/Features/featuretest.py
+++ b/Examples/Features/featuretest.py
@@ -33,7 +33,7 @@ def learnerTest( naTrain, naTest ):
     @summary: Takes testing and training data and computes average error over the test set
               This is compared to a baseline guess which is just the average of the training set
     '''
-    llRange = range(5,51,5)
+    llRange = list(range(5,51,5))

     lfRes = []
     for lK in llRange:
@@ -70,7 +70,7 @@ def learnerTest( naTrain, naTest ):
     #lsSym = ['XOM']

     ''' Get data for 2009-2010 '''
-    dtStart = dt.datetime(2010,8,01)
+    dtStart = dt.datetime(2010,8,1)
     dtEnd = dt.datetime(2010,12,31)

     norObj = da.DataAccess('Yahoo')
@@ -78,7 +78,7 @@ def learnerTest( naTrain, naTest ):
     lsKeys = ['open', 'high', 'low', 'close', 'volume']

     ldfData = norObj.get_data( ldtTimestamps, lsSym, lsKeys )
-    dData = dict(zip(lsKeys, ldfData))
+    dData = dict(list(zip(lsKeys, ldfData)))

     ''' Imported functions from qstkfeat.features, NOTE: last function is classification '''
     lfcFeatures = [ featMA, featRSI, class_fut_ret ]
diff --git a/Examples/Validation.py b/Examples/Validation.py
index 7b6c80ed2..11008640c 100644
--- a/Examples/Validation.py
+++ b/Examples/Validation.py
@@ -14,80 +14,80 @@
 # Printing what Python Version is installed : QSTK uses 2.7
 import sys
 import platform
-print "Python Details : "
-print sys.version
-print "Your Python Version is : ", platform.python_version()
-print "QSTK uses Python 2.7.X (2.7.3 recommended and supported)"
-print "Please make sure you're using the correct python version."
-print
+print("Python Details : ")
+print(sys.version)
+print("Your Python Version is : ", platform.python_version())
+print("QSTK uses Python 2.7.X (2.7.3 recommended and supported)")
+print("Please make sure you're using the correct python version.")
+print()

 # Printing the directory you are in
 import os
-print "Current Directory : ", os.path.abspath('.')
-print
+print("Current Directory : ", os.path.abspath('.'))
+print()

 # Printing files in the current directory.
-print "Files in the current directory" +print("Files in the current directory") ls_files = os.listdir('.') for s_file in ls_files: - print s_file -print + print(s_file) +print() # Testing the dependencies # Testing numpy try: import numpy - print "Numpy is installed and the version used is : ", numpy.__version__ - print "Please make sure you're using version >= 1.6.1" + print("Numpy is installed and the version used is : ", numpy.__version__) + print("Please make sure you're using version >= 1.6.1") except ImportError: sys.exit("Error : Numpy can not be imported or not installed.") -print +print() # Testing matplotlib try: import matplotlib - print "Matplotlib is installed and version is : ", matplotlib.__version__ - print "Please make sure you're using version >= 1.1.0" + print("Matplotlib is installed and version is : ", matplotlib.__version__) + print("Please make sure you're using version >= 1.1.0") except ImportError: sys.exit("Error : Matplotlib can not be imported or not installed.") -print +print() # Testing Pandas try: import pandas - print "Pandas is installed and the version used is : ", pandas.__version__ - print "Please make sure you're using version >= 0.7.3" + print("Pandas is installed and the version used is : ", pandas.__version__) + print("Please make sure you're using version >= 0.7.3") except ImportError: sys.exit("Error : Pandas can not be imported or not installed.") -print +print() # Testing Scipy try: import scipy - print "Scipy is installed and the version used is : ", scipy.__version__ - print "Please make sure you're using version >= 0.9.0" + print("Scipy is installed and the version used is : ", scipy.__version__) + print("Please make sure you're using version >= 0.9.0") except ImportError: sys.exit("Error : Scipy can not be imported or not installed.") -print +print() # Testing Dateutil try: import dateutil - print "Dateutil is installed and the version used is : ", dateutil.__version__ - print "Please make sure you're using version == 1.5" + print("Dateutil is installed and the version used is : ", dateutil.__version__) + print("Please make sure you're using version == 1.5") except ImportError: sys.exit("Error : Dateutil can not be imported or not installed.") -print +print() # Testing Setuptools try: import setuptools - print "Setuptools is installed and the version used is : ", setuptools.__version__ - print "Please make sure you're using version >= 0.6" + print("Setuptools is installed and the version used is : ", setuptools.__version__) + print("Please make sure you're using version >= 0.6") except ImportError: sys.exit("Error : Setuptools can not be imported or not installed.") -print +print() # # Testing CVXOPT # try: @@ -100,40 +100,40 @@ # Testing datetime try: import datetime as dt - print "datetime is installed and can be imported" + print("datetime is installed and can be imported") except ImportError: sys.exit("Error : datetime can not be imported or not installed.") -print +print() # All dependencies are installed and working -print "All dependencies are installed and working\n" +print("All dependencies are installed and working\n") # Testing import of QSTK # Testing QSTK try: import QSTK - print "QSTK is installed and can be imported" + print("QSTK is installed and can be imported") except ImportError: sys.exit("Error : QSTK can not be imported or not installed.") -print +print() # Testing QSTK.qstkutil try: import QSTK.qstkutil.tsutil as tsu import QSTK.qstkutil.qsdateutil as du import QSTK.qstkutil.DataAccess as da - print "QSTK.qstkutil is 
installed and can be imported"
+    print("QSTK.qstkutil is installed and can be imported")
 except ImportError:
     exit("Error : QSTK.qstkutil can not be imported.")
-print
+print()

 # Testing QSTK.qstkstudy
 try:
     import QSTK.qstkstudy.EventProfiler
-    print "QSTK.qstkstudy is installed and can be imported"
+    print("QSTK.qstkstudy is installed and can be imported")
 except ImportError:
     exit("Error : QSTK.qstkstudy can not be imported.")
-print
+print()

 # Checking that the data installed is correct.
 # Start and End date of the charts
@@ -149,29 +149,29 @@
 c_dataobj = da.DataAccess('Yahoo', verbose=True)
 # Reading adjusted_close prices
 df_close = c_dataobj.get_data(ldt_timestamps, ls_symbols, "close")
-print df_close
-print
-print "\nCorrect Output using the Default Data should be : "
-print "Assignments use this data for grading"
-print " MSFT GOOG"
-print "2012-02-10 16:00:00 29.90 605.91"
-print "2012-02-13 16:00:00 29.98 612.20"
-print "2012-02-14 16:00:00 29.86 609.76"
-print "2012-02-15 16:00:00 29.66 605.56"
-print "2012-02-16 16:00:00 30.88 606.52"
-print "2012-02-17 16:00:00 30.84 604.64"
-print "2012-02-21 16:00:00 31.03 614.00"
-print "2012-02-22 16:00:00 30.86 607.94"
-print "2012-02-23 16:00:00 30.96 606.11"
-print
+print(df_close)
+print()
+print("\nCorrect Output using the Default Data should be : ")
+print("Assignments use this data for grading")
+print(" MSFT GOOG")
+print("2012-02-10 16:00:00 29.90 605.91")
+print("2012-02-13 16:00:00 29.98 612.20")
+print("2012-02-14 16:00:00 29.86 609.76")
+print("2012-02-15 16:00:00 29.66 605.56")
+print("2012-02-16 16:00:00 30.88 606.52")
+print("2012-02-17 16:00:00 30.84 604.64")
+print("2012-02-21 16:00:00 31.03 614.00")
+print("2012-02-22 16:00:00 30.86 607.94")
+print("2012-02-23 16:00:00 30.96 606.11")
+print()

 dt_test = dt.datetime(2012, 2, 15, 16)
-print "Close price of MSFT on 2012/2/15 is : ", df_close['MSFT'].ix[dt_test]
+print("Close price of MSFT on 2012/2/15 is : ", df_close['MSFT'].ix[dt_test])
 if df_close['MSFT'].ix[dt_test] == 29.66:
-    print "Data looks correct as the close price in default data is 29.66"
+    print("Data looks correct as the close price in default data is 29.66")
 else:
-    print "Default data used in the assisgnments has close price as 29.66"
+    print("Default data used in the assignments has close price as 29.66")
     sys.exit("Error : Data has changed so does not match data used in Assignments")
-print
+print()

-print "Everything works fine: You're all set."
+print("Everything works fine: You're all set.") diff --git a/Legacy/Legacy/EventProfiler/EventProfiler.py b/Legacy/Legacy/EventProfiler/EventProfiler.py index d8288d317..d52fa9ce9 100644 --- a/Legacy/Legacy/EventProfiler/EventProfiler.py +++ b/Legacy/Legacy/EventProfiler/EventProfiler.py @@ -59,7 +59,7 @@ def __init__(self,eventMatrix,startday,endday,\ self.timestamps = du.getNYSEdays(startday,endday,self.timeofday) self.verbose = verbose if verbose: - print __name__ + " reading historical data" + print(__name__ + " reading historical data") self.close = self.dataobj.get_data(self.timestamps,\ self.symbols, "close", verbose=self.verbose) self.close = (self.close.fillna()).fillna(method='backfill') @@ -116,7 +116,7 @@ def study(self,filename,method="mean", \ # step through each column in event matrix for col in range(0,events.shape[1]): if (self.verbose and col%20==0): - print __name__ + " study: " + str(col) + " of " + str(numcols) + print(__name__ + " study: " + str(col) + " of " + str(numcols)) # search each column for events for row in range(0,events.shape[0]): # when we find an event @@ -137,7 +137,7 @@ def study(self,filename,method="mean", \ # prepare data for plot studystat = mean(impact,axis=1) studystd = std(impact,axis=1) - studyrange = range(-self.lookback_days,self.lookforward_days+1) + studyrange = list(range(-self.lookback_days,self.lookforward_days+1)) # plot baby plt.clf() diff --git a/Legacy/Legacy/OldDataAccess.py b/Legacy/Legacy/OldDataAccess.py index d7f31d413..9725bd788 100644 --- a/Legacy/Legacy/OldDataAccess.py +++ b/Legacy/Legacy/OldDataAccess.py @@ -1,269 +1,269 @@ -''' -Created on Jun 2, 2010 - -@author: Shreyas Joshi -@contact: shreyasj@gatech.edu -''' - -import tables as pt -#import sys - - -class DataAccess: - ''' - @attention: Assumption is that the data has symbols and timestamps. Also assumes that data is read in from low timestamp to high timestamp. - data is not sorted after reading in. - @warning: No checks are perform to prevent this function from returning future data. You will get what you ask for! (assuming its there!) - @summary: The purpose of this class is to be a general way to access any data about stocks- that is in a 2-D array with one dimension - being stock symbols and the other being time. Each element of data can be an encapsulation of things like opening price, closing - price, adj_close etc... or it can just be a single value. - ''' - - def __init__(self, fileIterator, noisy, dataItemsList=None, SYMBOL='symbol', TIMESTAMP='timestamp'): - ''' - @param fileIterator: [filename].[root].[group name].[table name] as needed to read in and hdf5 file - @param noisy: is it noisy? - @param dataItemList: should be a list of all the data items that need to be read in from the file. If None then all will data will be read - @param SYMBOL: just in case that the name of the "symbol" data item is not exactly "symbol" this can be changed. 
- ''' - - self.allDataList=[] - self.dataItemsList=[] -# self.SYMBOL=[] -# self.TIMESTAMP=[] - self.SYMBOL= SYMBOL - self.TIMESTAMP= TIMESTAMP - self.noisy=noisy - - - #Making sure dataItemsList has symbols and timestamps - if (dataItemsList is not None): - try: - dataItemsList.index("symbol") - except ValueError: - print "adding SYMBOL" - dataItemsList.append(self.SYMBOL) - - try: - dataItemsList.index("timestamp") - except ValueError: - print "adding TIMESTAMP" - dataItemsList.append(self.TIMESTAMP) - - else: - #adding all known items to the list- change this list to change default behaviour (ie when dataItemsList is none) - dataItemsList= list() - dataItemsList.append('symbol') - dataItemsList.append('timestamp') - dataItemsList.append('exchange') - dataItemsList.append('adj_open') - dataItemsList.append('adj_close') - dataItemsList.append('adj_high') - dataItemsList.append('adj_low') - dataItemsList.append('close') - dataItemsList.append('volume') - - self.dataItemsList= dataItemsList - for row in fileIterator.iterrows(): -# print "SYM: "+str(row['symbol'])+", EX: "+ str(row['exchange'])+", ALPHA: "+str(row['alphaValue'])+", TIMESTAMP: "+str(row['timestamp']) - self.allDataList.append(self.cloneRow(row, dataItemsList)) -# print self.allDataList[len(self.allDataList)-1] - self.allDataList.sort(cmp=None, key=None, reverse=False) - # constructor ends - - - def getData (self, stockList=None, dataList=None, beginTS=None, endTS=None): - ''' - @param stockList: If data about only 1 stock is needed then this param can be a string- else a list of strings of names of all the stocks that you want data about. If not specified data about all stocks will be returned - @param dataList: If only one dataItem is needed (say adj_open only) then this param can be a string- else a list of strings of the names of all the data items you want. If not specified all data items will be returned. - @param beginTS: If specified- only rows with timestamp greater than or equal to this will be returned - @param endTS: If specified- only rows with timestamp smaller than or equal to this will be returned - - @warning: function does not check if beginTS < endTS- but violating this will result in None being returned. - @summary: this function just traverses over the data. It assumes that all the data fits into memory. - The real reading from disk is done in the constructor - To get data for one timestamp only- set beginTS=endTS= the timestamp you want. - @return: returns the requested data as a list. NOTE: the returned list will always have symbol and timestamp- - even if they weren't explicitly asked for in the dataItemsList. If no data is found then an empty list is returned. - ''' - - #stockList (despite its name) can be a string or a list - if (stockList is not None): - if (type(stockList) is not list): - if (type (stockList) is not str): - print "Stocks must either be a string (if you want only 1 stock) or a list of strings" - raise TypeError - else: - #its not a list but its a string - tempStr= str(stockList) - stockList= list() - stockList.append(tempStr) -# print "changed stockList from str to list!" 
-# print "Printing first in the list: " + stockList[0] - - - #dataList (despite its name) can be a string or a list - if (dataList is not None): - if (type(dataList) is not list): - if (type (dataList) is not str): - print "data items you want must either be a string (of you want only one data item) or a list of strings" - raise TypeError - else: - #its not a list but its a string - tempStr= str(dataList) - dataList= list() - dataList.append(tempStr) -# print "changed dataList from str to list!" -# print "Printing first in the list: " + dataList[0] - else: - #dataList is None - dataList= self.dataItemsList - - #Making sure dataList has symbols and timestamps - try: - dataList.index("symbol") - except ValueError: -# print "adding SYMBOL" - dataList.append(self.SYMBOL) - - try: - dataList.index("timestamp") - except ValueError: -# print "adding TIMESTAMP" - dataList.append(self.TIMESTAMP) - - #Now, filter out the data from allDataList, put it in another list (inefficient?) and return the other list - tempFilteredList=[] - for item in self.allDataList: - if (beginTS is not None): - # which means we need to reject all rows with timestamp < beginTS - if item[self.TIMESTAMP]< beginTS: # so = will be included -# print "rejecting because of beginning TS" - continue #skipping this item - - if (endTS is not None): - # which means we need to reject all rows with timestamp > endTS - if item[self.TIMESTAMP]> endTS: # so = will be included -# print "rejecting because of ending TS" - continue #skipping this item - - if (stockList is not None): - # We need to return this item only if its name is present in the stockList - nameFound= False - for item2 in stockList: - if (item2== item [self.SYMBOL]): - nameFound= True - #searching done - if (nameFound== False): -# print "rejecting because of stock name not found" - continue #skipping this item - - # if we got till here then the row must be returned. Hence adding to list - if (dataList is None): - tempFilteredList.append(self.cloneRow(item, self.dataItemsList)) - else: - tempFilteredList.append(self.cloneRow(item, dataList)) - # for item in self.allDataList done - - if (len (tempFilteredList)==0): - if self.noisy is True: - print "Warning: no data found" -# sys.stdout.flush() - - - return tempFilteredList - - # getData ends - - def getDataList(self, stockName, dataItemName, beginTS=None, endTS=None): - ''' - @param stockName: The name of the stock whose data you need. This has to be a string. One stock only - @param dataItemName:The data item that you need like open, close, volume etc. This has to be a string. Only one can be specified. - @param beginTS: Optional parameter. If specified only data for timestamp >= beginTS will be considered. - @param endTS: Optional paramter. If specified only data for timestamp <= endTS will be considered. - - @warning: function does not check if beginTS < endTS- but violating this will result in None being returned. - @summary: Use this function to get a list of values of some dataItem of a particular stock. Unlike the getData function this function - does not return a list of dictionaries with the stock symbol and timestamp. - To get data for one timestamp only- set beginTS=endTS= the timestamp you want. - @return: A list of dataItemName values for stockName between beginTS and endTS- if specified - or values for all timestamps if not - specified. If no data is found then an empty list is returned. 
- ''' - - if (type(stockName) is not str): - print "stock name must be a string" - raise TypeError - - if (type(dataItemName) is not str): - print "data item must be a string" - raise TypeError - - tempList=[] - - for item in self.allDataList: - if beginTS is not None: - if item[self.TIMESTAMP]< beginTS: - continue #skipping this item - - if endTS is not None: - if item[self.TIMESTAMP] > endTS: - continue #skipping this item - - if item[self.SYMBOL] == stockName: - tempList.append(item[dataItemName]) - #for loop ends - - if (len (tempList)==0): - if self.noisy is True: - print "Warning: no data found" -# sys.stdout.flush() - - return tempList - #getDataList ends - - - def getDataItem (self, stockName, dataItemName, timestamp): - - ''' - @param stockName: The name of the stock whose data you need. This has to be a string. One stock only - @param dataItemName:The data item that you need like open, close, volume etc. This has to be a string. Only one can be specified. - @param timestamp: Required parameter. Only data for this timestamp will be considered. - - @summary: Use this function to get one value of some dataItem of a particular stock. Unlike the getData function, this function - does not return a list of dictionaries with the stock symbol and timestamp. Unlike the getDataList function, this - function does not return an array of values. - @return: The value of dataItemName value for stockName at the specified timestamp - ''' - - if (type(stockName) is not str): - print "stock name must be a string" - raise TypeError - - if (type(dataItemName) is not str): - print "data item must be a string" - raise TypeError - -# tempStr=str("") - - for item in self.allDataList: - if item[self.SYMBOL]== stockName: - if item[self.TIMESTAMP]== timestamp: - return item[dataItemName] - - if self.noisy is True: - print "Warning: no data found" -# sys.stdout.flush() - return None - #getDataitem ends - - - def cloneRow(self, row, itemsList): - - dct={} - for dataItem in itemsList: - try: - dct[str(dataItem)]= row[str(dataItem)] - except KeyError: - print "Error: "+str(dataItem)+" not available" - raise KeyError +''' +Created on Jun 2, 2010 + +@author: Shreyas Joshi +@contact: shreyasj@gatech.edu +''' + +import tables as pt +#import sys + + +class DataAccess: + ''' + @attention: Assumption is that the data has symbols and timestamps. Also assumes that data is read in from low timestamp to high timestamp. + data is not sorted after reading in. + @warning: No checks are perform to prevent this function from returning future data. You will get what you ask for! (assuming its there!) + @summary: The purpose of this class is to be a general way to access any data about stocks- that is in a 2-D array with one dimension + being stock symbols and the other being time. Each element of data can be an encapsulation of things like opening price, closing + price, adj_close etc... or it can just be a single value. + ''' + + def __init__(self, fileIterator, noisy, dataItemsList=None, SYMBOL='symbol', TIMESTAMP='timestamp'): + ''' + @param fileIterator: [filename].[root].[group name].[table name] as needed to read in and hdf5 file + @param noisy: is it noisy? + @param dataItemList: should be a list of all the data items that need to be read in from the file. If None then all will data will be read + @param SYMBOL: just in case that the name of the "symbol" data item is not exactly "symbol" this can be changed. 
+        '''
+
+        self.allDataList=[]
+        self.dataItemsList=[]
+#        self.SYMBOL=[]
+#        self.TIMESTAMP=[]
+        self.SYMBOL= SYMBOL
+        self.TIMESTAMP= TIMESTAMP
+        self.noisy=noisy
+
+
+        #Making sure dataItemsList has symbols and timestamps
+        if (dataItemsList is not None):
+            try:
+                dataItemsList.index("symbol")
+            except ValueError:
+                print("adding SYMBOL")
+                dataItemsList.append(self.SYMBOL)
+
+            try:
+                dataItemsList.index("timestamp")
+            except ValueError:
+                print("adding TIMESTAMP")
+                dataItemsList.append(self.TIMESTAMP)
+
+        else:
+            #adding all known items to the list- change this list to change default behaviour (ie when dataItemsList is none)
+            dataItemsList= list()
+            dataItemsList.append('symbol')
+            dataItemsList.append('timestamp')
+            dataItemsList.append('exchange')
+            dataItemsList.append('adj_open')
+            dataItemsList.append('adj_close')
+            dataItemsList.append('adj_high')
+            dataItemsList.append('adj_low')
+            dataItemsList.append('close')
+            dataItemsList.append('volume')
+
+        self.dataItemsList= dataItemsList
+        for row in fileIterator.iterrows():
+#            print "SYM: "+str(row['symbol'])+", EX: "+ str(row['exchange'])+", ALPHA: "+str(row['alphaValue'])+", TIMESTAMP: "+str(row['timestamp'])
+            self.allDataList.append(self.cloneRow(row, dataItemsList))
+#        print self.allDataList[len(self.allDataList)-1]
+        self.allDataList.sort()   # list.sort() no longer takes a cmp argument in Python 3
+        # constructor ends
+
+
+    def getData (self, stockList=None, dataList=None, beginTS=None, endTS=None):
+        '''
+        @param stockList: If data about only 1 stock is needed then this param can be a string- else a list of strings of names of all the stocks that you want data about. If not specified data about all stocks will be returned
+        @param dataList: If only one dataItem is needed (say adj_open only) then this param can be a string- else a list of strings of the names of all the data items you want. If not specified all data items will be returned.
+        @param beginTS: If specified- only rows with timestamp greater than or equal to this will be returned
+        @param endTS: If specified- only rows with timestamp smaller than or equal to this will be returned
+
+        @warning: function does not check if beginTS < endTS- but violating this will result in None being returned.
+        @summary: this function just traverses over the data. It assumes that all the data fits into memory.
+                  The real reading from disk is done in the constructor
+                  To get data for one timestamp only- set beginTS=endTS= the timestamp you want.
+        @return: returns the requested data as a list. NOTE: the returned list will always have symbol and timestamp-
+                 even if they weren't explicitly asked for in the dataItemsList. If no data is found then an empty list is returned.
+        '''
+
+        #stockList (despite its name) can be a string or a list
+        if (stockList is not None):
+            if (type(stockList) is not list):
+                if (type (stockList) is not str):
+                    print("Stocks must either be a string (if you want only 1 stock) or a list of strings")
+                    raise TypeError
+                else:
+                    #its not a list but its a string
+                    tempStr= str(stockList)
+                    stockList= list()
+                    stockList.append(tempStr)
+#                    print "changed stockList from str to list!"
+# print "Printing first in the list: " + stockList[0] + + + #dataList (despite its name) can be a string or a list + if (dataList is not None): + if (type(dataList) is not list): + if (type (dataList) is not str): + print("data items you want must either be a string (of you want only one data item) or a list of strings") + raise TypeError + else: + #its not a list but its a string + tempStr= str(dataList) + dataList= list() + dataList.append(tempStr) +# print "changed dataList from str to list!" +# print "Printing first in the list: " + dataList[0] + else: + #dataList is None + dataList= self.dataItemsList + + #Making sure dataList has symbols and timestamps + try: + dataList.index("symbol") + except ValueError: +# print "adding SYMBOL" + dataList.append(self.SYMBOL) + + try: + dataList.index("timestamp") + except ValueError: +# print "adding TIMESTAMP" + dataList.append(self.TIMESTAMP) + + #Now, filter out the data from allDataList, put it in another list (inefficient?) and return the other list + tempFilteredList=[] + for item in self.allDataList: + if (beginTS is not None): + # which means we need to reject all rows with timestamp < beginTS + if item[self.TIMESTAMP]< beginTS: # so = will be included +# print "rejecting because of beginning TS" + continue #skipping this item + + if (endTS is not None): + # which means we need to reject all rows with timestamp > endTS + if item[self.TIMESTAMP]> endTS: # so = will be included +# print "rejecting because of ending TS" + continue #skipping this item + + if (stockList is not None): + # We need to return this item only if its name is present in the stockList + nameFound= False + for item2 in stockList: + if (item2== item [self.SYMBOL]): + nameFound= True + #searching done + if (nameFound== False): +# print "rejecting because of stock name not found" + continue #skipping this item + + # if we got till here then the row must be returned. Hence adding to list + if (dataList is None): + tempFilteredList.append(self.cloneRow(item, self.dataItemsList)) + else: + tempFilteredList.append(self.cloneRow(item, dataList)) + # for item in self.allDataList done + + if (len (tempFilteredList)==0): + if self.noisy is True: + print("Warning: no data found") +# sys.stdout.flush() + + + return tempFilteredList + + # getData ends + + def getDataList(self, stockName, dataItemName, beginTS=None, endTS=None): + ''' + @param stockName: The name of the stock whose data you need. This has to be a string. One stock only + @param dataItemName:The data item that you need like open, close, volume etc. This has to be a string. Only one can be specified. + @param beginTS: Optional parameter. If specified only data for timestamp >= beginTS will be considered. + @param endTS: Optional paramter. If specified only data for timestamp <= endTS will be considered. + + @warning: function does not check if beginTS < endTS- but violating this will result in None being returned. + @summary: Use this function to get a list of values of some dataItem of a particular stock. Unlike the getData function this function + does not return a list of dictionaries with the stock symbol and timestamp. + To get data for one timestamp only- set beginTS=endTS= the timestamp you want. + @return: A list of dataItemName values for stockName between beginTS and endTS- if specified - or values for all timestamps if not + specified. If no data is found then an empty list is returned. 
+        '''
+
+        if (type(stockName) is not str):
+            print("stock name must be a string")
+            raise TypeError
+
+        if (type(dataItemName) is not str):
+            print("data item must be a string")
+            raise TypeError
+
+        tempList=[]
+
+        for item in self.allDataList:
+            if beginTS is not None:
+                if item[self.TIMESTAMP]< beginTS:
+                    continue #skipping this item
+
+            if endTS is not None:
+                if item[self.TIMESTAMP] > endTS:
+                    continue #skipping this item
+
+            if item[self.SYMBOL] == stockName:
+                tempList.append(item[dataItemName])
+        #for loop ends
+
+        if (len (tempList)==0):
+            if self.noisy is True:
+                print("Warning: no data found")
+#                sys.stdout.flush()
+
+        return tempList
+        #getDataList ends
+
+
+    def getDataItem (self, stockName, dataItemName, timestamp):
+
+        '''
+        @param stockName: The name of the stock whose data you need. This has to be a string. One stock only
+        @param dataItemName: The data item that you need like open, close, volume etc. This has to be a string. Only one can be specified.
+        @param timestamp: Required parameter. Only data for this timestamp will be considered.
+
+        @summary: Use this function to get one value of some dataItem of a particular stock. Unlike the getData function, this function
+        does not return a list of dictionaries with the stock symbol and timestamp. Unlike the getDataList function, this
+        function does not return an array of values.
+        @return: The value of dataItemName value for stockName at the specified timestamp
+        '''
+
+        if (type(stockName) is not str):
+            print("stock name must be a string")
+            raise TypeError
+
+        if (type(dataItemName) is not str):
+            print("data item must be a string")
+            raise TypeError
+
+#        tempStr=str("")
+
+        for item in self.allDataList:
+            if item[self.SYMBOL]== stockName:
+                if item[self.TIMESTAMP]== timestamp:
+                    return item[dataItemName]
+
+        if self.noisy is True:
+            print("Warning: no data found")
+#            sys.stdout.flush()
+        return None
+        #getDataItem ends
+
+
+    def cloneRow(self, row, itemsList):
+
+        dct={}
+        for dataItem in itemsList:
+            try:
+                dct[str(dataItem)]= row[str(dataItem)]
+            except KeyError:
+                print("Error: "+str(dataItem)+" not available")
+                raise KeyError
         return dct
\ No newline at end of file
diff --git a/Legacy/Legacy/alphaDataModel/AlphaDataModel.py b/Legacy/Legacy/alphaDataModel/AlphaDataModel.py
index bc39c7986..5df3a0c22 100644
--- a/Legacy/Legacy/alphaDataModel/AlphaDataModel.py
+++ b/Legacy/Legacy/alphaDataModel/AlphaDataModel.py
@@ -1,106 +1,106 @@
-'''
-Created on Jun 1, 2010
-
-@author: Shreyas Joshi
-@summary: The purpose of this module is to make it easy to create hdf5 files with "alpha" values in them
-'''
-import tables as pt
-fileName="defaultAlphaFileName.h5"
-h5f=[]
-group=[]
-table=[]
-opened=False
-ctr=float (0.0)
-
-class AlphaDataModelClass(pt.IsDescription):
-    symbol = pt.StringCol(30)
-    exchange = pt.StringCol(10)
-    alphaValue=pt.Float32Col()
-    timestamp= pt.Time64Col()
-
-
-
-    def __init__(self):
-        print "In the AlphaDataModelClass constructor"
-
-    #constructor done
-#class ends!
-
-
-def openFile (newFileName):
-    '''
-    @param newFileName: Full path to the file and the name of the file.
-    @summary: This function creates a new file. If the length of the name passed =0 then a file called "defaultAlphaFileName.h5" will be created.
-    @warning: If a file of the same name already exists then that file will be overwritten.
- ''' - global fileName, h5f, group, table, opened, ctr - ctr=float (0.0) - - if newFileName is None: - print "Using default name for alpha file" - else: - if (len(newFileName)>0): - fileName= str(newFileName) - else: - print "Using default name for alpha file" - - #Opening the file now... - if not opened: - h5f = pt.openFile(str(fileName), mode = "w") - group = h5f.createGroup("/", 'alphaData') - table = h5f.createTable(group, 'alphaData', AlphaDataModelClass) - opened=True - else: - print "File already opened. Doing nothing" - - # File opened - -def addRow (currSymbol, currExchange, currAlphaVal, currTS): - ''' - @param currSymbol: The symbol of the stock - @param currExchange: The exchange the stock trades on - @param currAlphaVal: The alpha value of the stock at the current timestamp - @param currTS: The current time stamp - @summary: Adds a row of data to the file- and writes it out do disk...eventually - @warning: File must be opened before calling this function - ''' - global ctr - - if opened: - ctr= ctr + 1 - row = table.row - row['symbol']= currSymbol - row['exchange']=currExchange - row['alphaValue']= currAlphaVal - row['timestamp']= currTS - row.append() - #print "Appending row " + str (currTS) - if (ctr==10000): #Might cause mem error - ctr=0 - table.flush() #write to disk - - - - else: - print "ERROR: File not open. Can not add row." - raise IOError -# addRow done - - -#def readAllData(): -## global h5f -## table2 = h5f.root.alphaData.alphaData -# -# for row in table.iterrows(): #for row in table2.iterrows(): -# print "SYM: "+str(row['symbol'])+", EX: "+ str(row['exchange'])+", ALPHA: "+str(row['alphaValue'])+", TIMESTAMP: "+str(row['timestamp']) - - -def closeFile(): - ''' - @summary: closes the file. - ''' - - table.flush() - h5f.close() - print str(fileName)+ " closed." +''' +Created on Jun 1, 2010 + +@author: Shreyas Joshi +@summary: The purpose of this module is to make it easy to create hdf5 files with "alpha" values in them +''' +import tables as pt +fileName="defaultAlphaFileName.h5" +h5f=[] +group=[] +table=[] +opened=False +ctr=float (0.0) + +class AlphaDataModelClass(pt.IsDescription): + symbol = pt.StringCol(30) + exchange = pt.StringCol(10) + alphaValue=pt.Float32Col() + timestamp= pt.Time64Col() + + + + def __init__(self): + print("In the AlphaDataModelClass constructor") + + #constructor done +#class ends! + + +def openFile (newFileName): + ''' + @param newFileName: Full path to the file and the name of the file. + @summary: This function creates a new file. If the length of the name passed =0 then a file called "defaultAlphaFileName.h5" will be created. + @warning: If a file of the same name already exists then that file will be overwritten. + ''' + global fileName, h5f, group, table, opened, ctr + ctr=float (0.0) + + if newFileName is None: + print("Using default name for alpha file") + else: + if (len(newFileName)>0): + fileName= str(newFileName) + else: + print("Using default name for alpha file") + + #Opening the file now... + if not opened: + h5f = pt.openFile(str(fileName), mode = "w") + group = h5f.createGroup("/", 'alphaData') + table = h5f.createTable(group, 'alphaData', AlphaDataModelClass) + opened=True + else: + print("File already opened. 
Doing nothing") + + # File opened + +def addRow (currSymbol, currExchange, currAlphaVal, currTS): + ''' + @param currSymbol: The symbol of the stock + @param currExchange: The exchange the stock trades on + @param currAlphaVal: The alpha value of the stock at the current timestamp + @param currTS: The current time stamp + @summary: Adds a row of data to the file- and writes it out do disk...eventually + @warning: File must be opened before calling this function + ''' + global ctr + + if opened: + ctr= ctr + 1 + row = table.row + row['symbol']= currSymbol + row['exchange']=currExchange + row['alphaValue']= currAlphaVal + row['timestamp']= currTS + row.append() + #print "Appending row " + str (currTS) + if (ctr==10000): #Might cause mem error + ctr=0 + table.flush() #write to disk + + + + else: + print("ERROR: File not open. Can not add row.") + raise IOError +# addRow done + + +#def readAllData(): +## global h5f +## table2 = h5f.root.alphaData.alphaData +# +# for row in table.iterrows(): #for row in table2.iterrows(): +# print "SYM: "+str(row['symbol'])+", EX: "+ str(row['exchange'])+", ALPHA: "+str(row['alphaValue'])+", TIMESTAMP: "+str(row['timestamp']) + + +def closeFile(): + ''' + @summary: closes the file. + ''' + + table.flush() + h5f.close() + print(str(fileName)+ " closed.") opened= False \ No newline at end of file diff --git a/Legacy/Legacy/alphaDataModel/alphaGenerator.py b/Legacy/Legacy/alphaDataModel/alphaGenerator.py index 860c0a51e..7c9d97f8d 100644 --- a/Legacy/Legacy/alphaDataModel/alphaGenerator.py +++ b/Legacy/Legacy/alphaDataModel/alphaGenerator.py @@ -1,65 +1,65 @@ -''' -Created on Jun 1, 2010 - -@author: Shreyas Joshi -@contact: shreyasj@gatech.edu -@summary: This module is used to generate random alpha values that will then be looked at by the simulator when running. The alpha values have to - be generated before the simulator starts. -''' -import tables as pt -import time -import random -#from AlphaDataModel import * - -import AlphaDataModel as adm -#Main begins - -#alpha val writing begins - -adm.openFile("randomAlpha.h5") - -#of ("myAlphaFile.h5") - -startDate=19840101 -endDate=20100101 - -tsStart= time.mktime(time.strptime(str(startDate),'%Y%m%d')) - -tsEnd= time.mktime(time.strptime(str(endDate),'%Y%m%d')) - -while (tsStart <= tsEnd): - adm.addRow("AAPL", "EXCHG", random.random(), tsStart) - - tsStart+=86400 - #While ends - -tsStart= time.mktime(time.strptime(str(startDate),'%Y%m%d')) -while (tsStart <= tsEnd): - adm.addRow("GOOG", "EXCHG", random.random(), tsStart) - - tsStart+=86400 - #While ends -tsStart= time.mktime(time.strptime(str(startDate),'%Y%m%d')) -while (tsStart <= tsEnd): - adm.addRow("MSFT", "EXCHG", random.random(), tsStart) - - tsStart+=86400 - #While ends - - - -tsStart= time.mktime(time.strptime(str(startDate),'%Y%m%d')) -while (tsStart <= tsEnd): - adm.addRow("YHOO", "EXCHG", random.random(), tsStart) - - tsStart+=86400 - #While ends - -print "Finished adding all data" - -#print "Reading it in now..." -#adm.readAllData() -adm.closeFile() -print "All done" -#Main ends +''' +Created on Jun 1, 2010 + +@author: Shreyas Joshi +@contact: shreyasj@gatech.edu +@summary: This module is used to generate random alpha values that will then be looked at by the simulator when running. The alpha values have to + be generated before the simulator starts. +''' +import tables as pt +import time +import random +#from AlphaDataModel import * + +from . 
import AlphaDataModel as adm +#Main begins + +#alpha val writing begins + +adm.openFile("randomAlpha.h5") + +#of ("myAlphaFile.h5") + +startDate=19840101 +endDate=20100101 + +tsStart= time.mktime(time.strptime(str(startDate),'%Y%m%d')) + +tsEnd= time.mktime(time.strptime(str(endDate),'%Y%m%d')) + +while (tsStart <= tsEnd): + adm.addRow("AAPL", "EXCHG", random.random(), tsStart) + + tsStart+=86400 + #While ends + +tsStart= time.mktime(time.strptime(str(startDate),'%Y%m%d')) +while (tsStart <= tsEnd): + adm.addRow("GOOG", "EXCHG", random.random(), tsStart) + + tsStart+=86400 + #While ends +tsStart= time.mktime(time.strptime(str(startDate),'%Y%m%d')) +while (tsStart <= tsEnd): + adm.addRow("MSFT", "EXCHG", random.random(), tsStart) + + tsStart+=86400 + #While ends + + + +tsStart= time.mktime(time.strptime(str(startDate),'%Y%m%d')) +while (tsStart <= tsEnd): + adm.addRow("YHOO", "EXCHG", random.random(), tsStart) + + tsStart+=86400 + #While ends + +print("Finished adding all data") + +#print "Reading it in now..." +#adm.readAllData() +adm.closeFile() +print("All done") +#Main ends \ No newline at end of file diff --git a/Legacy/Legacy/alphaGenerators/Bollingerbands.py b/Legacy/Legacy/alphaGenerators/Bollingerbands.py index d67f1e2b2..a56f74b8b 100644 --- a/Legacy/Legacy/alphaGenerators/Bollingerbands.py +++ b/Legacy/Legacy/alphaGenerators/Bollingerbands.py @@ -1,157 +1,157 @@ -''' -Created on Jul 30, 2010 - -@author: sjoshi42 -@summary: This module generates alpha values based on bollinger bands -''' - -import DataAccess -import dircache -import numpy -import alphaDataModel.AlphaDataModel as adm -#import alphaGenerator.AlphaDataModel as adm -from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas -from matplotlib.figure import Figure - -def getStocks(listOfPaths): - - listOfStocks=list() - #Path does not exist - print "Reading in all stock names..." - fileExtensionToRemove=".h5" - - for path in listOfPaths: - stocksAtThisPath=list () - - stocksAtThisPath= dircache.listdir(str(path)) - #Next, throw away everything that is not a .h5 And these are our stocks! 
- stocksAtThisPath = filter (lambda x:(str(x).find(str(fileExtensionToRemove)) > -1), stocksAtThisPath) - #Now, we remove the .h5 to get the name of the stock - stocksAtThisPath = map(lambda x:(x.partition(str(fileExtensionToRemove))[0]),stocksAtThisPath) - - for stock in stocksAtThisPath: - listOfStocks.append(stock) - return listOfStocks - #readStocksFromFile done - - - -def removeNaNs(numArray): - ctr=1 - #fill forward - while (ctr< numArray.size): - if (numpy.isnan(numArray[ctr])): - if not (numpy.isnan(numArray[ctr-1])): - numArray[ctr]= numArray[ctr-1] - #if not ends - #if ends - ctr+=1 - #while ends - - #fill back - ctr= numArray.size-2 - while (ctr>=0): - if (numpy.isnan(numArray[ctr])): - if not (numpy.isnan(numArray[ctr+1])): - numArray[ctr]= numArray[ctr+1] - #if not ends - #if ends - ctr-=1 - #while ends - return numArray -#removeNaNs ends - - -def main(): - ''' - @summary: Calculates Bollinger bands - ''' - - folderList= list() - folderList.append("C:\\tempoutput\\") - listOfStocks= list() -# listOfStocks.append("ACY") - listOfStocks.append("AAPL") - #listOfStocks= getStocks(folderList) - - - - dataAccess= DataAccess.DataAccess (True, folderList, "/StrategyData", "StrategyData", True, listOfStocks) - timestamps= list(dataAccess.getTimestampArray()) - adm.openFile("AAPLonlybollingerBandsAlphaVals.h5") - - period= 10 - stdMultiplier=2 - noOfDays= len (timestamps) #400 - - - centerband= numpy.zeros(noOfDays, dtype= float) #len(timestamps)- period + 1 #Just to make it the same length as the adj_close to make it easier to plot - upperBand= numpy.zeros(noOfDays, dtype= float) - lowerBand= numpy.zeros(noOfDays, dtype= float) - x= numpy.zeros(noOfDays, dtype= float) - - ctr=0 - while (ctr< noOfDays): - x[ctr]=ctr - ctr+=1 - #end while - - - for stock in listOfStocks: - print "Processing: " + str(stock) - #adj_close= dataAccess.getStockDataList(str(stock), 'adj_close') - adj_close= dataAccess.getStockDataList(stock, 'adj_close', timestamps[0], timestamps[noOfDays-1]) - - adj_close= removeNaNs(adj_close)#nan's removed, unless all are nans - - #Now calculating bollinger bands - for ctr in range (period, noOfDays): - - try: - centerband[ctr]= numpy.average(adj_close[ctr- period:ctr]) - stdDev= numpy.std(adj_close[ctr- period:ctr]) - upperBand[ctr]= centerband[ctr] + (stdMultiplier* stdDev) - lowerBand[ctr]= centerband[ctr] - (stdMultiplier* stdDev) - except IndexError: - print "ctr is: " + str(ctr) - - #writing alpha values to file - for ctr in range (0, noOfDays): - if (upperBand[ctr]== lowerBand[ctr])or (adj_close[ctr]== centerband[ctr]): - adm.addRow(str(stock), "blah", 0.0, timestamps[ctr]) - elif (adj_close[ctr] < centerband[ctr]): - alphaValue= lowerBand[ctr]/ adj_close[ctr] - adm.addRow (str(stock), "blah", alphaValue, timestamps[ctr]) - else: - alphaValue= - adj_close[ctr]/ upperBand[ctr] - adm.addRow (str(stock), "blah", alphaValue, timestamps[ctr]) - #done writing alpha values of this stock to file - - - - - #calculating bollinger bands done! 
-
-#        fig = Figure()
-#        canvas = FigureCanvas(fig)
-#        ax = fig.add_subplot(111)
-#        ax.plot(centerband)
-#        ax.plot (lowerBand)
-#        ax.plot (upperBand)
-#        ax.plot (adj_close)
-#
-#        ax.set_title(str(stock)+' Bollinger bands')
-#        ax.grid(True)
-#        ax.set_xlabel('time')
-#        ax.set_ylabel('')
-#        canvas.print_figure(str(listOfStocks.index(stock)))
-    #for stock in listOfStocks: done
-
-    adm.closeFile()
-
-
-#Main done
-
-
-if __name__ == '__main__':
+'''
+Created on Jul 30, 2010
+
+@author: sjoshi42
+@summary: This module generates alpha values based on bollinger bands
+'''
+
+import DataAccess
+import os #the Python 2 dircache module no longer exists in Python 3; os.listdir is used below
+import numpy
+import alphaDataModel.AlphaDataModel as adm
+#import alphaGenerator.AlphaDataModel as adm
+from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
+from matplotlib.figure import Figure
+
+def getStocks(listOfPaths):
+
+    listOfStocks=list()
+    #Path does not exist
+    print("Reading in all stock names...")
+    fileExtensionToRemove=".h5"
+
+    for path in listOfPaths:
+        stocksAtThisPath=list ()
+
+        stocksAtThisPath= sorted(os.listdir(str(path))) #dircache.listdir returned a sorted listing
+        #Next, throw away everything that is not a .h5 And these are our stocks!
+        stocksAtThisPath = [x for x in stocksAtThisPath if (str(x).find(str(fileExtensionToRemove)) > -1)]
+        #Now, we remove the .h5 to get the name of the stock
+        stocksAtThisPath = [(x.partition(str(fileExtensionToRemove))[0]) for x in stocksAtThisPath]
+
+        for stock in stocksAtThisPath:
+            listOfStocks.append(stock)
+    return listOfStocks
+    #readStocksFromFile done
+
+
+
+def removeNaNs(numArray):
+    ctr=1
+    #fill forward
+    while (ctr< numArray.size):
+        if (numpy.isnan(numArray[ctr])):
+            if not (numpy.isnan(numArray[ctr-1])):
+                numArray[ctr]= numArray[ctr-1]
+            #if not ends
+        #if ends
+        ctr+=1
+    #while ends
+
+    #fill back
+    ctr= numArray.size-2
+    while (ctr>=0):
+        if (numpy.isnan(numArray[ctr])):
+            if not (numpy.isnan(numArray[ctr+1])):
+                numArray[ctr]= numArray[ctr+1]
+            #if not ends
+        #if ends
+        ctr-=1
+    #while ends
+    return numArray
+#removeNaNs ends
+
+
+def main():
+    '''
+    @summary: Calculates Bollinger bands
+    '''
+
+    folderList= list()
+    folderList.append("C:\\tempoutput\\")
+    listOfStocks= list()
+#    listOfStocks.append("ACY")
+    listOfStocks.append("AAPL")
+    #listOfStocks= getStocks(folderList)
+
+
+
+    dataAccess= DataAccess.DataAccess (True, folderList, "/StrategyData", "StrategyData", True, listOfStocks)
+    timestamps= list(dataAccess.getTimestampArray())
+    adm.openFile("AAPLonlybollingerBandsAlphaVals.h5")
+
+    period= 10
+    stdMultiplier=2
+    noOfDays= len (timestamps) #400
+
+
+    centerband= numpy.zeros(noOfDays, dtype= float) #len(timestamps)- period + 1 #Just to make it the same length as the adj_close to make it easier to plot
+    upperBand= numpy.zeros(noOfDays, dtype= float)
+    lowerBand= numpy.zeros(noOfDays, dtype= float)
+    x= numpy.zeros(noOfDays, dtype= float)
+
+    ctr=0
+    while (ctr< noOfDays):
+        x[ctr]=ctr
+        ctr+=1
+    #end while
+
+
+    for stock in listOfStocks:
+        print("Processing: " + str(stock))
+        #adj_close= dataAccess.getStockDataList(str(stock), 'adj_close')
+        adj_close= dataAccess.getStockDataList(stock, 'adj_close', timestamps[0], timestamps[noOfDays-1])
+
+        adj_close= removeNaNs(adj_close)#nan's removed, unless all are nans
+
+        #Now calculating bollinger bands
+        for ctr in range (period, noOfDays):
+
+            try:
+                centerband[ctr]= numpy.average(adj_close[ctr- period:ctr])
+                stdDev= numpy.std(adj_close[ctr- period:ctr])
+                upperBand[ctr]= centerband[ctr] + (stdMultiplier* stdDev)
+                lowerBand[ctr]= centerband[ctr] - (stdMultiplier* stdDev)
+            except 
IndexError: + print("ctr is: " + str(ctr)) + + #writing alpha values to file + for ctr in range (0, noOfDays): + if (upperBand[ctr]== lowerBand[ctr])or (adj_close[ctr]== centerband[ctr]): + adm.addRow(str(stock), "blah", 0.0, timestamps[ctr]) + elif (adj_close[ctr] < centerband[ctr]): + alphaValue= lowerBand[ctr]/ adj_close[ctr] + adm.addRow (str(stock), "blah", alphaValue, timestamps[ctr]) + else: + alphaValue= - adj_close[ctr]/ upperBand[ctr] + adm.addRow (str(stock), "blah", alphaValue, timestamps[ctr]) + #done writing alpha values of this stock to file + + + + + #calculating bollinger bands done! + +# fig = Figure() +# canvas = FigureCanvas(fig) +# ax = fig.add_subplot(111) +# ax.plot(centerband) +# ax.plot (lowerBand) +# ax.plot (upperBand) +# ax.plot (adj_close) +# +# ax.set_title(str(stock)+' Bollinger bands') +# ax.grid(True) +# ax.set_xlabel('time') +# ax.set_ylabel('') +# canvas.print_figure(str(listOfStocks.index(stock))) + #for stock in listOfStocks: done + + adm.closeFile() + + +#Main done + + +if __name__ == '__main__': main() \ No newline at end of file diff --git a/Legacy/Legacy/alphaGenerators/CurveFittingAlphaGenerator.py b/Legacy/Legacy/alphaGenerators/CurveFittingAlphaGenerator.py index b75bdd57d..5d1e2343d 100644 --- a/Legacy/Legacy/alphaGenerators/CurveFittingAlphaGenerator.py +++ b/Legacy/Legacy/alphaGenerators/CurveFittingAlphaGenerator.py @@ -1,135 +1,135 @@ -''' -Created on Jul 26, 2010 - -@author: Shreyas Joshi -@contact: shreyasj@gatech.edu -''' - -import tables -import DataAccess -import dircache -import numpy -import alphaGenerator.AlphaDataModel as adm - -def getStocks(listOfPaths): - - listOfStocks=list() - #Path does not exist - print "Reading in all stock names..." - fileExtensionToRemove=".h5" - - for path in listOfPaths: - stocksAtThisPath=list () - - stocksAtThisPath= dircache.listdir(str(path)) - #Next, throw away everything that is not a .h5 And these are our stocks! - stocksAtThisPath = filter (lambda x:(str(x).find(str(fileExtensionToRemove)) > -1), stocksAtThisPath) - #Now, we remove the .h5 to get the name of the stock - stocksAtThisPath = map(lambda x:(x.partition(str(fileExtensionToRemove))[0]),stocksAtThisPath) - - for stock in stocksAtThisPath: - listOfStocks.append(stock) - return listOfStocks - #readStocksFromFile done - - - -#Main begins -noOfDaysToUse=5 -daysAhead=5 -folderList=list() -folderList.append("C:\\tempoutput\\") -listOfStocks= getStocks(folderList) - -beginTS= 473490000 #2 Jan 1985 0500 hrs GMT -endTS= 1262235600 #31 DEc 2009 0500 hrs GMT - -print "list of stocks is: " + str(listOfStocks) -dataAccess= DataAccess.DataAccess(True, folderList, "/StrategyData", "StrategyData", True, listOfStocks, beginTS, endTS) -timestamps= list(dataAccess.getTimestampArray()) - -print "Printing all timestamps: " -for ts in timestamps: - print ts -print "Printing ts done" - - -#alpha= alphaGenerator.AlphaDataModel.AlphaDataModelClass() -adm.openFile("curveFittingAlphaVals_Jan_85_to_2010.h5") -daysArray= numpy.zeros ((noOfDaysToUse+1), dtype=float) - -ctr=0 -while (ctr<= noOfDaysToUse): #because we get back noOfDaysToUse+1 rows - daysArray[ctr]= ctr - ctr+=1 - #while ends - -try: - beginIndex= timestamps.index(beginTS) - endIndex= timestamps.index(endTS) -except ValueError: - print "beginTS or endTS not found!" 
-    raise ValueError
-
-
-#beginTS+= (noOfDaysToUse*day)
-beginIndex+=noOfDaysToUse
-
-
-while (beginIndex<=endIndex):
-    #closeData= dataAccess.getMatrixFromTS(listOfStocks, 'adj_close', beginTS, -noOfDaysToUse)
-    closeData= dataAccess.getMatrixBetweenIndex(listOfStocks, 'adj_close', beginIndex- noOfDaysToUse, beginIndex)
-    print "At ts: " + str (timestamps[beginIndex])
-
-    if (closeData is not None):
-        print "closeData is not none"
-        stockCtr=0
-        while (stockCtr < len (listOfStocks)):
-            nanPresent=False
-            closeData= numpy.ma.masked_values(closeData, numpy.NaN)
-#            ctr=0
-#            while (ctr<= noOfDaysToUse):
-#                try:
-#                    if (numpy.isnan(closeData[ctr][stockCtr])):
-#                        nanPresent=True
-#                        zeroDueToNaNCtr+=1
-#                        adm.addRow(listOfStocks[stockCtr], "blah", 0.0, beginTS)
-#                        break
-#                    ctr+=1
-#
-#
-#                except IndexError:
-#                    print "stockCtr: " + str(stockCtr)+", ctr: "+str(ctr)
-#                    print "Shape is: "+str(closeData.shape)
-
-
-
-            #while (ctr<= noOfDaysToUse) ends
-            if (nanPresent is False):
-                #calculate the best fit 3 degree polynomial
-                #print "daysArray: "+str(daysArray.shape) +" closeData: " + str(blah.shape) + "the actual arr: " + str (closeData[:][stockCtr])
-                polynomial= numpy.polyfit (daysArray, closeData[:, stockCtr], 3)
-                predictedClosingValue= numpy.polyval(polynomial, noOfDaysToUse + daysAhead -1)
-                #if (predictedClosingValue <0):
-                    #print "predicted closing value negative! But that can't be!"
-                    #predictedClosingValue=0
-                #print "val: " + str(predictedClosingValue) + ", closingVal: "+ str(closeData[noOfDaysToUse][stockCtr])+", stock: " +str(listOfStocks[stockCtr]+ " ts: "+ str(timestamps[beginIndex]))
-                #print "val: " + str(predictedClosingValue) +", stock: " +str(listOfStocks[stockCtr]+ " ts: "+ str(timestamps[beginIndex]))
-
-                valueToBeAdded= (predictedClosingValue- closeData[noOfDaysToUse][stockCtr])/ closeData[noOfDaysToUse][stockCtr]
-                adm.addRow(listOfStocks[stockCtr], "blah",valueToBeAdded , timestamps[beginIndex])
-            #if ends
-
-            stockCtr+=1
-        #while ends
-    else:
-        #closeData is None
-        print "closeData is None"
-#        for stock in listOfStocks:
-#            adm.addRow(stock, "blah", 0.0, beginTS)
-#            zeroDueToNoneCtr+=1
-
-    beginIndex+=1
-#while ends
-
+'''
+Created on Jul 26, 2010
+
+@author: Shreyas Joshi
+@contact: shreyasj@gatech.edu
+'''
+
+import tables
+import DataAccess
+import os #the Python 2 dircache module no longer exists in Python 3; os.listdir is used below
+import numpy
+import alphaGenerator.AlphaDataModel as adm
+
+def getStocks(listOfPaths):
+
+    listOfStocks=list()
+    #Path does not exist
+    print("Reading in all stock names...")
+    fileExtensionToRemove=".h5"
+
+    for path in listOfPaths:
+        stocksAtThisPath=list ()
+
+        stocksAtThisPath= sorted(os.listdir(str(path))) #dircache.listdir returned a sorted listing
+        #Next, throw away everything that is not a .h5 And these are our stocks!
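+        #Note: the find(...) > -1 test below also matches names that merely
+        #contain ".h5" somewhere in the middle. If only true "*.h5" files
+        #should count, a stricter (not behavior-identical) sketch would be:
+        #    stocksAtThisPath = [x for x in stocksAtThisPath
+        #                        if str(x).endswith(fileExtensionToRemove)]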
+ stocksAtThisPath = [x for x in stocksAtThisPath if (str(x).find(str(fileExtensionToRemove)) > -1)] + #Now, we remove the .h5 to get the name of the stock + stocksAtThisPath = [(x.partition(str(fileExtensionToRemove))[0]) for x in stocksAtThisPath] + + for stock in stocksAtThisPath: + listOfStocks.append(stock) + return listOfStocks + #readStocksFromFile done + + + +#Main begins +noOfDaysToUse=5 +daysAhead=5 +folderList=list() +folderList.append("C:\\tempoutput\\") +listOfStocks= getStocks(folderList) + +beginTS= 473490000 #2 Jan 1985 0500 hrs GMT +endTS= 1262235600 #31 DEc 2009 0500 hrs GMT + +print("list of stocks is: " + str(listOfStocks)) +dataAccess= DataAccess.DataAccess(True, folderList, "/StrategyData", "StrategyData", True, listOfStocks, beginTS, endTS) +timestamps= list(dataAccess.getTimestampArray()) + +print("Printing all timestamps: ") +for ts in timestamps: + print(ts) +print("Printing ts done") + + +#alpha= alphaGenerator.AlphaDataModel.AlphaDataModelClass() +adm.openFile("curveFittingAlphaVals_Jan_85_to_2010.h5") +daysArray= numpy.zeros ((noOfDaysToUse+1), dtype=float) + +ctr=0 +while (ctr<= noOfDaysToUse): #because we get back noOfDaysToUse+1 rows + daysArray[ctr]= ctr + ctr+=1 + #while ends + +try: + beginIndex= timestamps.index(beginTS) + endIndex= timestamps.index(endTS) +except ValueError: + print("beginTS or endTS not found!") + raise ValueError + + +#beginTS+= (noOfDaysToUse*day) +beginIndex+=noOfDaysToUse + + +while (beginIndex<=endIndex): + #closeData= dataAccess.getMatrixFromTS(listOfStocks, 'adj_close', beginTS, -noOfDaysToUse) + closeData= dataAccess.getMatrixBetweenIndex(listOfStocks, 'adj_close', beginIndex- noOfDaysToUse, beginIndex) + print("At ts: " + str (timestamps[beginIndex])) + + if (closeData is not None): + print("closeData is not none") + stockCtr=0 + while (stockCtr < len (listOfStocks)): + nanPresent=False + closeData= numpy.ma.masked_values(closeData, numpy.NaN) +# ctr=0 +# while (ctr<= noOfDaysToUse): +# try: +# if (numpy.isnan(closeData[ctr][stockCtr])): +# nanPresent=True +# zeroDueToNaNCtr+=1 +# adm.addRow(listOfStocks[stockCtr], "blah", 0.0, beginTS) +# break +# ctr+=1 +# +# +# except IndexError: +# print "stockCtr: " + str(stockCtr)+", ctr: "+str(ctr) +# print "Shape is: "+str(closeData.shape) + + + + #while (ctr<= noOfDaysToUse) ends + if (nanPresent is False): + #calculate the best fit 3 degree polynomial + #print "daysArray: "+str(daysArray.shape) +" closeData: " + str(blah.shape) + "the actual arr: " + str (closeData[:][stockCtr]) + polynomial= numpy.polyfit (daysArray, closeData[:, stockCtr], 3) + predictedClosingValue= numpy.polyval(polynomial, noOfDaysToUse + daysAhead -1) + #if (predictedClosingValue <0): + #print "predicted closing value negative! But that can't be!" 
+ #predictedClosingValue=0 + #print "val: " + str(predictedClosingValue) + ", closingVal: "+ str(closeData[noOfDaysToUse][stockCtr])+", stock: " +str(listOfStocks[stockCtr]+ " ts: "+ str(timestamps[beginIndex])) + #print "val: " + str(predictedClosingValue) +", stock: " +str(listOfStocks[stockCtr]+ " ts: "+ str(timestamps[beginIndex])) + + valueToBeAdded= (predictedClosingValue- closeData[noOfDaysToUse][stockCtr])/ closeData[noOfDaysToUse][stockCtr] + adm.addRow(listOfStocks[stockCtr], "blah",valueToBeAdded , timestamps[beginIndex]) + #if ends + + stockCtr+=1 + #while ends + else: + #closeData is None + print("closeData is None") +# for stock in listOfStocks: +# adm.addRow(stock, "blah", 0.0, beginTS) +# zeroDueToNoneCtr+=1 + + beginIndex+=1 + #while ends + adm.closeFile() \ No newline at end of file diff --git a/Legacy/Legacy/qstkoptimizers/BollingerOptimizer.py b/Legacy/Legacy/qstkoptimizers/BollingerOptimizer.py index b384c91bb..e8e9b9494 100644 --- a/Legacy/Legacy/qstkoptimizers/BollingerOptimizer.py +++ b/Legacy/Legacy/qstkoptimizers/BollingerOptimizer.py @@ -1,84 +1,84 @@ -''' -Created on Aug 2, 2010 - -@author: sjoshi42 -''' - - -''' -Created on Jul 27, 2010 - -@author: Shreyas Joshi -@summary: This module reads in the alpha values generated based on the bollinger bands. It then outputs orders accordingly. -''' -import DataAccess -import numpy -class Optimizer(object): - - def __init__(self, listOfStocks): - self.listOfStocks= listOfStocks - self.DAY=86400 - dataItemsList=list() - dataItemsList.append("alphaValue") - self.alphaData= DataAccess.DataAccess(False, "AAPLonlybollingerBandsAlphaVals.h5", "/alphaData", "alphaData", True, listOfStocks, None, None, None, dataItemsList) - print "Timestamps are: " - for ts in self.alphaData.timestamps: - print ts - #__init__ done - - - def execute(self, portfolio,positions,timestamp,stockInfo, dataAccess): - - output=[] - for stock in self.listOfStocks: - alphaVal= self.alphaData.getStockDataItem(stock, "alphaValue", timestamp) - - - print "alphaVal: "+ str (alphaVal)+ ", stock: "+ str(stock)+", ts: " + str(timestamp) - - if not (numpy.isnan(alphaVal)): - #alphaVal is not Nan - if (alphaVal > 0.9): - #buy - order= stockInfo.OutputOrder() - order.symbol= stock - order.volume= 100 #min(int(500*alphaVal), 100) - order.task= 'buy' - order.orderType = 'moc' - order.closeType = 'fifo' - order.duration = self.DAY - newOrder = order.getOutput() - if newOrder != None: - output.append(newOrder) - else: - print "ERROR! ERROR! ERROR!" - else: - pass - #print "alhpaVal for "+str(stock)+" is: " + str(alphaVal) - else: - pass - #print "alphaVal is nan" - - #for stock in self.listOfStocks done - - for stock in portfolio.getListOfStocks(): - alphaVal= self.alphaData.getStockDataItem(stock, "alphaValue", timestamp) - if not (numpy.isnan(alphaVal)): - if (alphaVal < -0.6): - order= stockInfo.OutputOrder() - order.symbol= stock - order.volume= max( (int (portfolio.getHeldQty(stock) /4)) , 1) - order.task= 'sell' - order.orderType = 'moc' - order.closeType = 'fifo' - order.duration = self.DAY - newOrder = order.getOutput() - if newOrder != None: - output.append(newOrder) - else: - print "ERROR! ERROR! ERROR!" - return output - - - - +''' +Created on Aug 2, 2010 + +@author: sjoshi42 +''' + + +''' +Created on Jul 27, 2010 + +@author: Shreyas Joshi +@summary: This module reads in the alpha values generated based on the bollinger bands. It then outputs orders accordingly. 
+''' +import DataAccess +import numpy +class Optimizer(object): + + def __init__(self, listOfStocks): + self.listOfStocks= listOfStocks + self.DAY=86400 + dataItemsList=list() + dataItemsList.append("alphaValue") + self.alphaData= DataAccess.DataAccess(False, "AAPLonlybollingerBandsAlphaVals.h5", "/alphaData", "alphaData", True, listOfStocks, None, None, None, dataItemsList) + print("Timestamps are: ") + for ts in self.alphaData.timestamps: + print(ts) + #__init__ done + + + def execute(self, portfolio,positions,timestamp,stockInfo, dataAccess): + + output=[] + for stock in self.listOfStocks: + alphaVal= self.alphaData.getStockDataItem(stock, "alphaValue", timestamp) + + + print("alphaVal: "+ str (alphaVal)+ ", stock: "+ str(stock)+", ts: " + str(timestamp)) + + if not (numpy.isnan(alphaVal)): + #alphaVal is not Nan + if (alphaVal > 0.9): + #buy + order= stockInfo.OutputOrder() + order.symbol= stock + order.volume= 100 #min(int(500*alphaVal), 100) + order.task= 'buy' + order.orderType = 'moc' + order.closeType = 'fifo' + order.duration = self.DAY + newOrder = order.getOutput() + if newOrder != None: + output.append(newOrder) + else: + print("ERROR! ERROR! ERROR!") + else: + pass + #print "alhpaVal for "+str(stock)+" is: " + str(alphaVal) + else: + pass + #print "alphaVal is nan" + + #for stock in self.listOfStocks done + + for stock in portfolio.getListOfStocks(): + alphaVal= self.alphaData.getStockDataItem(stock, "alphaValue", timestamp) + if not (numpy.isnan(alphaVal)): + if (alphaVal < -0.6): + order= stockInfo.OutputOrder() + order.symbol= stock + order.volume= max( (int (portfolio.getHeldQty(stock) /4)) , 1) + order.task= 'sell' + order.orderType = 'moc' + order.closeType = 'fifo' + order.duration = self.DAY + newOrder = order.getOutput() + if newOrder != None: + output.append(newOrder) + else: + print("ERROR! ERROR! ERROR!") + return output + + + + diff --git a/Legacy/Legacy/qstkoptimizers/Optimizer.py b/Legacy/Legacy/qstkoptimizers/Optimizer.py index feed248c4..47ed3e6d7 100644 --- a/Legacy/Legacy/qstkoptimizers/Optimizer.py +++ b/Legacy/Legacy/qstkoptimizers/Optimizer.py @@ -1,308 +1,308 @@ -''' -Created on May 28, 2010 - -@author: Shreyas Joshi -@contact: shreyasj@gatech.edu -''' - -import DataAccess as da -import tables as pt -import math -import numpy as np - -class Optimizer(object): - ''' - @summary: The optimizer class is supposed to get the alpha values and data on the current portfolio to make a decision on what trades to make. - - ''' - -# one day in unix time - - - def __init__(self, listOfStocks): - ''' - Constructor - ''' -# self.alphah5f= pt.openFile("randomAlpha.h5", mode = "a") # if mode ='w' is used here then the file gets overwritten! 
-        self.listOfLoosingStocks=list()
-        self.noOfDaysStockHasBeenLoosingValue=list()
-
-        self.listOfStocks= list(listOfStocks)
-        self.DAY = 86400
-        staticDataItemsList= list()
-        staticDataItemsList.append("blah")
-        dataItemsList= list()
-        dataItemsList.append("alphaValue")
-        self.minCom= 5.00
-        self.ComPerShare = 0.01
-#        self.alphaData= da.DataAccess(False,"randomAlpha.h5","/alphaData", "alphaData", True, listOfStocks, None, dataItemsList)
-    #def __init__ ends
-
-    def strategyOne (self, portfolio,positions,timestamp,stockInfo, dataAccess):
-        output=[]
-        adjOpenData= dataAccess.getMatrix (self.listOfStocks, "adj_open", timestamp- 2*self.DAY, timestamp- self.DAY)
-
-        if adjOpenData is not None:
-            #choose the biggest loser
-            ctr=0
-            currentBiggestLoss= - float("infinity")
-            currentBiggestLoserIndex=-1
-            while (ctr< len(self.listOfStocks)):
-                if (adjOpenData[0][ctr] > adjOpenData[1][ctr]):
-                    #Which means the stock lost value
-                    if ((adjOpenData[0][ctr] - adjOpenData[1][ctr])/adjOpenData[0][ctr] > currentBiggestLoss): #biggest % loss
-                        currentBiggestLoss= (adjOpenData[0][ctr] - adjOpenData[1][ctr])
-                        currentBiggestLoserIndex= ctr
-                ctr+=1
-            #While loop done
-            #Now we have the stock which lost the most value. We buy it and also put in a sell order for 2 days later
-
-            if (currentBiggestLoserIndex != -1):
-                order= stockInfo.OutputOrder()
-                order.symbol= self.listOfStocks[currentBiggestLoserIndex]
-                order.volume= 10
-                order.task= 'buy'
-                order.orderType = 'moc'
-                order.duration = self.DAY
-
-                newOrder = order.getOutput()
-
-                if newOrder != None:
-                    output.append(newOrder)
-                else:
-                    print "ERROR! ERROR! ERROR!"
-            #if (currentBiggestLoserIndex != -1): ends
-        #if adjOpenData is not None: ends
-        else:
-            print "adjOpenData is None!"
-        #else ends
-        #Now to decide which stocks to sell
-        currentPositions= positions.getPositions()
-        for stock in portfolio.currStocks:
-            for pos in currentPositions:
-                if (str(pos['symbol'])== str(stock)):
-                    if ((pos['timestamp'] )< timestamp - 2*self.DAY):
-                        temp= dataAccess.getStockDataItem(pos['symbol'], 'adj_close', timestamp)
-                        if not (np.isnan(temp)):
-                            if ((pos['purchase_price'] + (pos['shares']*self.ComPerShare))< temp):
-                                order= stockInfo.OutputOrder()
-                                order.symbol= stock
-                                order.volume= pos['shares']
-                                order.task= 'sell'
-                                order.orderType = 'moc'
-                                order.closeType = 'fifo'
-                                order.duration = self.DAY
-                                newOrder = order.getOutput()
-
-                                if newOrder != None:
-                                    output.append(newOrder)
-                                else:
-                                    print "ERROR! ERROR! ERROR!"
-        #for pos in currentPositions: ends
-        return output
-
-    def strategyTwo (self, portfolio,positions,timestamp,stockInfo, dataAccess):
-        #Here we track all the stocks that continuously loose value- then buy them when they stop loosing value. Then hold them until they keep
-        #gaining value. 
Then sell them - - - output=[] - - #adjOpenData= dataAccess.getMatrix (self.listOfStocks, "adj_open", timestamp- 2*self.DAY, timestamp- self.DAY) - - adjOpenData= dataAccess.getMatrixFromTS (self.listOfStocks, "adj_open", timestamp, -1) - -# print "list of loosing stocks: "+ str (self.listOfLoosingStocks) -# print "current positions: " + str(positions.getPositions()) -# print "no of days: " + str (self.noOfDaysStockHasBeenLoosingValue) - - if (adjOpenData is not None): - ctr=0 - while (ctr< len(self.listOfStocks)): - if (adjOpenData[0][ctr] > adjOpenData[1][ctr]): - - try: - index2= self.listOfLoosingStocks.index(self.listOfStocks[ctr]) - self.noOfDaysStockHasBeenLoosingValue[index2]+=1 - except: - #stock not found in the list - self.listOfLoosingStocks.append(self.listOfStocks[ctr]) - self.noOfDaysStockHasBeenLoosingValue.append(1) - - currentPositions= positions.getPositions() - for pos in currentPositions: - try: - index2= self.listOfLoosingStocks.index(pos['symbol']) #if it isn't in this list then we don't have to sell it - if (self.noOfDaysStockHasBeenLoosingValue[index2] > 2): - # we have this stock and it lost value twice - # Ergo- we sell - #sell - #if ((pos['purchase_price'] + (pos['shares']*self.ComPerShare))< temp): #rig it to make money - print str(pos['symbol'])+" finally lost value for "+ str(self.noOfDaysStockHasBeenLoosingValue[index2])+" days. Selling it" - order= stockInfo.OutputOrder() - order.symbol= pos['symbol'] - order.volume= pos['shares'] - order.task= 'sell' - order.orderType = 'moc' - order.closeType = 'fifo' - order.duration = self.DAY - - newOrder = order.getOutput() - if newOrder != None: - output.append(newOrder) - else: - print "ERROR! ERROR! ERROR!" - except ValueError: - pass #index not found - #for pos in currentPositions - - else: - #this stock did not loose value - - #Check if had been loosing value - #print str(self.listOfStocks[ctr])+ " gained value" - - try: - index1= self.listOfLoosingStocks.index(self.listOfStocks[ctr]) - if (self.noOfDaysStockHasBeenLoosingValue[index1]>3): - - #print "This stock has been loosing value for atleast 3 days" - - order= stockInfo.OutputOrder() - order.symbol= self.listOfStocks[ctr] - order.volume= min(10* self.noOfDaysStockHasBeenLoosingValue[index1], 100) - order.task= 'buy' - order.orderType = 'moc' - order.closeType = 'fifo' - order.duration = self.DAY - newOrder = order.getOutput() - if newOrder != None: - output.append(newOrder) - else: - print "ERROR! ERROR! ERROR!" - - #The stock was loosing value for <=3 days but now gained value- so off with the head - self.listOfLoosingStocks.pop(index1) - self.noOfDaysStockHasBeenLoosingValue.pop(index1) - - except ValueError: - pass - - -# try: -# index1= self.listOfLoosingStocks.index(self.listOfStocks[ctr]) -# -# print str(self.listOfStocks[ctr])+" lost value for "+ str(self.noOfDaysStockHasBeenLoosingValue[index]+ "..and then gained..") -# #Stock found -# #if it had lost value for more than 2 days then buy! -# if (self.noOfDaysStockHasBeenLoosingValue[index1]>3): -# #buy -# order= stockInfo.OutputOrder() -# order.symbol= self.listOfStocks[ctr] -# order.volume= max(10* self.noOfDaysStockHasBeenLoosingValue[index1], 100) -# order.task= 'buy' -# order.orderType = 'moc' -# order.closeType = 'fifo' -# order.duration = self.DAY -# newOrder = order.getOutput() -# if newOrder != None: -# output.append(newOrder) -# else: -# print "ERROR! ERROR! ERROR!" -# else: -# #it was loosing value- but for less than 2 days. So we just remove this entry... 
-# self.listOfLoosingStocks.pop(index1) -# self.noOfDaysStockHasBeenLoosingValue.pop(index1) -# except: -# #Not found- this stock had not lost value -# print "could not find index! Possibly a bug" - ctr+=1 - #while loop ends..hopefully! - - - - - - return output - #strategyTwo ends - - - - def execute (self, portfolio,positions,timestamp,stockInfo, dataAccess): - ''' - @param portfolio: The portfolio object that has symbol and value of currently held stocks. - @param positions: Detailed info about current stock holdings. - @param timestamp: Current simulator time stamp - @param stockInfo: Not used anymore for dataAccess. - @param dataAccess: a dataAccess object that will henceforth be used to access all data - ''' - - output=[] - #output = self.strategyOne(portfolio, positions, timestamp, stockInfo, dataAccess) - output = self.strategyTwo(portfolio, positions, timestamp, stockInfo, dataAccess) - #for pos in currentPositions: ends - #print "The outout is: " + str(output) - - return output - - - - - - - - - -# -# #Right now this is stratDemo firstStrategy -# output = [] -# #This first for loop goes over all of the stock data to determine which stocks to buy -# for stock in dataAccess.getListOfStocks(): #stockInfo.getStocks(startTime = timestamp - self.DAY,endTime = timestamp): -# # if close is higher than open and close is closer to high than open is to low, buy -# -## print "In Optimizer" -## print " timestamp asked for is: " + str(timestamp - self.DAY, timestamp) -## print "self.DAY: " + str(self.DAY) -# adj_open= dataAccess.getStockDataList(stock, 'adj_open', timestamp - self.DAY, timestamp) -# adj_close= dataAccess.getStockDataList(stock, 'adj_close', timestamp - self.DAY, timestamp) -# adj_high= dataAccess.getStockDataList(stock, 'adj_high', timestamp - self.DAY, timestamp) -## alphaValue= self.alphaData.getStockDataList (stock, 'alphaValue', timestamp - self.DAY, timestamp) -# -# if (adj_open.size > 0): -# #if alphaValue <= 0.5 and adj_open < adj_close and (adj_high - adj_close) > (adj_open - adj_close): #highly possible bug here? -# if adj_open < adj_close and (adj_high - adj_close) > (adj_open - adj_close): #highly possible bug here? 
-# order = stockInfo.OutputOrder() -# order.symbol = stock #stock['symbol'] -# order.volume = 20 -# order.task = 'buy' -# order.orderType = 'moc' -# order.duration = self.DAY * 2 -# newOrder = order.getOutput() -# if newOrder != None: -# output.append(newOrder) -# -# #This for loop goes over all of our current stocks to determine which stocks to sell -# for stock in portfolio.currStocks: -# openPrice = list(dataAccess.getStockDataList(stock, 'adj_open',timestamp - self.DAY, timestamp))#dataAccess.getDataList(stock, 'adj_open',timestamp - self.DAY, timestamp)#stockInfo.getPrices(timestamp - self.DAY, timestamp,stock,'adj_open') -# closePrice = list(dataAccess.getStockDataList(stock, 'adj_close',timestamp - self.DAY, timestamp))#dataAccess.getDataList(stock, 'adj_close',timestamp - self.DAY, timestamp) #stockInfo.getPrices(timestamp - self.DAY, timestamp,stock,'adj_close') -# highPrice = list(dataAccess.getStockDataList(stock, 'adj_high',timestamp - self.DAY, timestamp))#dataAccess.getDataList(stock, 'adj_high',timestamp - self.DAY, timestamp) #stockInfo.getPrices(timestamp - self.DAY, timestamp,stock,'adj_high') -# lowPrice = list(dataAccess.getStockDataList(stock, 'adj_low',timestamp - self.DAY, timestamp))#dataAccess.getDataList(stock, 'adj_low',timestamp - self.DAY, timestamp) #stockInfo.getPrices(timestamp - self.DAY, timestamp,stock,'adj_low') -# if(len(openPrice) != 0 and len(closePrice) != 0 and len(highPrice) != 0 and len(lowPrice) != 0): -# # if closeprice is closer to low than openprice is to high, sell -# if (closePrice[0]-lowPrice[0]) > (highPrice[0]-openPrice[0]): -# order = stockInfo.OutputOrder() -# order.symbol = stock -# order.volume = portfolio.currStocks[stock]/2+1 -# order.task = 'sell' -# order.orderType = 'moo' -# order.closeType = 'fifo' -# order.duration = self.DAY * 2 -# newOrder = order.getOutput() -# if newOrder != None: -# output.append(newOrder) -# # return the sell orders and buy orders to the simulator to execute -# return output - - #return orders +''' +Created on May 28, 2010 + +@author: Shreyas Joshi +@contact: shreyasj@gatech.edu +''' + +import DataAccess as da +import tables as pt +import math +import numpy as np + +class Optimizer(object): + ''' + @summary: The optimizer class is supposed to get the alpha values and data on the current portfolio to make a decision on what trades to make. + + ''' + +# one day in unix time + + + def __init__(self, listOfStocks): + ''' + Constructor + ''' +# self.alphah5f= pt.openFile("randomAlpha.h5", mode = "a") # if mode ='w' is used here then the file gets overwritten! 
+        self.listOfLoosingStocks=list()
+        self.noOfDaysStockHasBeenLoosingValue=list()
+
+        self.listOfStocks= list(listOfStocks)
+        self.DAY = 86400
+        staticDataItemsList= list()
+        staticDataItemsList.append("blah")
+        dataItemsList= list()
+        dataItemsList.append("alphaValue")
+        self.minCom= 5.00
+        self.ComPerShare = 0.01
+#        self.alphaData= da.DataAccess(False,"randomAlpha.h5","/alphaData", "alphaData", True, listOfStocks, None, dataItemsList)
+    #def __init__ ends
+
+    def strategyOne (self, portfolio,positions,timestamp,stockInfo, dataAccess):
+        output=[]
+        adjOpenData= dataAccess.getMatrix (self.listOfStocks, "adj_open", timestamp- 2*self.DAY, timestamp- self.DAY)
+
+        if adjOpenData is not None:
+            #choose the biggest loser
+            ctr=0
+            currentBiggestLoss= - float("infinity")
+            currentBiggestLoserIndex=-1
+            while (ctr< len(self.listOfStocks)):
+                if (adjOpenData[0][ctr] > adjOpenData[1][ctr]):
+                    #Which means the stock lost value
+                    if ((adjOpenData[0][ctr] - adjOpenData[1][ctr])/adjOpenData[0][ctr] > currentBiggestLoss): #biggest % loss
+                        currentBiggestLoss= (adjOpenData[0][ctr] - adjOpenData[1][ctr])
+                        currentBiggestLoserIndex= ctr
+                ctr+=1
+            #While loop done
+            #Now we have the stock which lost the most value. We buy it and also put in a sell order for 2 days later
+
+            if (currentBiggestLoserIndex != -1):
+                order= stockInfo.OutputOrder()
+                order.symbol= self.listOfStocks[currentBiggestLoserIndex]
+                order.volume= 10
+                order.task= 'buy'
+                order.orderType = 'moc'
+                order.duration = self.DAY
+
+                newOrder = order.getOutput()
+
+                if newOrder != None:
+                    output.append(newOrder)
+                else:
+                    print("ERROR! ERROR! ERROR!")
+            #if (currentBiggestLoserIndex != -1): ends
+        #if adjOpenData is not None: ends
+        else:
+            print("adjOpenData is None!")
+        #else ends
+        #Now to decide which stocks to sell
+        currentPositions= positions.getPositions()
+        for stock in portfolio.currStocks:
+            for pos in currentPositions:
+                if (str(pos['symbol'])== str(stock)):
+                    if ((pos['timestamp'] )< timestamp - 2*self.DAY):
+                        temp= dataAccess.getStockDataItem(pos['symbol'], 'adj_close', timestamp)
+                        if not (np.isnan(temp)):
+                            if ((pos['purchase_price'] + (pos['shares']*self.ComPerShare))< temp):
+                                order= stockInfo.OutputOrder()
+                                order.symbol= stock
+                                order.volume= pos['shares']
+                                order.task= 'sell'
+                                order.orderType = 'moc'
+                                order.closeType = 'fifo'
+                                order.duration = self.DAY
+                                newOrder = order.getOutput()
+
+                                if newOrder != None:
+                                    output.append(newOrder)
+                                else:
+                                    print("ERROR! ERROR! ERROR!")
+        #for pos in currentPositions: ends
+        return output
+
+    def strategyTwo (self, portfolio,positions,timestamp,stockInfo, dataAccess):
+        #Here we track all the stocks that continuously loose value- then buy them when they stop loosing value. Then hold them until they keep
+        #gaining value. 
Then sell them + + + output=[] + + #adjOpenData= dataAccess.getMatrix (self.listOfStocks, "adj_open", timestamp- 2*self.DAY, timestamp- self.DAY) + + adjOpenData= dataAccess.getMatrixFromTS (self.listOfStocks, "adj_open", timestamp, -1) + +# print "list of loosing stocks: "+ str (self.listOfLoosingStocks) +# print "current positions: " + str(positions.getPositions()) +# print "no of days: " + str (self.noOfDaysStockHasBeenLoosingValue) + + if (adjOpenData is not None): + ctr=0 + while (ctr< len(self.listOfStocks)): + if (adjOpenData[0][ctr] > adjOpenData[1][ctr]): + + try: + index2= self.listOfLoosingStocks.index(self.listOfStocks[ctr]) + self.noOfDaysStockHasBeenLoosingValue[index2]+=1 + except: + #stock not found in the list + self.listOfLoosingStocks.append(self.listOfStocks[ctr]) + self.noOfDaysStockHasBeenLoosingValue.append(1) + + currentPositions= positions.getPositions() + for pos in currentPositions: + try: + index2= self.listOfLoosingStocks.index(pos['symbol']) #if it isn't in this list then we don't have to sell it + if (self.noOfDaysStockHasBeenLoosingValue[index2] > 2): + # we have this stock and it lost value twice + # Ergo- we sell + #sell + #if ((pos['purchase_price'] + (pos['shares']*self.ComPerShare))< temp): #rig it to make money + print(str(pos['symbol'])+" finally lost value for "+ str(self.noOfDaysStockHasBeenLoosingValue[index2])+" days. Selling it") + order= stockInfo.OutputOrder() + order.symbol= pos['symbol'] + order.volume= pos['shares'] + order.task= 'sell' + order.orderType = 'moc' + order.closeType = 'fifo' + order.duration = self.DAY + + newOrder = order.getOutput() + if newOrder != None: + output.append(newOrder) + else: + print("ERROR! ERROR! ERROR!") + except ValueError: + pass #index not found + #for pos in currentPositions + + else: + #this stock did not loose value + + #Check if had been loosing value + #print str(self.listOfStocks[ctr])+ " gained value" + + try: + index1= self.listOfLoosingStocks.index(self.listOfStocks[ctr]) + if (self.noOfDaysStockHasBeenLoosingValue[index1]>3): + + #print "This stock has been loosing value for atleast 3 days" + + order= stockInfo.OutputOrder() + order.symbol= self.listOfStocks[ctr] + order.volume= min(10* self.noOfDaysStockHasBeenLoosingValue[index1], 100) + order.task= 'buy' + order.orderType = 'moc' + order.closeType = 'fifo' + order.duration = self.DAY + newOrder = order.getOutput() + if newOrder != None: + output.append(newOrder) + else: + print("ERROR! ERROR! ERROR!") + + #The stock was loosing value for <=3 days but now gained value- so off with the head + self.listOfLoosingStocks.pop(index1) + self.noOfDaysStockHasBeenLoosingValue.pop(index1) + + except ValueError: + pass + + +# try: +# index1= self.listOfLoosingStocks.index(self.listOfStocks[ctr]) +# +# print str(self.listOfStocks[ctr])+" lost value for "+ str(self.noOfDaysStockHasBeenLoosingValue[index]+ "..and then gained..") +# #Stock found +# #if it had lost value for more than 2 days then buy! +# if (self.noOfDaysStockHasBeenLoosingValue[index1]>3): +# #buy +# order= stockInfo.OutputOrder() +# order.symbol= self.listOfStocks[ctr] +# order.volume= max(10* self.noOfDaysStockHasBeenLoosingValue[index1], 100) +# order.task= 'buy' +# order.orderType = 'moc' +# order.closeType = 'fifo' +# order.duration = self.DAY +# newOrder = order.getOutput() +# if newOrder != None: +# output.append(newOrder) +# else: +# print "ERROR! ERROR! ERROR!" +# else: +# #it was loosing value- but for less than 2 days. So we just remove this entry... 
+# self.listOfLoosingStocks.pop(index1) +# self.noOfDaysStockHasBeenLoosingValue.pop(index1) +# except: +# #Not found- this stock had not lost value +# print "could not find index! Possibly a bug" + ctr+=1 + #while loop ends..hopefully! + + + + + + return output + #strategyTwo ends + + + + def execute (self, portfolio,positions,timestamp,stockInfo, dataAccess): + ''' + @param portfolio: The portfolio object that has symbol and value of currently held stocks. + @param positions: Detailed info about current stock holdings. + @param timestamp: Current simulator time stamp + @param stockInfo: Not used anymore for dataAccess. + @param dataAccess: a dataAccess object that will henceforth be used to access all data + ''' + + output=[] + #output = self.strategyOne(portfolio, positions, timestamp, stockInfo, dataAccess) + output = self.strategyTwo(portfolio, positions, timestamp, stockInfo, dataAccess) + #for pos in currentPositions: ends + #print "The outout is: " + str(output) + + return output + + + + + + + + + +# +# #Right now this is stratDemo firstStrategy +# output = [] +# #This first for loop goes over all of the stock data to determine which stocks to buy +# for stock in dataAccess.getListOfStocks(): #stockInfo.getStocks(startTime = timestamp - self.DAY,endTime = timestamp): +# # if close is higher than open and close is closer to high than open is to low, buy +# +## print "In Optimizer" +## print " timestamp asked for is: " + str(timestamp - self.DAY, timestamp) +## print "self.DAY: " + str(self.DAY) +# adj_open= dataAccess.getStockDataList(stock, 'adj_open', timestamp - self.DAY, timestamp) +# adj_close= dataAccess.getStockDataList(stock, 'adj_close', timestamp - self.DAY, timestamp) +# adj_high= dataAccess.getStockDataList(stock, 'adj_high', timestamp - self.DAY, timestamp) +## alphaValue= self.alphaData.getStockDataList (stock, 'alphaValue', timestamp - self.DAY, timestamp) +# +# if (adj_open.size > 0): +# #if alphaValue <= 0.5 and adj_open < adj_close and (adj_high - adj_close) > (adj_open - adj_close): #highly possible bug here? +# if adj_open < adj_close and (adj_high - adj_close) > (adj_open - adj_close): #highly possible bug here? 
+# order = stockInfo.OutputOrder() +# order.symbol = stock #stock['symbol'] +# order.volume = 20 +# order.task = 'buy' +# order.orderType = 'moc' +# order.duration = self.DAY * 2 +# newOrder = order.getOutput() +# if newOrder != None: +# output.append(newOrder) +# +# #This for loop goes over all of our current stocks to determine which stocks to sell +# for stock in portfolio.currStocks: +# openPrice = list(dataAccess.getStockDataList(stock, 'adj_open',timestamp - self.DAY, timestamp))#dataAccess.getDataList(stock, 'adj_open',timestamp - self.DAY, timestamp)#stockInfo.getPrices(timestamp - self.DAY, timestamp,stock,'adj_open') +# closePrice = list(dataAccess.getStockDataList(stock, 'adj_close',timestamp - self.DAY, timestamp))#dataAccess.getDataList(stock, 'adj_close',timestamp - self.DAY, timestamp) #stockInfo.getPrices(timestamp - self.DAY, timestamp,stock,'adj_close') +# highPrice = list(dataAccess.getStockDataList(stock, 'adj_high',timestamp - self.DAY, timestamp))#dataAccess.getDataList(stock, 'adj_high',timestamp - self.DAY, timestamp) #stockInfo.getPrices(timestamp - self.DAY, timestamp,stock,'adj_high') +# lowPrice = list(dataAccess.getStockDataList(stock, 'adj_low',timestamp - self.DAY, timestamp))#dataAccess.getDataList(stock, 'adj_low',timestamp - self.DAY, timestamp) #stockInfo.getPrices(timestamp - self.DAY, timestamp,stock,'adj_low') +# if(len(openPrice) != 0 and len(closePrice) != 0 and len(highPrice) != 0 and len(lowPrice) != 0): +# # if closeprice is closer to low than openprice is to high, sell +# if (closePrice[0]-lowPrice[0]) > (highPrice[0]-openPrice[0]): +# order = stockInfo.OutputOrder() +# order.symbol = stock +# order.volume = portfolio.currStocks[stock]/2+1 +# order.task = 'sell' +# order.orderType = 'moo' +# order.closeType = 'fifo' +# order.duration = self.DAY * 2 +# newOrder = order.getOutput() +# if newOrder != None: +# output.append(newOrder) +# # return the sell orders and buy orders to the simulator to execute +# return output + + #return orders #def execute ends \ No newline at end of file diff --git a/Legacy/Legacy/qstkoptimizers/curveFittingOptimizer.py b/Legacy/Legacy/qstkoptimizers/curveFittingOptimizer.py index bacaa3885..1cba35c2f 100644 --- a/Legacy/Legacy/qstkoptimizers/curveFittingOptimizer.py +++ b/Legacy/Legacy/qstkoptimizers/curveFittingOptimizer.py @@ -1,76 +1,76 @@ -''' -Created on Jul 27, 2010 - -@author: Shreyas Joshi -''' -import DataAccess -import numpy -class Optimizer(object): - - def __init__(self, listOfStocks): - self.listOfStocks= listOfStocks - self.DAY=86400 - dataItemsList=list() - dataItemsList.append("alphaValue") - self.alphaData= DataAccess.DataAccess(False, "curveFittingAlphaVals_Jan_85_to_2010.h5", "/alphaData", "alphaData", True, listOfStocks, None, None, None, dataItemsList) - print "Timestamps are: " - for ts in self.alphaData.timestamps: - print ts - #__init__ done - - - def execute(self, portfolio,positions,timestamp,stockInfo, dataAccess): - - output=[] - for stock in self.listOfStocks: - alphaVal= self.alphaData.getStockDataItem(stock, "alphaValue", timestamp) - - - print "alphaVal: "+ str (alphaVal)+ ", stock: "+ str(stock)+", ts: " + str(timestamp) - - if not (numpy.isnan(alphaVal)): - #alphaVal is not Nan - if (alphaVal > 15.0): - #buy - order= stockInfo.OutputOrder() - order.symbol= stock - order.volume= 100 #min(int(500*alphaVal), 100) - order.task= 'buy' - order.orderType = 'moc' - order.closeType = 'fifo' - order.duration = self.DAY - newOrder = order.getOutput() - if newOrder != None: - 
output.append(newOrder) - else: - print "ERROR! ERROR! ERROR!" - else: - pass - #print "alhpaVal for "+str(stock)+" is: " + str(alphaVal) - else: - pass - #print "alphaVal is nan" - - #for stock in self.listOfStocks done - - for stock in portfolio.getListOfStocks(): - alphaVal= self.alphaData.getStockDataItem(stock, "alphaValue", timestamp) - if not (numpy.isnan(alphaVal)): - if (alphaVal < 3.0): - order= stockInfo.OutputOrder() - order.symbol= stock - order.volume= portfolio.getHeldQty(stock) - order.task= 'sell' - order.orderType = 'moc' - order.closeType = 'fifo' - order.duration = self.DAY - newOrder = order.getOutput() - if newOrder != None: - output.append(newOrder) - else: - print "ERROR! ERROR! ERROR!" - return output - - - - +''' +Created on Jul 27, 2010 + +@author: Shreyas Joshi +''' +import DataAccess +import numpy +class Optimizer(object): + + def __init__(self, listOfStocks): + self.listOfStocks= listOfStocks + self.DAY=86400 + dataItemsList=list() + dataItemsList.append("alphaValue") + self.alphaData= DataAccess.DataAccess(False, "curveFittingAlphaVals_Jan_85_to_2010.h5", "/alphaData", "alphaData", True, listOfStocks, None, None, None, dataItemsList) + print("Timestamps are: ") + for ts in self.alphaData.timestamps: + print(ts) + #__init__ done + + + def execute(self, portfolio,positions,timestamp,stockInfo, dataAccess): + + output=[] + for stock in self.listOfStocks: + alphaVal= self.alphaData.getStockDataItem(stock, "alphaValue", timestamp) + + + print("alphaVal: "+ str (alphaVal)+ ", stock: "+ str(stock)+", ts: " + str(timestamp)) + + if not (numpy.isnan(alphaVal)): + #alphaVal is not Nan + if (alphaVal > 15.0): + #buy + order= stockInfo.OutputOrder() + order.symbol= stock + order.volume= 100 #min(int(500*alphaVal), 100) + order.task= 'buy' + order.orderType = 'moc' + order.closeType = 'fifo' + order.duration = self.DAY + newOrder = order.getOutput() + if newOrder != None: + output.append(newOrder) + else: + print("ERROR! ERROR! ERROR!") + else: + pass + #print "alhpaVal for "+str(stock)+" is: " + str(alphaVal) + else: + pass + #print "alphaVal is nan" + + #for stock in self.listOfStocks done + + for stock in portfolio.getListOfStocks(): + alphaVal= self.alphaData.getStockDataItem(stock, "alphaValue", timestamp) + if not (numpy.isnan(alphaVal)): + if (alphaVal < 3.0): + order= stockInfo.OutputOrder() + order.symbol= stock + order.volume= portfolio.getHeldQty(stock) + order.task= 'sell' + order.orderType = 'moc' + order.closeType = 'fifo' + order.duration = self.DAY + newOrder = order.getOutput() + if newOrder != None: + output.append(newOrder) + else: + print("ERROR! ERROR! 
ERROR!") + return output + + + + diff --git a/Legacy/Legacy/simulator/Portfolio.py b/Legacy/Legacy/simulator/Portfolio.py index 65f8188df..d9874832d 100644 --- a/Legacy/Legacy/simulator/Portfolio.py +++ b/Legacy/Legacy/simulator/Portfolio.py @@ -1,99 +1,99 @@ -import StrategyData, Simulator, tables as pt -from models.PortfolioModel import PortfolioModel -import numpy as np - -class Portfolio: - - def __init__(self, cash, stocks): - ''' - @param cash: int representing the cash on hand - @param stocks: dictionary representing all of the stocks a user has {} - ''' - self.currCash = float(cash) - self.currStocks = stocks - self.lastNonNanValue= {} #dict - - - def buyTransaction(self, order): - ''' - @param order: the order (we know is valid at this point) to execute on the portfolio - @summary: Updates the portfolio after a stock is purchased - ''' - # Subtract the impact cost - it cost more because you inflated the price buying so much - # cashChange is NEGATIVE when passed in - self.currCash += float(-order['fill/commission'] + (order['fill/cashChange'] * order['fill/quantity']) - order['fill/impactCost']) - print "Cash adjusted for buy txn is: " + str(self.currCash) - if order['symbol'] in self.currStocks.keys(): - self.currStocks[order['symbol']] += order['fill/quantity'] - else: - self.currStocks[order['symbol']] = order['fill/quantity'] - - self.lastNonNanValue[order['symbol']]= order['limit_price'] - - def sellTransaction(self,order): - ''' - @param order: the order (we know is valid at this point) to execute on the portfolio - @summary: Updates the portfolio after a stock is sold - ''' - # Subtract effect - gain less money - # cashChange is POSITIVE when passed in - self.currCash += float(-order['fill/commission'] + (order['fill/cashChange'] * order['fill/quantity']) - order['fill/impactCost']) - print "Cash adjusted for sell txn is: " + str(self.currCash) - if order['symbol'] in self.currStocks: - self.currStocks[order['symbol']] -= order['fill/quantity'] - if self.currStocks[order['symbol']] == 0: - del self.currStocks[order['symbol']] - else: - self.currStocks[order['symbol']] = -order['fill/quantity'] - - self.lastNonNanValue[order['symbol']]= order['limit_price'] - - def hasStock(self,symbol,volume): - ''' - @summary: Returns a boolean of whether or not the appropriate amount of the given stock exist in the portfolio. - ''' - if not symbol in self.currStocks: - return False - if volume < 0: - return self.currStocks[symbol] <= volume - return self.currStocks[symbol] >= volume - - - def calcPortfolioValue(self, timestamp, dataAccess): - ''' - @attention: includes cash - ''' - DAY= 86400 - - portfolioValue= float(0.0) - for stock in self.currStocks: - stockPrice= dataAccess.getStockDataItem (str(stock), 'adj_close', timestamp - DAY) - - if (np.isnan(stockPrice)): - portfolioValue+= float (self.lastNonNanValue[str(stock)]) - else: - #value is not nan - portfolioValue+= float(stockPrice) - self.lastNonNanValue[str(stock)]= float(stockPrice) - - portfolioValue+= float(self.currCash) - return portfolioValue - #calcPortfolioValue done - - def getHeldQty(self, stock): - if stock in self.currStocks.keys(): - return self.currStocks[stock] - else: - return np.NaN - - def getListOfStocks(self): - ''' - @return: A list of all cureently held stocks (in arbitrary order?) 
- ''' - return self.currStocks.keys() - - - def close(self): - #no tables or HDF5 output - pass +import StrategyData, Simulator, tables as pt +from models.PortfolioModel import PortfolioModel +import numpy as np + +class Portfolio: + + def __init__(self, cash, stocks): + ''' + @param cash: int representing the cash on hand + @param stocks: dictionary representing all of the stocks a user has {} + ''' + self.currCash = float(cash) + self.currStocks = stocks + self.lastNonNanValue= {} #dict + + + def buyTransaction(self, order): + ''' + @param order: the order (we know is valid at this point) to execute on the portfolio + @summary: Updates the portfolio after a stock is purchased + ''' + # Subtract the impact cost - it cost more because you inflated the price buying so much + # cashChange is NEGATIVE when passed in + self.currCash += float(-order['fill/commission'] + (order['fill/cashChange'] * order['fill/quantity']) - order['fill/impactCost']) + print("Cash adjusted for buy txn is: " + str(self.currCash)) + if order['symbol'] in list(self.currStocks.keys()): + self.currStocks[order['symbol']] += order['fill/quantity'] + else: + self.currStocks[order['symbol']] = order['fill/quantity'] + + self.lastNonNanValue[order['symbol']]= order['limit_price'] + + def sellTransaction(self,order): + ''' + @param order: the order (we know is valid at this point) to execute on the portfolio + @summary: Updates the portfolio after a stock is sold + ''' + # Subtract effect - gain less money + # cashChange is POSITIVE when passed in + self.currCash += float(-order['fill/commission'] + (order['fill/cashChange'] * order['fill/quantity']) - order['fill/impactCost']) + print("Cash adjusted for sell txn is: " + str(self.currCash)) + if order['symbol'] in self.currStocks: + self.currStocks[order['symbol']] -= order['fill/quantity'] + if self.currStocks[order['symbol']] == 0: + del self.currStocks[order['symbol']] + else: + self.currStocks[order['symbol']] = -order['fill/quantity'] + + self.lastNonNanValue[order['symbol']]= order['limit_price'] + + def hasStock(self,symbol,volume): + ''' + @summary: Returns a boolean of whether or not the appropriate amount of the given stock exist in the portfolio. + ''' + if not symbol in self.currStocks: + return False + if volume < 0: + return self.currStocks[symbol] <= volume + return self.currStocks[symbol] >= volume + + + def calcPortfolioValue(self, timestamp, dataAccess): + ''' + @attention: includes cash + ''' + DAY= 86400 + + portfolioValue= float(0.0) + for stock in self.currStocks: + stockPrice= dataAccess.getStockDataItem (str(stock), 'adj_close', timestamp - DAY) + + if (np.isnan(stockPrice)): + portfolioValue+= float (self.lastNonNanValue[str(stock)]) + else: + #value is not nan + portfolioValue+= float(stockPrice) + self.lastNonNanValue[str(stock)]= float(stockPrice) + + portfolioValue+= float(self.currCash) + return portfolioValue + #calcPortfolioValue done + + def getHeldQty(self, stock): + if stock in list(self.currStocks.keys()): + return self.currStocks[stock] + else: + return np.NaN + + def getListOfStocks(self): + ''' + @return: A list of all cureently held stocks (in arbitrary order?) 
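
## The converted methods above test membership with
## `order['symbol'] in list(self.currStocks.keys())`, a mechanical 2to3
## artifact. It works, but copies every key on each call; in Python 3 the
## idiomatic (and constant-time) form tests the dict directly:

currStocks = {'IBM': 100, 'MSFT': 50}
'IBM' in list(currStocks.keys())   # True, but builds a throwaway list first
'IBM' in currStocks                # True, O(1) hash lookup
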
+ ''' + return list(self.currStocks.keys()) + + + def close(self): + #no tables or HDF5 output + pass \ No newline at end of file diff --git a/Legacy/Legacy/simulator/Position.py b/Legacy/Legacy/simulator/Position.py index 64212a38f..62c532b0a 100644 --- a/Legacy/Legacy/simulator/Position.py +++ b/Legacy/Legacy/simulator/Position.py @@ -1,169 +1,169 @@ -import tables as pt -import numpy as np -from models.PositionModel import PositionModel - -class Position: - def __init__(self): - self.position = np.array([]) - - def getPositions(self): - ''' - Returns all of the positions - ''' - return self.position - - def addPosition(self,timestamp,symbol,shares,purchase_price): - ''' - Adds a new position - timestamp: the time the position was entered - symbol: the ticker of the stock - shares: the number of shares - purchase_price: the price per share (excludes any additional costs such as commission or impact) - ''' - row = {} - row['timestamp'] = timestamp - row['symbol'] = symbol - row['shares'] = shares - row['purchase_price'] = purchase_price - self.position = np.append(self.position, row) - - def removePosition(self, symbol, shares, closeType): - ''' - Removes/modifies positions until the total number of shares have been removed - symbol: the ticker of the stock - shares: the number of shares to remove - closeType: removal order "lifo" or "fifo" - NOTE: Method assumes that verification of valid sell has already been completed - ''' - debug = False - - rowIndexes = [] - rows = [] - #check for negative shares, short or not - if shares<0: - short = True - else: - short = False - if debug: - print 'REMOVING POSITIONS' - print 'REMOVE:',symbol,shares,closeType - for row in self.position: - print 'CURRROWS:', row - #get all rows for the correct stock - idx = 0 - for row in self.position: - if(row['symbol']==symbol): - row["keyIndex"]=idx - rows.append(row) - idx+=1 - if debug: - print 'POSSIBLE ROWS TO REMOVE: ',rows - if(closeType=='fifo'): - i = 0 - row = rows[i] #get first row - if debug: - print 'FIFO', row - posShares = row['shares'] #get shares - posShares = abs(posShares) #account for shorts (make positive) - #determines the number of positions to remove - while(shares>posShares): - shares-=posShares - i+=1 - row = rows[i] - posShares = row['shares'] - posShares = abs(posShares) - #converts shorts back to negative - if short: - shares *= -1 - posShares *= -1 - #change shares in the last (changed) row - newRow = self.position[ rows[i]['keyIndex'] ] - newShares = posShares-shares - newRow['shares'] = newShares - if debug: - print 'UPDATEDROW(FIFO):', newRow - #removes old rows - removes = [] - #remove updated row if it has 0 shares now - if newShares == 0: - removes.append(rows[i]['keyIndex']) - #remove the rest of the rows - cnt = 0 - while cntposShares): - shares-=posShares - i-=1 - row = rows[i] - posShares = row['shares'] - posShares = abs(posShares) - #converts shorts back to negative - if short: - shares *= -1 - posShares *= -1 - #modifies changed row - newRow = self.position[ rows[i]['keyIndex'] ] - newShares = posShares-shares - newRow['shares'] = newShares - if debug: - print 'UPDATEDROW(LIFO):', newRow - #removes old rows - removes = [] - #remove updated row if it has 0 shares now - if newShares == 0: - removes.append(rows[i]['keyIndex']) - #remove the rest of the rows - cnt = len(rows)-1 - while cnt>i: - row = rows[cnt] - removes.append(row['keyIndex']) - cnt-=1 - if debug: - for idx in removes: - print 'ROWREMOVED:', self.position[idx] - self.position = 
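
## removePosition above walks a snapshot of matching rows and deletes fully
## consumed lots: fifo eats from the oldest row, lifo from the newest. The
## same bookkeeping in miniature, using a deque of [shares, price] lots.
## Illustrative only -- it assumes the caller already verified enough shares
## exist (as the method's NOTE states) and ignores the short-sale sign logic:

from collections import deque

def remove_lots(lots, shares, close_type):
    # lots: deque of [shares, price]; mutated in place
    take_front = (close_type == 'fifo')
    while shares > 0:
        lot = lots[0] if take_front else lots[-1]
        consumed = min(shares, lot[0])
        lot[0] -= consumed
        shares -= consumed
        if lot[0] == 0:
            if take_front:
                lots.popleft()
            else:
                lots.pop()
    return lots

## e.g. remove_lots(deque([[100, 10.0], [50, 11.0]]), 120, 'fifo')
## consumes the whole first lot and 20 shares of the second.
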
np.delete(self.position,removes)
-            for row in rows:
-                del row['keyIndex']
-        else:
-            #invalid type
-            raise TypeError("Not an existing close type '%s'." % str(closeType))
-
-    def fillTable(self):
-        '''
-        Converts the arrays into HDF5 tables for post simulation review
-        '''
-        self.positionFile = pt.openFile('PositionModel.h5', mode = "w")
-        self.position = self.positionFile.createTable('/', 'position', PositionModel)
-        for arrRow in self.position:
-            row = self.position.row
-            row['timestamp'] = arrRow['timestamp']
-            row['symbol'] = arrRow['symbol']
-            row['shares'] = arrRow['shares']
-            row['purchase_price'] = arrRow['purchase_price']
-            row.append()
-        self.position.flush()
-        self.positionFile.close()
-
-    def close(self):
-        self.fillTable()
-
+import tables as pt
+import numpy as np
+from models.PositionModel import PositionModel
+
+class Position:
+    def __init__(self):
+        self.position = np.array([])
+
+    def getPositions(self):
+        '''
+        Returns all of the positions
+        '''
+        return self.position
+
+    def addPosition(self,timestamp,symbol,shares,purchase_price):
+        '''
+        Adds a new position
+        timestamp: the time the position was entered
+        symbol: the ticker of the stock
+        shares: the number of shares
+        purchase_price: the price per share (excludes any additional costs such as commission or impact)
+        '''
+        row = {}
+        row['timestamp'] = timestamp
+        row['symbol'] = symbol
+        row['shares'] = shares
+        row['purchase_price'] = purchase_price
+        self.position = np.append(self.position, row)
+
+    def removePosition(self, symbol, shares, closeType):
+        '''
+        Removes/modifies positions until the total number of shares have been removed
+        symbol: the ticker of the stock
+        shares: the number of shares to remove
+        closeType: removal order "lifo" or "fifo"
+        NOTE: Method assumes that verification of valid sell has already been completed
+        '''
+        debug = False
+
+        rowIndexes = []
+        rows = []
+        #check for negative shares, short or not
+        if shares<0:
+            short = True
+        else:
+            short = False
+        if debug:
+            print('REMOVING POSITIONS')
+            print('REMOVE:',symbol,shares,closeType)
+            for row in self.position:
+                print('CURRROWS:', row)
+        #get all rows for the correct stock
+        idx = 0
+        for row in self.position:
+            if(row['symbol']==symbol):
+                row["keyIndex"]=idx
+                rows.append(row)
+            idx+=1
+        if debug:
+            print('POSSIBLE ROWS TO REMOVE: ',rows)
+        if(closeType=='fifo'):
+            i = 0
+            row = rows[i] #get first row
+            if debug:
+                print('FIFO', row)
+            posShares = row['shares'] #get shares
+            posShares = abs(posShares) #account for shorts (make positive)
+            #determines the number of positions to remove
+            while(shares>posShares):
+                shares-=posShares
+                i+=1
+                row = rows[i]
+                posShares = row['shares']
+                posShares = abs(posShares)
+            #converts shorts back to negative
+            if short:
+                shares *= -1
+                posShares *= -1
+            #change shares in the last (changed) row
+            newRow = self.position[ rows[i]['keyIndex'] ]
+            newShares = posShares-shares
+            newRow['shares'] = newShares
+            if debug:
+                print('UPDATEDROW(FIFO):', newRow)
+            #removes old rows
+            removes = []
+            #remove updated row if it has 0 shares now
+            if newShares == 0:
+                removes.append(rows[i]['keyIndex'])
+            #remove the rest of the rows
+            cnt = 0
+            while cnt<i:
+                row = rows[cnt]
+                removes.append(row['keyIndex'])
+                cnt+=1
+            if debug:
+                for idx in removes:
+                    print('ROWREMOVED:', self.position[idx])
+            self.position = np.delete(self.position,removes)
+            for row in rows:
+                del row['keyIndex']
+        elif(closeType=='lifo'):
+            i = len(rows)-1
+            row = rows[i] #get last row
+            if debug:
+                print('LIFO', row)
+            posShares = row['shares'] #get shares
+            posShares = abs(posShares) #account for shorts (make positive)
+            #determines the number of positions to remove
+            while(shares>posShares):
+                shares-=posShares
+                i-=1
+                row = rows[i]
+                posShares = row['shares']
+                posShares = abs(posShares)
+            #converts shorts back to negative
+            if short:
+                shares *= -1
+                posShares *= -1
+            #modifies changed row
+            newRow = self.position[ rows[i]['keyIndex'] ]
+            newShares = posShares-shares
+            newRow['shares'] = newShares
+            if debug:
+                print('UPDATEDROW(LIFO):', 
newRow) + #removes old rows + removes = [] + #remove updated row if it has 0 shares now + if newShares == 0: + removes.append(rows[i]['keyIndex']) + #remove the rest of the rows + cnt = len(rows)-1 + while cnt>i: + row = rows[cnt] + removes.append(row['keyIndex']) + cnt-=1 + if debug: + for idx in removes: + print('ROWREMOVED:', self.position[idx]) + self.position = np.delete(self.position,removes) + for row in rows: + del row['keyIndex'] + else: + #invalid type + raise TypeError("Not an existing close type '%s'." % str(closeType)) + + def fillTable(self): + ''' + Converts the arrays into HDF5 tables for post simulation review + ''' + self.positionFile = pt.openFile('PositionModel.h5', mode = "w") + self.position = self.positionFile.createTable('/', 'position', PositionModel) + for arrRow in self.position: + row = self.position.row + row['timestamp'] = arrRow['timestamp'] + row['symbol'] = arrRow['symbol'] + row['shares'] = arrRow['shares'] + row['purchase_price'] = arrRow['purchase_price'] + row.append() + self.position.flush() + self.positionFile.close() + + def close(self): + self.fillTable() + diff --git a/Legacy/Legacy/simulator/Simulator.py b/Legacy/Legacy/simulator/Simulator.py index 09218a928..6dbeef214 100644 --- a/Legacy/Legacy/simulator/Simulator.py +++ b/Legacy/Legacy/simulator/Simulator.py @@ -1,967 +1,967 @@ -#import optimizers.BollingerOptimizer as Optimizer -import optimizers.BollingerOptimizer as Optimizer -import models.PortfolioModel, models.PositionModel, models.OrderModel, models.StrategyDataModel -import tables as pt, numpy as np -from optparse import OptionParser -import sys, time -import Portfolio, Position, Order, DataAccess as da , StrategyData -import os -import dircache -import numpy as np -#import curveFittingOptimizer -#import optimizers.BollingerOptimizer as Optimizer - -from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas -from matplotlib.figure import Figure - - - -class Simulator(): - def __init__(self, cash, stocks, startTime, endTime, interval, minCom, comPerShare, isTable, maxEffect, arrayFile, listOfStocksFile): - # strategy contains a reference to the strategy method specified in the command line -# self.strategy = strategy - # startTime/endTime are the timestamps marking the beginning and end of the time for which the simulation should run - self.startTime = startTime - self.currTimestamp = startTime - self.endTime = endTime - # interval is the amount of time between iterations of the strategy - self.interval = interval - # minCom is the minimum commission per transaction - self.minCom = minCom - # comPerShare is the calculated commission per share--if this is greater than the minimum commission, this is what gets used - self.comPerShare = comPerShare - # timeStampIndex and currDataTimeIndex are markers to track the current position in the list of timestamps - self.timeStampIndex = 0 - self.currDataTimeIndex = 0 - # maxEffect is the maximum percentage change in price a single transaction can have on the actual market price - self.maxEffect = maxEffect - # times becomes the list of timestamps - self.times = [] - # isTable tells the simulator whether to use the table- or array-specific methods - self.isTable = isTable - - #starting portfolio, position, and order initializations - self.portfolio = Portfolio.Portfolio(cash, stocks) - self.position = Position.Position() - self.order = Order.Order(self.isTable) - #populate the strategyData with the relevant type of data storage - if isTable: - - -# self.h5f= pt.openFile(pytablesFile, 
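
## Note a latent bug in fillTable above, present on both sides of this diff:
## self.position is rebound to the freshly created (empty) PyTables table
## *before* the loop reads it, so no rows are ever copied out. Snapshotting
## the array first fixes it -- a sketch keeping the module's pt/PositionModel
## imports and the original file layout:

def fillTable(self):
    arr = self.position                       # snapshot the numpy rows first
    self.positionFile = pt.openFile('PositionModel.h5', mode="w")
    table = self.positionFile.createTable('/', 'position', PositionModel)
    for arrRow in arr:
        row = table.row
        row['timestamp'] = arrRow['timestamp']
        row['symbol'] = arrRow['symbol']
        row['shares'] = arrRow['shares']
        row['purchase_price'] = arrRow['purchase_price']
        row.append()
    table.flush()
    self.positionFile.close()
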
mode = "a") # if mode ='w' is used here then the file gets overwritten! - - listOfPaths=list() - #listOfPaths.append("C:\\generated data files\\one stock per file\\maintain folder structure\\US_NASDAQ\\") - #listOfPaths.append("C:\\temp\\") - #listOfPaths.append("C:\\tempoutput\\") - listOfPaths.append("/hzr71/research/QSData/tempdata/") #Modification for gekko - self.listOfStocks= self.getStocks(listOfStocksFile, listOfPaths) - - self.dataAccess= da.DataAccess (True, listOfPaths, "/StrategyData", "StrategyData", True, self.listOfStocks, self.startTime, self.endTime) - self.strategyData = StrategyData.StrategyData("someRandomStringToNotBreakTheCode", self.dataAccess, self.isTable) - -# else: -# self.strategyData = StrategyData.StrategyData(arrayFile,self.isTable) - - def getStocks(self, pathToFile, listOfPaths): - - listOfStocks=list() - if (os.path.exists(pathToFile)): - print "Reading in stock names from file..." - f= open(pathToFile) - lines= f.readlines() - f.close() - for line1 in lines: - listOfStocks.append(line1.partition("\n")[0]) - #for done - else: - #Path does not exist - print "Reading in all stock names..." - fileExtensionToRemove=".h5" - - for path in listOfPaths: - stocksAtThisPath=list () - - stocksAtThisPath= dircache.listdir(str(path)) - #Next, throw away everything that is not a .h5 And these are our stocks! - stocksAtThisPath = filter (lambda x:(str(x).find(str(fileExtensionToRemove)) > -1), stocksAtThisPath) - #Now, we remove the .h5 to get the name of the stock - stocksAtThisPath = map(lambda x:(x.partition(str(fileExtensionToRemove))[0]),stocksAtThisPath) - - for stock in stocksAtThisPath: - listOfStocks.append(stock) - return listOfStocks - #readStocksFromFile done - - - - def addTimeStamps(self): - # generates the list of timestamps - global timersActive - temp = [] - - if timersActive: - print 'Generating valid timestamps' - cnt = 0 - cycTime = time.time() -# for i in self.strategyData.strategyData.iterrows(): - for ts in self.dataAccess.getTimestampArray(): - if ts not in temp: - temp.append(ts) - if timersActive: - if(cnt%1000000==0): - print '%i rows finished: %i secs elapsed'%(cnt,time.time()-cycTime) - cnt+=1 - if timersActive: - print 'all rows added: %i secs elapsed'%(time.time()-cycTime) - #Put the list in order, convert it to a NumPy array - temp.sort() - temp = np.array(temp) - return temp - - def calcCommission(self, volume): - ''' - @summary: returns the commission on a given trade given the volume - ''' - return max(minCom,volume * self.comPerShare) - - def getCurrentDataTimestamp(self): - ''' - @summary: returns the timestamp of the most recent data available - ''' - while self.times[self.currDataTimeIndex+1] 0: -# myStockasDict = stocks[0] #Grab the first dictionary in the list -# return myStockasDict['volume'] # Get the volume -# return None - - def buyStock(self, newOrder): - ''' - @summary: function takes in an instance of OrderDetails, executes the changes to the portfolio and adds the order to the order table - @param newOrder: an instance of OrderDetails representing the new order - @warning: The Order should not be added to the order table before calling this function - ''' - ts = self.getCurrentDataTimestamp() - maxVol4Day = self.dataAccess.getStockDataItem(newOrder['symbol'], 'volume', ts)#self.getVolumePerDay(newOrder['symbol'], ts) - if newOrder['order_type'] == 'moo': - #market order open -# price = strategyData.getPrice(ts, newOrder['symbol'], 'adj_open') - price = self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_open', ts) - 
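
## addTimeStamps above deduplicates with `if ts not in temp` against a
## growing list, which is O(n^2) over millions of rows (hence the progress
## timers). Since the result is sorted anyway, a set does the same job in
## one pass -- a sketch assuming hashable, sortable timestamps:

import numpy as np

def unique_sorted_timestamps(timestamp_iter):
    # one-pass dedup, then sort once at the end
    return np.array(sorted(set(timestamp_iter)))
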
if price == None or np.isnan (price): - if noisy: - print "Price data unavailable for ts:",ts,'stock:',newOrder['symbol'] - return None - elif maxVol4Day == None or np.isnan(maxVol4Day): - if noisy: - print "Volume Data Not Available for ts:", ts, 'stock:', newOrder['symbol'] - return None - else: - print "Checking cash..." - checkAmount = min(abs(newOrder['shares']),maxVol4Day) - # New is cost the original total price (price * shares) + effect*Total Price - # Basically, you raise the cost as you buy - cost = (checkAmount * price[0]['adj_open'] + (checkAmount * price[0]['adj_open'] * self.calcEffect(maxVol4Day, checkAmount))) + self.calcCommission(checkAmount) - if(cost>self.portfolio.currCash): - #Not enough cash to buy stock - print "Not enough cash to buy stock." - #print "Apparently not enough cash. I don't believe this. Current cash: " + str (self.portfolio.currCash) + " total cost: "+ str (cost)+ ", cost of one share: "+str (self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_open', ts)) - - return None - if abs(newOrder['shares']) > maxVol4Day: - if newOrder['shares'] < 0: - newOrder['shares'] = -maxVol4Day - else: - newOrder['shares'] = maxVol4Day - newOrder.update() - self.order.order.flush() - #__execute trade__ - #populate fill field in order - newOrder['fill/timestamp'] = ts - newOrder['fill/quantity'] = newOrder['shares'] if (newOrder['task'].upper() == 'BUY') else -newOrder['shares'] - newOrder['fill/cashChange'] = -price - newOrder['fill/commission'] = self.calcCommission(newOrder['shares']) - newOrder['fill/impactCost'] = newOrder['shares'] * price * self.calcEffect(maxVol4Day, newOrder['shares']) # This is the CHANGE in the total cost - what effect the volume has - #add trade to portfolio - self.portfolio.buyTransaction(newOrder) - #add position - self.position.addPosition(ts,newOrder['symbol'],newOrder['fill/quantity'],price) - elif newOrder['order_type'] == 'moc': - #market order close -# price = self.strategyData.getPrice(ts, newOrder['symbol'], 'adj_close') -# price = self.dataAccess.getData(newOrder['symbol'], 'adj_close', ts, ts)[0]['adj_close'] - - - price = self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_close', ts) - if price == None or np.isnan (price): - if noisy: - print "Price data unavailable for ts:",ts,'stock:',newOrder['symbol'] - return None - elif maxVol4Day == None or np.isnan(maxVol4Day): - if noisy: - print "Volume Data Not Available for ts:", ts, 'stock:', newOrder['symbol'] - return None - else: - checkAmount = min(abs(newOrder['shares']),maxVol4Day) - # New is cost the original total price (price * shares) + effect*Total Price - # Basically, you raise the cost as you buy -# cost = (checkAmount + (checkAmount * self.calcEffect(maxVol4Day, checkAmount))) + self.calcCommission(checkAmount) - cost = (checkAmount * price + (checkAmount * price * self.calcEffect(maxVol4Day, checkAmount))) + self.calcCommission(checkAmount) - if(cost>self.portfolio.currCash): - #Not enough cash to buy stock - print "Not enough cash. 
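
## Every order branch above prices a fill the same way:
## cost = shares * price * (1 + effect) + commission, where `effect` is the
## fractional market impact from calcEffect (whose body is not shown in this
## diff). The arithmetic in isolation, with hypothetical names:

def order_cost(shares, price, effect, commission):
    base = shares * price
    return base * (1.0 + effect) + commission

order_cost(100, 10.0, 0.02, 5.0)   # 100*10*1.02 + 5 = 1025.0
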
Current cash: " + str (self.portfolio.currCash) + " total cost: "+ str (cost)+ ", cost of one share: "+str (self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_close', ts)) - - return None - if abs(newOrder['shares']) > maxVol4Day: - if newOrder['shares'] < 0: - newOrder['shares'] = -maxVol4Day - else: - newOrder['shares'] = maxVol4Day - newOrder.update() - self.order.order.flush() - newOrder['fill/timestamp'] = ts - newOrder['fill/quantity'] = newOrder['shares'] if (newOrder['task'].upper() == 'BUY') else -newOrder['shares'] - newOrder['fill/cashChange'] = -price - newOrder['fill/commission'] = self.calcCommission(newOrder['shares']) - newOrder['fill/impactCost'] = newOrder['shares'] * price * self.calcEffect(maxVol4Day, newOrder['shares']) # This is the CHANGE in the total cost - what effect the volume has - #add trade to portfolio - self.portfolio.buyTransaction(newOrder) - #add position - self.position.addPosition(ts,newOrder['symbol'],newOrder['fill/quantity'],price) - elif newOrder['order_type'] == 'limit': - #limit order - price = newOrder['limit_price'] - if price == None or np.isnan (price): - if noisy: - print "Price data unavailable for ts:",ts,'stock:',newOrder['symbol'] - return None - elif maxVol4Day == None or np.isnan(maxVol4Day): - if noisy: - print "Volume Data Not Available for ts:", ts, 'stock:', newOrder['symbol'] - return None - else: -# if ((newOrder['limit_price'] > self.strategyData.getPrice(ts, newOrder['symbol'], 'adj_high')) or ( newOrder['limit_price'] < self.strategyData.getPrice(ts, newOrder['symbol'], 'adj_low'))): - if ((newOrder['limit_price'] > self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_high', ts)) or ( newOrder['limit_price'] < self.dataAccess.getData(newOrder['symbol'], 'adj_low', ts))): - #limit price outside of daily range - return None - checkAmount = min(abs(newOrder['shares']),maxVol4Day) - # New is cost the original total price (price * shares) + effect*Total Price - # Basically, you raise the cost as you buy - cost = (checkAmount * price + (checkAmount * price * self.calcEffect(maxVol4Day, checkAmount))) + self.calcCommission(checkAmount) - if(cost>self.portfolio.currCash): - #Not enough cash to buy stock - return None - if abs(newOrder['shares']) > maxVol4Day: - if newOrder['shares'] < 0: - newOrder['shares'] = -maxVol4Day - else: - newOrder['shares'] = maxVol4Day - newOrder.update() - self.order.order.flush() - #__execute trade__ - #populate fill field in order - newOrder['fill/timestamp'] = ts - newOrder['fill/quantity'] = newOrder['shares'] if (newOrder['task'].upper() == 'BUY') else -newOrder['shares'] - newOrder['fill/cashChange'] = -price - newOrder['fill/commission'] = self.calcCommission(newOrder['shares']) - newOrder['fill/impactCost'] = newOrder['shares'] * price * self.calcEffect(maxVol4Day, newOrder['shares']) # This is the CHANGE in the total cost - what effect the volume has - #add trade to portfolio - self.portfolio.buyTransaction(newOrder) - #add position - self.position.addPosition(ts,newOrder['symbol'],newOrder['fill/quantity'],price) - elif newOrder['order_type'] == 'vwap': - #volume weighted average price -# price = strategyData.getPrice(ts, newOrder['symbol'], 'adj_open') -# price = self.dataAccess.getData(newOrder['symbol'], 'adj_open', ts, ts)[0]['adj_close'] - price = self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_open', ts) - if price == None or np.isnan (price): - if noisy: - print "Price data unavailable for ts:",ts,'stock:',newOrder['symbol'] - return None - elif maxVol4Day == None or 
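
## The 'limit' branch above refuses to fill when the limit price falls
## outside the day's traded range, since such a price could not have traded.
## The guard on its own (hypothetical names):

def limit_fillable(limit_price, day_high, day_low):
    # a limit outside [low, high] is unreachable that day
    return day_low <= limit_price <= day_high
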
np.isnan(maxVol4Day): - if noisy: - print "Volume Data Not Available for ts:", ts, 'stock:', newOrder['symbol'] - return None - else: - checkAmount = min(abs(newOrder['shares']),maxVol4Day) - # New is cost the original total price (price * shares) + effect*Total Price - # Basically, you raise the cost as you buy - price += self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_close', ts)#[0]['adj_close'] #strategyData.getPrice(ts, newOrder['symbol'], 'adj_close') - price += self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_high', ts)#[0]['adj_high'] #strategyData.getPrice(ts, newOrder['symbol'], 'adj_high') - price += self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_low', ts)#[0]['adj_low'] #strategyData.getPrice(ts, newOrder['symbol'], 'adj_low') - price = price / 4. - cost = (checkAmount * price + (checkAmount * price * self.calcEffect(maxVol4Day, checkAmount))) + self.calcCommission(checkAmount) - if(cost>self.portfolio.currCash): - #Not enough cash to buy stock - return None - if abs(newOrder['shares']) > maxVol4Day: - if newOrder['shares'] < 0: - newOrder['shares'] = -maxVol4Day - else: - newOrder['shares'] = maxVol4Day - newOrder.update() - self.order.order.flush() - # New is cost the original total price (price * shares) + effect*Total Price - # Basically, you raise the cost the more you buy. - #__execute trade__ - #populate fill field in order - newOrder['fill/timestamp'] = ts - newOrder['fill/quantity'] = newOrder['shares'] if (newOrder['task'].upper() == 'BUY') else -newOrder['shares'] - newOrder['fill/cashChange'] = -price - newOrder['fill/commission'] = self.calcCommission(newOrder['shares']) - newOrder['fill/impactCost'] = newOrder['shares'] * price * self.calcEffect(maxVol4Day, newOrder['shares']) # This is the CHANGE in the total cost - what effect the volume has - #add trade to portfolio - self.portfolio.buyTransaction(newOrder) - #add position - self.position.addPosition(ts,newOrder['symbol'],newOrder['fill/quantity'],price) - else: - #throw invalid type error - raise TypeError("Not an existing trade type '%s'." 
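
## Despite its name, the 'vwap' order type above does not weight by volume:
## the reference price is the plain mean of the four daily prices,
## (open + close + high + low) / 4. In isolation:

def ohlc_mean(open_p, close_p, high_p, low_p):
    # proxy price used by the 'vwap' branch above
    return (open_p + close_p + high_p + low_p) / 4.0
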
% str(newOrder['order_type'])) - newOrder.update() - self.order.order.flush() - return price - - def sellStock(self,newOrder): - ''' - @summary: function takes in an instance of OrderDetails, executes the changes to the portfolio and adds the order to the order table - @param newOrder: an instance of OrderDetails representing the new order - @warning: The Order should not be added to the order table before calling this function - ''' - ts = self.getCurrentDataTimestamp() #need a function to get the next available time we can trade - maxVol4Day = self.dataAccess.getStockDataItem(newOrder['symbol'], 'volume', ts)#self.getVolumePerDay(newOrder['symbol'], ts) - if newOrder['order_type'] == 'moo': - #market order open - price = self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_open', ts)#[0]['adj_open'] #self.strategyData.getPrice(ts, newOrder['symbol'], 'adj_open') - if price == None or np.isnan (price): - if noisy: - print "Price data unavailable for",ts,newOrder['symbol'] - return None - elif maxVol4Day == None or np.isnan(maxVol4Day): - if noisy: - print "Volume Data Not Available for ts:", ts, 'stock:', newOrder['symbol'] - return None - else: - checkAmount = min(abs(newOrder['shares']),maxVol4Day) - if newOrder['task'].upper() == 'SELL': - if not (self.portfolio.hasStock(newOrder['symbol'],checkAmount)): # NEW - #Not enough shares owned to sell requested amount - print "Not enough shares owned to sell the requested amount" - return None - else: - if not (self.portfolio.hasStock(newOrder['symbol'],-checkAmount)): # NEW - #Not enough shares owned to sell requested amount - print "Not enough shares owned to sell the requested amount" - return None - cost = (checkAmount * price + (checkAmount * price * self.calcEffect(maxVol4Day, checkAmount))) + self.calcCommission(checkAmount) - if(cost>self.portfolio.currCash) and (newOrder['shares'] < 0): - #Not enough cash to cover stock - print "Not enough cash to cover stock" - return None - #__execute trade__ - #populate fill field in order - if abs(newOrder['shares']) > maxVol4Day: - if newOrder['shares'] < 0: - newOrder['shares'] = -maxVol4Day - else: - newOrder['shares'] = maxVol4Day - newOrder.update() - self.order.order.flush() - newOrder['fill/timestamp'] = ts - newOrder['fill/quantity'] = newOrder['shares'] if (newOrder['task'].upper() == 'SELL') else -newOrder['shares'] - newOrder['fill/cashChange'] = price #NEW - newOrder['fill/commission'] = self.calcCommission(newOrder['shares']) - newOrder['fill/impactCost'] = newOrder['shares'] * price * self.calcEffect(maxVol4Day, newOrder['shares']) # This is the CHANGE in the total cost - what effect the volume has - #add trade to portfolio - self.portfolio.sellTransaction(newOrder) - #remove positions according to lifo/fifo - self.position.removePosition(newOrder['symbol'],newOrder['shares'] if (newOrder['task'].upper() == 'SELL') else -newOrder['shares'],newOrder['close_type']) - elif newOrder['order_type'] == 'moc': - #market order close - price = self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_close', ts)#[0]['adj_close'] #strategyData.getPrice(ts, newOrder['symbol'], 'adj_close') - if price == None or np.isnan (price): - if noisy: - print "Price data unavailable for",ts,newOrder['symbol'] - return None - elif maxVol4Day == None or np.isnan(maxVol4Day): - if noisy: - print "Volume Data Not Available for ts:", ts, 'stock:', newOrder['symbol'] - return None - else: - checkAmount = min(abs(newOrder['shares']),maxVol4Day) - if newOrder['shares'] > 0: - if not 
(self.portfolio.hasStock(newOrder['symbol'],checkAmount)): # NEW - #Not enough shares owned to sell requested amount - print "Not enough shares owned to sell the requested amount" - return None - else: - if not (self.portfolio.hasStock(newOrder['symbol'],-checkAmount)): # NEW - #Not enough shares owned to sell requested amount - print "Not enough shares owned to sell the requested amount" - return None - cost = (checkAmount * price + (checkAmount * price * self.calcEffect(maxVol4Day, checkAmount))) + self.calcCommission(checkAmount) - if(cost>self.portfolio.currCash) and (newOrder['shares'] < 0): - #Not enough cash to cover stock - print "Not enough cash to cover stock" - return None - #__execute trade__ - #populate fill field in order - if abs(newOrder['shares']) > maxVol4Day: - if newOrder['shares'] < 0: - newOrder['shares'] = -maxVol4Day - else: - newOrder['shares'] = maxVol4Day - newOrder.update() - self.order.order.flush() - newOrder['fill/timestamp'] = ts - newOrder['fill/quantity'] = newOrder['shares'] if (newOrder['task'].upper() == 'SELL') else -newOrder['shares'] - newOrder['fill/cashChange'] = price - newOrder['fill/commission'] = self.calcCommission(newOrder['shares']) - newOrder['fill/impactCost'] = newOrder['shares'] * price * self.calcEffect(maxVol4Day, newOrder['shares']) # This is the CHANGE in the total cost - what effect the volume has - #add trade to portfolio - self.portfolio.sellTransaction(newOrder) - #remove positions according to lifo/fifo - self.position.removePosition(newOrder['symbol'],newOrder['shares'] if (newOrder['task'].upper() == 'SELL') else -newOrder['shares'],newOrder['close_type']) - elif newOrder['order_type'] == 'limit': - #limit order - price = newOrder['limit_price'] - if price == None or np.isnan (price): - if noisy: - print "Price data unavailable for",ts,newOrder['symbol'] - return None - elif maxVol4Day == None or np.isnan(maxVol4Day): - if noisy: - print "Volume Data Not Available for ts:", ts, 'stock:', newOrder['symbol'] - return None - else: - checkAmount = min(abs(newOrder['shares']),maxVol4Day) - if newOrder['shares'] > 0: - if not (self.portfolio.hasStock(newOrder['symbol'],checkAmount)): # NEW - #Not enough shares owned to sell requested amount - print "Not enough shares owned to sell the requested amount" - return None - else: - if not (self.portfolio.hasStock(newOrder['symbol'],-checkAmount)): # NEW - #Not enough shares owned to sell requested amount - return None - cost = (checkAmount * price + (checkAmount * price * self.calcEffect(maxVol4Day, checkAmount))) + self.calcCommission(checkAmount) - if(cost>self.portfolio.currCash) and (newOrder['shares'] < 0): - #Not enough cash to cover stock - print "Not enough cash to cover stock" - return None - #__execute trade__ - #populate fill field in order -# if ((newOrder['limit_price'] > strategyData.getPrice(ts, newOrder['symbol'], 'adj_high')) or ( newOrder['limit_price'] < strategyData.getPrice(ts, newOrder['symbol'], 'adj_low'))): - if ((newOrder['limit_price'] > self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_high', ts)) or ( newOrder['limit_price'] < self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_low', ts))): - #limit price outside of daily range - return None - if abs(newOrder['shares']) > maxVol4Day: - if newOrder['shares'] < 0: - newOrder['shares'] = -maxVol4Day - else: - newOrder['shares'] = maxVol4Day - newOrder.update() - self.order.order.flush() - #__execute trade__ - #populate fill field in order - newOrder['fill/timestamp'] = ts - newOrder['fill/quantity'] 
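
## Each branch above repeats the same clamp: an order may not exceed the
## day's traded volume, and the sign must survive so shorts stay negative.
## As a single helper (hypothetical; the simulator inlines it each time):

def clamp_to_volume(shares, max_volume):
    if abs(shares) > max_volume:
        return -max_volume if shares < 0 else max_volume
    return shares
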
= newOrder['shares'] if (newOrder['task'].upper() == 'SELL') else -newOrder['shares'] - newOrder['fill/cashChange'] = price - newOrder['fill/commission'] = self.calcCommission(newOrder['shares']) - newOrder['fill/impactCost'] = newOrder['shares'] * price * self.calcEffect(maxVol4Day, newOrder['shares']) # This is the CHANGE in the total cost - what effect the volume has - #add trade to portfolio - self.portfolio.sellTransaction(newOrder) - #remove positions according to lifo/fifo - self.position.removePosition(newOrder['symbol'],newOrder['shares'] if (newOrder['task'].upper() == 'SELL') else -newOrder['shares'],newOrder['close_type']) - elif newOrder.order_type == 'vwap': - #volume weighted average price - price = self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_open', ts)#[0]['adj_open'] #strategyData.getPrice(ts, newOrder['symbol'], 'adj_open') - if price == None or np.isnan (price): - if noisy: - print "Price data unavailable for",ts,newOrder['symbol'] - return None - elif maxVol4Day == None or np.isnan(maxVol4Day): - if noisy: - print "Volume Data Not Available for ts:", ts, 'stock:', newOrder['symbol'] - return None - else: - checkAmount = min(abs(newOrder['shares']),maxVol4Day) - if newOrder['shares'] > 0: - if not (self.portfolio.hasStock(newOrder['symbol'],checkAmount)): # NEW - #Not enough shares owned to sell requested amount - print "Not enough shares owned to sell the requested amount" - return None - else: - if not (self.portfolio.hasStock(newOrder['symbol'],-checkAmount)): # NEW - #Not enough shares owned to sell requested amount - print "Not enough shares owned to sell the requested amount" - return None - price += self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_close', ts)#[0]['adj_close'] #strategyData.getPrice(ts, newOrder['symbol'], 'adj_close') - price += self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_high', ts)#[0]['adj_high'] #strategyData.getPrice(ts, newOrder['symbol'], 'adj_high') - price += self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_low', ts)#[0]['adj_low'] #strategyData.getPrice(ts, newOrder['symbol'], 'adj_low') - price = price / 4. - cost = (checkAmount * price + (checkAmount * price * self.calcEffect(maxVol4Day, checkAmount))) + self.calcCommission(checkAmount) - if(cost>self.portfolio.currCash) and (newOrder['shares'] < 0): - #Not enough cash to cover stock - print "Not enough cash to cover stock" - return None - #__execute trade__ - #populate fill field in order - if abs(newOrder['shares']) > maxVol4Day: - if newOrder['shares'] < 0: - newOrder['shares'] = -maxVol4Day - else: - newOrder['shares'] = maxVol4Day - newOrder.update() - self.order.order.flush() - newOrder['fill/timestamp'] = ts - newOrder['fill/quantity'] = newOrder['shares'] if (newOrder['task'].upper() == 'SELL') else -newOrder['shares'] - newOrder['fill/cashChange'] = price - newOrder['fill/commission'] = self.calcCommission(newOrder['shares']) - newOrder['fill/impactCost'] = newOrder['shares'] * price * self.calcEffect(maxVol4Day, newOrder['shares']) # This is the CHANGE in the total cost - what effect the volume has - #add trade to portfolio - self.portfolio.sellTransaction(newOrder) - #remove positions according to lifo/fifo - self.position.removePosition(newOrder['symbol'],newOrder['shares'] if (newOrder['task'].upper() == 'SELL') else -newOrder['shares'],newOrder['close_type']) - else: - #throw invalid type error - raise TypeError("Not an existing trade type '%s'." 
% str(newOrder.order_type)) - newOrder.update() - self.order.order.flush() - return price - - def execute(self): - ''' - @summary: This function iterates through the orders and attempts to execute all the ones that are still valid and unfilled - ''' - count = 0 - for order in self.order.getOrders(): - if (order['timestamp'] < self.currTimestamp): - if (order['duration'] + order['timestamp']) >= self.currTimestamp: - if order['fill/timestamp'] == 0: - #Have unfilled, valid orders - if order['task'].upper() == "BUY": - #is a buy - if self.portfolio.hasStock(order['symbol'],1): - if order['shares']>0: - result = self.buyStock(order) - if noisy: - if result is not None: - print "Succeeded in buying %d shares of %s for %.2f as %s, with close type %s. Placed at: %d. Current timestamp: %d, order #%d" % (order['shares'], order['symbol'], result, order['order_type'], order['close_type'], order['timestamp'], self.currTimestamp, count) - #else: - #print "THIS IS MOST LIKELY WRONG- Did not succeed in buying %d shares of %s as %s; not enough cash. Order valid until %d. Placed at: %d. Current timestamp: %d, order #%d" %(order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], order['timestamp'], self.currTimestamp, count) - else: - if noisy: - print "Did not succeed in buying %d shares of %s as %s; negative values are not valid buy amounts. Order valid until %d. Placed at: %d. Current timestamp: %d, order #%d" %(order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], order['timestamp'], self.currTimestamp, count) - elif self.portfolio.hasStock(order['symbol'],-1): - if noisy: - print "Did not succeed in buying %d shares of %s as %s; you must cover your shortsell before you can buy. Order valid until %d. Placed at: %d. Current timestamp: %d, order #%d" %(order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], order['timestamp'], self.currTimestamp, count) - else: - result = self.buyStock(order) - if noisy: - if result: - print "Succeeded in buying %d shares of %s for %.2f as %s. Placed at: %d. Current timestamp: %d, order #%d" % (order['shares'], order['symbol'], result, order['order_type'], order['timestamp'], self.currTimestamp, count) - else: - print "Did not succeed in buying %d shares of %s as %s. Order valid until %d. Placed at: %d. Current timestamp: %d, order #%d" %(order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], order['timestamp'], self.currTimestamp, count) - elif order['task'].upper() == "SELL": - # is a sell - if order['shares']>0: - result = self.sellStock(order) - if noisy: - if result: - print "Succeeded in selling %d shares of %s for %.2f as %s, with close type %s. Current timestamp: %d" % (order['shares'], order['symbol'], result, order['order_type'], order['close_type'], self.currTimestamp) - #else: - #print "Did not succeed in selling %d shares of %s as %s; not enough owned. Order valid until %d. Current timestamp: %d" %(order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], self.currTimestamp) - else: - if noisy: - print "Did not succeed in selling %d shares of %s as %s; you cannot sell a non-positive amount. Order valid until %d. 
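
## execute() above treats an order as actionable only when all three hold:
## it was placed strictly before the current timestamp, it has not expired
## (placed + duration >= now), and it is still unfilled (fill timestamp 0).
## The predicate on its own, using the field names from the order rows:

def order_is_live(order, now):
    placed = order['timestamp']
    return (placed < now
            and placed + order['duration'] >= now
            and order['fill/timestamp'] == 0)
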
Current timestamp: %d" %(order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], self.currTimestamp) - elif order['task'].upper() == "SHORT": - #is a short sell - if self.portfolio.hasStock(order['symbol'],-1): - if order['shares']>0: - result = self.buyStock(order) - if noisy: - if result: - print "Succeeded in short selling %d shares of %s for %.2f as %s, with close type %s. Placed at: %d. Current timestamp: %d, order #%d" % (-order['shares'], order['symbol'], -result, order['order_type'], order['close_type'], order['timestamp'], self.currTimestamp, count) - else: - print "Did not succeed in short selling %d shares of %s as %s; not enough cash??? How do you not have enough cash for a short sell?. Order valid until %d. Placed at: %d. Current timestamp: %d, order #%d" %(order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], order['timestamp'], self.currTimestamp, count) - else: - if noisy: - print "Did not succeed in short selling %d shares of %s as %s; negative values are not valid short sell amounts. Order valid until %d. Placed at: %d. Current timestamp: %d, order #%d" %(-order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], order['timestamp'], self.currTimestamp, count) - elif self.portfolio.hasStock(order['symbol'],1): - if noisy: - print "Did not succeed in short selling %d shares of %s as %s; you cannot short sell a stock you already own. Order valid until %d. Placed at: %d. Current timestamp: %d, order #%d" %(-order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], order['timestamp'], self.currTimestamp, count) - else: - result = self.buyStock(order) - if noisy: - if result: - print "Succeeded in short selling %d shares of %s for %.2f as %s, with close type %s. Placed at: %d. Current timestamp: %d, order #%d" % (-order['shares'], order['symbol'], result, order['order_type'], order['close_type'], order['timestamp'], self.currTimestamp, count) - else: - print "Did not succeed in short selling %d shares of %s as %s; not enough cash??? How do you not have enough cash for a short sell?. Order valid until %d. Placed at: %d. Current timestamp: %d, order #%d" %(-order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], order['timestamp'], self.currTimestamp, count) - elif order['task'].upper() == "COVER": - # is a cover - if order['shares']>0: - result = self.sellStock(order) - if noisy: - if result: - print "Succeeded in covering %d shares of %s for %.2f as %s, with close type %s. Current timestamp: %d" % (-order['shares'], order['symbol'], result, order['order_type'], order['close_type'], self.currTimestamp) - else: - print "Did not succeed in covering %d shares of %s as %s; not short enough or not enough cash. Order valid until %d. Current timestamp: %d" %(-order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], self.currTimestamp) - else: - if noisy: - print "Did not succeed in covering %d shares of %s as %s; you cannot cover a non-positive amount. Order valid until %d. Current timestamp: %d" %(-order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], self.currTimestamp) - else: - if noisy: - print "'%s' is not a valid task. Order valid until %d. 
Current timestamp: %d" % (order['task'].upper(), order['duration'] + order['timestamp'], self.currTimestamp) - count += 1 - - - def addOrders(self,commands): - ''' - @summary: takes in commands (return value of strategy), parses it, and adds it in the correct format to the order data storage - ''' - if self.isTable: - for stock in commands: - newOrder = self.order.addOrder(self.getExecutionTimestamp(),stock[0],stock[1],stock[2],stock[3],stock[4],stock[5],stock[6]) - newOrder.append() - self.order.order.flush() - else: - for stock in commands: - self.order.addOrder(self.getExecutionTimestamp(),stock[0],stock[1],stock[2],stock[3],stock[4],stock[5],stock[6]) - - def run(self): - ''' - @summary: Run the simulation - ''' - - optimizer= Optimizer.Optimizer(self.listOfStocks) - #optimizer= curveFittingOptimizer.Optimizer(self.listOfStocks) - timestamps= list(self.dataAccess.getTimestampArray()) - portfolioValList= list() - - ctr=0 - while (timestamps[ctr]< self.startTime): - ctr+=1 - #while loop done - - - self.currTimestamp = timestamps[ctr] #self.startTime - - ctr2= ctr - - while (timestamps[ctr2]< self.endTime): - ctr2+=1 - if (ctr2>= len(timestamps)): - break - - #while loop done - - if (ctr2>= len (timestamps)): - self.endTime= timestamps[ctr2-1] - else: - self.endTime= timestamps[ctr2] - - if timersActive: - print "Simulation timer started at "+ str(self.currTimestamp) - totalTime = time.time() - cycTime = time.clock() - -# self.strategyData.currTimestamp = self.currTimestamp - i=1 - while self.currTimestamp < self.endTime and self.currTimestamp < time.time(): # and self.currTimestamp < self.strategyData.timestampIndex[len(self.strategyData.timestampIndex)-2]: ************POSSIBLE BUG***** JUST TRYING OUT - # While not yet reached the end timestamp AND not yet caught up to present AND not yet reached the end of the data - # execute the existing orders, then run the strategy and add the new orders - - - - beforeExec=time.clock() - self.execute() - afterExec= time.clock() -# self.addOrders(self.strategy(self.portfolio,self.position,self.currTimestamp,self.strategyData)) -# self.addOrders(optimizer.execute(self.portfolio,self.position,self.currTimestamp,self.strategyData)) - beforeAddOrders= time.clock() - self.addOrders(optimizer.execute(self.portfolio,self.position,self.currTimestamp,self.strategyData, self.dataAccess)) - afterAddOrders= time.clock() - - if noisy or timersActive: - print '' #newline - if mtm: - #portValue = self.portfolio.currCash + self.strategyData.calculatePortValue(self.portfolio.currStocks,self.currTimestamp) - portValue= float (0.0) - print "| %i %.2f |"%(self.currTimestamp,portValue) + " Value from portfolio class: " + str (self.portfolio.calcPortfolioValue(self.currTimestamp, self.dataAccess)) - if timersActive and not noisy: - print "Strategy at %i took %.4f secs"%(self.currTimestamp,(time.clock()-cycTime)) - i+=1 - cycTime = time.clock() - if noisy and not timersActive: - portValue = (self.portfolio.calcPortfolioValue(self.currTimestamp, self.dataAccess)) #self.portfolio.currCash + self.strategyData.calculatePortValue(self.portfolio.currStocks,self.currTimestamp) - portfolioValList.append(portValue) - - print "Strategy at %d completed successfully." 
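
## run() above times each cycle with time.clock(), which was deprecated in
## Python 3.3 and removed in 3.8, so a finished Python 3 port needs a
## substitute; time.perf_counter() is the usual drop-in for interval timing:

import time

cycStart = time.perf_counter()
# ... one strategy cycle ...
elapsed = time.perf_counter() - cycStart
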
% self.currTimestamp - print "Current cash: " + str(self.portfolio.currCash) - print "Current stocks: %s."%self.portfolio.currStocks - print "Current portfolio value: "+ str(portValue)+"\n\n" - #print "Current portfolio value: %.2f.\n\n"%(portValue) - if noisy and timersActive: - portValue = float (self.portfolio.calcPortfolioValue(self.currTimestamp, self.dataAccess)) #self.portfolio.currCash + self.strategyData.calculatePortValue(self.portfolio.currStocks,self.currTimestamp) - portfolioValList.append(portValue) - - print "Strategy at %i took %.4f secs"%(self.currTimestamp,(time.clock()-cycTime)) - print "Exec function took: " + str(afterExec - beforeExec) - print "Time for addorders: " + str(afterAddOrders - beforeAddOrders) - - print "Strategy at %d completed successfully." % self.currTimestamp - #print "Current cash: %.2f."%(self.portfolio.currCash) - print "Current cash: " + str(self.portfolio.currCash) - print "Current stocks: %s."%self.portfolio.currStocks - #print "Current portfolio value: %.2f.\n\n"%(portValue) - print "Current portfolio value: "+ str(portValue)+"\n\n" - i+=1 - cycTime = time.clock() - - - - #self.currTimestamp += self.interval -- Unfortunately this does not work becuase of daylight saving time complications - ctr+=1 - self.currTimestamp= timestamps[ctr] - #self.strategyData.currTimestamp = self.currTimestamp - if noisy: - print "Simulation complete." - if timersActive: - print "Simulation complete in %i seconds."%(time.time() - totalTime) - - self.portfolio.close() - self.position.close() - self.order.close() - #self.strategyData.close() - - - #plotting the portfolio value - fig = Figure() - canvas = FigureCanvas(fig) - ax = fig.add_subplot(111) - ax.plot (portfolioValList) - ax.set_title('Portfolio value') - ax.grid(True) - ax.set_xlabel('time') - ax.set_ylabel('$') - canvas.print_figure('portfolio') - - - - #def run ends - - -cash = 0; comPerShare = 0.0; minCom = 0.; startTime = 0; endTime = 0; timeStep = 0; maxEffect = 0.; decayCycles = 0 -noisy = False; timersActive = False; mtm = False; isTable = False; arrayFile = 'datafiles/defaultArrayFile.pk'; listOfStocksFile="someRandomString" -def main(): - global cash,comPerShare,minCom,startTime,endTime,timeStep,maxEffect,decayCycles,noisy,timersActive,mtm,isTable,arrayFile,listOfStocksFile - # NOTE: the OptionParser class is currently not necessary, as we can just access sys.argv[1:], but if we - # want to implement optional arguments, this will make it considerably easier. - parser = OptionParser() - - # parser.parse_args() returns a tuple of (options, args) - # As of right now, we don't have any options for our program, so we only care about the three arguments: - # config file, strategy module name, strategy main function name - args = parser.parse_args()[1] - -# if len(args) != 3 and len(args) != 2: -# print "FAILURE TO INCLUDE THE CORRECT NUMBER OF ARGUMENTS; TERMINATING." -# return - if len(args) != 1: - print "FAILURE TO INCLUDE THE CORRECT NUMBER OF ARGUMENTS; TERMINATING." - return - - - configFile = 'configfiles/'+args[0] -# if len(args) == 3: -# stratName = args[2] -# else: -# stratName = "strategyMain" - if noisy: - print "About to parse configuration files. Any invalid fields found in the user-specified file will use the relevant value from the default file instead." - for fileName in ["configfiles/default.ini",configFile]: - if noisy: - print "Parsing %s now..." 
% filename[12:] - thisFile = open(fileName,'r') - for line in thisFile.readlines(): - # Separate the command in the config file from the arguments - if not ('#' in line): - line = line.strip().split('=') - command = line[0].strip().upper() - if(command == 'ARRAYFILE' or command =='PYTABLESFILE'): - if len(line)>1: - vals = line[1].split() - else: - vals = [] - else: - if len(line)>1: - vals = line[1].upper().split() - else: - vals = [] - # Parse commands, look for correct number of arguments, do rudimentary error checking, apply to simulator as appropriate - if command == 'CASH': - if len(vals) != 1: - print "WRONG NUMBER OF ARGUMENTS FOR CASH!" - else: - try: - cash = float(vals[0]) - except ValueError: - print "ARGUMENT FOR CASH IS NOT A FLOAT!" - - # Code for handling stocks in a starting portfolio. Implementation not correct; removing for the time being. -# elif command == "STOCK": -# if len(vals) != 2: -# print "WRONG NUMBER OF ARGUMENTS FOR STOCK!! RAAAAWR! ALSO, I NEED TO LEARN TO THROW ERRORS!" -# else: -# try: -# stocks.append([vals[0],int(vals[1])]) -# except: -# print "STOCK TAKES IN A STOCK NAME AND AN INT! AND DON'T YOU FORGET IT!" - elif command == "COMPERSHARE": - if len(vals) != 1: - print "NEED EXACTLY ONE PARAMETER FOR COMMISSIONS PER SHARE." - else: - try: - comPerShare = float(vals[0]) - except ValueError: - print "COMMISSIONS PER SHARE REQUIRES A FLOAT INPUT" - elif command == "MINCOM": - if len(vals) != 1: - print "NEED EXACTLY ONE PARAMETER FOR MINIMUM COMMISSION." - else: - try: - minCom = float(vals[0]) - except ValueError: - print "MINIMUM COMMISSIONS REQUIRES A FLOAT INPUT" - elif command == "STARTTIME": - if len(vals) != 1: - print "NEED EXACTLY ONE PARAMETER FOR START TIME." - else: - try: - startTime = long(vals[0]) - except ValueError: - print "START TIME REQUIRES A LONG INPUT" - elif command == "ENDTIME": - if len(vals) != 1: - print "NEED EXACTLY ONE PARAMETER FOR END TIME." - else: - try: - endTime = long(vals[0]) - except ValueError: - print "END TIME REQUIRES A LONG INPUT" - elif command == "TIMESTEP": - if len(vals) != 1: - print "NEED EXACTLY ONE PARAMETER FOR TIME STEP." - else: - try: - timeStep = long(vals[0]) - except ValueError: - print "TIME STEP REQUIRES A LONG INPUT" - elif command == "MAXMARKETEFFECT": - if len(vals) != 1: - print "NEED EXACTLY ONE PARAMETER FOR MAX MARKET EFFECT." - else: - try: - maxEffect = float(vals[0]) - except ValueError: - print "MAX MARKET EFFECT REQUIRES A FLOAT INPUT" - elif command == "DECAYCYCLES": - if len(vals) != 1: - print "NEED EXACTLY ONE PARAMETER FOR DECAY CYCLES." - else: - try: - decayCycles = int(vals[0]) - except ValueError: - print "DECAY CYCLES REQUIRES AN INTEGER INPUT" - elif command == "DATATYPE": - if len(vals) != 1: - print "NEED EXACTLY ONE PARAMETER FOR DATATYPE." - else: - if vals[0] == "TABLE": - isTable = True - elif vals[0] == "ARRAY": - isTable = False - else: - print "%s IS NOT A VALID PARAMETER FOR DATATYPE." % vals[0] - elif command == "ARRAYFILE": - if len(vals) != 1: - print "NEED EXACTLY ONE PARAMETER FOR ARRAYFILE." - else: - try: - arrayFile = str(vals[0]) - except ValueError: - print "ARRAYFILE REQUIRES A STRING INPUT" - elif command == "PYTABLESFILE": - if len(vals) != 1: - print "NEED EXACTLY ONE PARAMETER FOR PYTABLESFILE." 
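
## The STARTTIME/ENDTIME/TIMESTEP parsing above calls long(), which does not
## exist in Python 3 (int is unbounded there, so int() is the direct
## replacement). Note also that the loop above formats with `filename` while
## the variable is `fileName`, a NameError at runtime. The repeated
## length-check/try/except pattern as one helper (hypothetical name):

def parse_config_value(vals, caster, label):
    if len(vals) != 1:
        print("NEED EXACTLY ONE PARAMETER FOR %s." % label)
        return None
    try:
        return caster(vals[0])     # caster is int/float; Py3 int replaces long
    except ValueError:
        print("%s REQUIRES A %s INPUT" % (label, caster.__name__.upper()))
        return None
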
- else: - try: - pytablesFile = str(vals[0]) - except ValueError: - print "PYTABLESFILE REQUIRES A STRING INPUT" - elif command == "NOISY": - noisy = True - elif command == "TIMER": - timersActive = True - elif command == "MTM": - mtm = True - elif command == "LISTOFSTOCKSFILE": - listOfStocksFile= str (vals[0]) - if not (os.path.exists(listOfStocksFile)): - print "File containing list of stocks does not exist. Will read in all files at specified paths." -# raise ValueError - - elif command != '': - print "Unrecognized command '%s'." % command - thisFile.close() - if noisy: - print "Config files finished parsing. Starting simulation." - - - # Add the strategies subdirectory to the system path so Python can find the module - sys.path.append(sys.path[0] + '/strategies') -# myStrategy = eval("__import__('%s').%s" % (args[1],stratName) ) - mySim = Simulator(cash,{}, startTime, endTime, timeStep, minCom, comPerShare, isTable, maxEffect, arrayFile, listOfStocksFile) - # Add the timestamps - if isTable: - mySim.times = mySim.addTimeStamps() - #mySim.strategyData.timestampIndex = mySim.times - else: - pass - #mySim.times = mySim.strategyData.timestampIndex - mySim.run() - -# This ensures the main function runs automatically when the program is run from the command line, but -# not if the file somehow gets imported from something else. Nifty, eh? -if __name__ == "__main__": +#import optimizers.BollingerOptimizer as Optimizer +import optimizers.BollingerOptimizer as Optimizer +import models.PortfolioModel, models.PositionModel, models.OrderModel, models.StrategyDataModel +import tables as pt, numpy as np +from optparse import OptionParser +import sys, time +import Portfolio, Position, Order, DataAccess as da , StrategyData +import os +import dircache +import numpy as np +#import curveFittingOptimizer +#import optimizers.BollingerOptimizer as Optimizer + +from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas +from matplotlib.figure import Figure + + + +class Simulator(): + def __init__(self, cash, stocks, startTime, endTime, interval, minCom, comPerShare, isTable, maxEffect, arrayFile, listOfStocksFile): + # strategy contains a reference to the strategy method specified in the command line +# self.strategy = strategy + # startTime/endTime are the timestamps marking the beginning and end of the time for which the simulation should run + self.startTime = startTime + self.currTimestamp = startTime + self.endTime = endTime + # interval is the amount of time between iterations of the strategy + self.interval = interval + # minCom is the minimum commission per transaction + self.minCom = minCom + # comPerShare is the calculated commission per share--if this is greater than the minimum commission, this is what gets used + self.comPerShare = comPerShare + # timeStampIndex and currDataTimeIndex are markers to track the current position in the list of timestamps + self.timeStampIndex = 0 + self.currDataTimeIndex = 0 + # maxEffect is the maximum percentage change in price a single transaction can have on the actual market price + self.maxEffect = maxEffect + # times becomes the list of timestamps + self.times = [] + # isTable tells the simulator whether to use the table- or array-specific methods + self.isTable = isTable + + #starting portfolio, position, and order initializations + self.portfolio = Portfolio.Portfolio(cash, stocks) + self.position = Position.Position() + self.order = Order.Order(self.isTable) + #populate the strategyData with the relevant type of data storage + if 
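
## The converted module above still imports OptionParser yet only reads the
## positional arguments, and optparse has been deprecated since Python 3.2.
## If the port is ever finished, argparse expresses the same intent directly
## -- a sketch, not part of this change:

import argparse

parser = argparse.ArgumentParser(description='QSTK legacy simulator')
parser.add_argument('configFile', help='name of a file under configfiles/')
cli = parser.parse_args()
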
+class Simulator():
+    def __init__(self, cash, stocks, startTime, endTime, interval, minCom, comPerShare, isTable, maxEffect, arrayFile, listOfStocksFile):
+        # strategy contains a reference to the strategy method specified in the command line
+#        self.strategy = strategy
+        # startTime/endTime are the timestamps marking the beginning and end of the time for which the simulation should run
+        self.startTime = startTime
+        self.currTimestamp = startTime
+        self.endTime = endTime
+        # interval is the amount of time between iterations of the strategy
+        self.interval = interval
+        # minCom is the minimum commission per transaction
+        self.minCom = minCom
+        # comPerShare is the calculated commission per share--if this is greater than the minimum commission, this is what gets used
+        self.comPerShare = comPerShare
+        # timeStampIndex and currDataTimeIndex are markers to track the current position in the list of timestamps
+        self.timeStampIndex = 0
+        self.currDataTimeIndex = 0
+        # maxEffect is the maximum percentage change in price a single transaction can have on the actual market price
+        self.maxEffect = maxEffect
+        # times becomes the list of timestamps
+        self.times = []
+        # isTable tells the simulator whether to use the table- or array-specific methods
+        self.isTable = isTable
+
+        #starting portfolio, position, and order initializations
+        self.portfolio = Portfolio.Portfolio(cash, stocks)
+        self.position = Position.Position()
+        self.order = Order.Order(self.isTable)
+        #populate the strategyData with the relevant type of data storage
+        if isTable:
+
+#            self.h5f= pt.openFile(pytablesFile, mode = "a") # if mode ='w' is used here then the file gets overwritten!
+
+            listOfPaths = list()
+            #listOfPaths.append("C:\\generated data files\\one stock per file\\maintain folder structure\\US_NASDAQ\\")
+            #listOfPaths.append("C:\\temp\\")
+            #listOfPaths.append("C:\\tempoutput\\")
+            listOfPaths.append("/hzr71/research/QSData/tempdata/") #Modification for gekko
+            self.listOfStocks = self.getStocks(listOfStocksFile, listOfPaths)
+
+            self.dataAccess = da.DataAccess(True, listOfPaths, "/StrategyData", "StrategyData", True, self.listOfStocks, self.startTime, self.endTime)
+            self.strategyData = StrategyData.StrategyData("someRandomStringToNotBreakTheCode", self.dataAccess, self.isTable)
+
+#        else:
+#            self.strategyData = StrategyData.StrategyData(arrayFile,self.isTable)
+
+    def getStocks(self, pathToFile, listOfPaths):
+        listOfStocks = list()
+        if (os.path.exists(pathToFile)):
+            print("Reading in stock names from file...")
+            f = open(pathToFile)
+            lines = f.readlines()
+            f.close()
+            for line1 in lines:
+                listOfStocks.append(line1.partition("\n")[0])
+            #for done
+        else:
+            #Path does not exist
+            print("Reading in all stock names...")
+            fileExtensionToRemove = ".h5"
+
+            for path in listOfPaths:
+                stocksAtThisPath = os.listdir(str(path))
+                #Next, throw away everything that is not a .h5 And these are our stocks!
+                stocksAtThisPath = [x for x in stocksAtThisPath if (str(x).find(str(fileExtensionToRemove)) > -1)]
+                #Now, we remove the .h5 to get the name of the stock
+                stocksAtThisPath = [(x.partition(str(fileExtensionToRemove))[0]) for x in stocksAtThisPath]
+
+                for stock in stocksAtThisPath:
+                    listOfStocks.append(stock)
+        return listOfStocks
+        #readStocksFromFile done
+
+    def addTimeStamps(self):
+        # generates the list of timestamps
+        global timersActive
+        temp = []
+
+        if timersActive:
+            print('Generating valid timestamps')
+            cnt = 0
+            cycTime = time.time()
+#        for i in self.strategyData.strategyData.iterrows():
+        for ts in self.dataAccess.getTimestampArray():
+            if ts not in temp:
+                temp.append(ts)
+            if timersActive:
+                if(cnt%1000000==0):
+                    print('%i rows finished: %i secs elapsed'%(cnt,time.time()-cycTime))
+                cnt+=1
+        if timersActive:
+            print('all rows added: %i secs elapsed'%(time.time()-cycTime))
+        #Put the list in order, convert it to a NumPy array
+        temp.sort()
+        temp = np.array(temp)
+        return temp
+
+    def calcCommission(self, volume):
+        '''
+        @summary: returns the commission on a given trade given the volume
+        '''
+        return max(self.minCom, volume * self.comPerShare)
+
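+    # Worked example for calcCommission above (the numbers are illustrative,
+    # not taken from any config): with minCom = 1.0 and comPerShare = 0.01,
+    # a 50-share trade pays max(1.0, 50 * 0.01) = 1.0, while a 500-share
+    # trade pays max(1.0, 500 * 0.01) = 5.0; the per-share rate only matters
+    # once it exceeds the minimum commission.
+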
+    def getCurrentDataTimestamp(self):
+        '''
+        @summary: returns the timestamp of the most recent data available
+        '''
+        while self.times[self.currDataTimeIndex+1] <= self.currTimestamp:
+            self.currDataTimeIndex += 1
+        return self.times[self.currDataTimeIndex]
+
+#    def getVolumePerDay(self, symbol, timestamp):
+#        stocks = self.strategyData.getStocks(timestamp, timestamp + 86400, symbol)
+#        if len(stocks) > 0:
+#            myStockasDict = stocks[0] #Grab the first dictionary in the list
+#            return myStockasDict['volume'] # Get the volume
+#        return None
+
+    def buyStock(self, newOrder):
+        '''
+        @summary: function takes in an instance of OrderDetails, executes the changes to the portfolio and adds the order to the order table
+        @param newOrder: an instance of OrderDetails representing the new order
+        @warning: The Order should not be added to the order table before calling this function
+        '''
+        ts = self.getCurrentDataTimestamp()
+        maxVol4Day = self.dataAccess.getStockDataItem(newOrder['symbol'], 'volume', ts) #self.getVolumePerDay(newOrder['symbol'], ts)
+        if newOrder['order_type'] == 'moo':
+            #market order open
+#            price = strategyData.getPrice(ts, newOrder['symbol'], 'adj_open')
+            price = self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_open', ts)
+            if price == None or np.isnan(price):
+                if noisy:
+                    print("Price data unavailable for ts:",ts,'stock:',newOrder['symbol'])
+                return None
+            elif maxVol4Day == None or np.isnan(maxVol4Day):
+                if noisy:
+                    print("Volume Data Not Available for ts:", ts, 'stock:', newOrder['symbol'])
+                return None
+            else:
+                print("Checking cash...")
+                checkAmount = min(abs(newOrder['shares']),maxVol4Day)
+                # The new cost is the original total price (price * shares) plus effect * total price:
+                # basically, you raise the cost as you buy
+                cost = (checkAmount * price + (checkAmount * price * self.calcEffect(maxVol4Day, checkAmount))) + self.calcCommission(checkAmount)
+                if(cost>self.portfolio.currCash):
+                    #Not enough cash to buy stock
+                    print("Not enough cash to buy stock.")
+                    #print "Apparently not enough cash. I don't believe this. Current cash: " + str (self.portfolio.currCash) + " total cost: "+ str (cost)+ ", cost of one share: "+str (self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_open', ts))
+                    return None
+            if abs(newOrder['shares']) > maxVol4Day:
+                if newOrder['shares'] < 0:
+                    newOrder['shares'] = -maxVol4Day
+                else:
+                    newOrder['shares'] = maxVol4Day
+                newOrder.update()
+                self.order.order.flush()
+            #__execute trade__
+            #populate fill field in order
+            newOrder['fill/timestamp'] = ts
+            newOrder['fill/quantity'] = newOrder['shares'] if (newOrder['task'].upper() == 'BUY') else -newOrder['shares']
+            newOrder['fill/cashChange'] = -price
+            newOrder['fill/commission'] = self.calcCommission(newOrder['shares'])
+            newOrder['fill/impactCost'] = newOrder['shares'] * price * self.calcEffect(maxVol4Day, newOrder['shares']) # This is the CHANGE in the total cost - what effect the volume has
+            #add trade to portfolio
+            self.portfolio.buyTransaction(newOrder)
+            #add position
+            self.position.addPosition(ts,newOrder['symbol'],newOrder['fill/quantity'],price)
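+        # The affordability check above boils down to (calcEffect is defined
+        # elsewhere in this class; it is assumed here to return a fractional
+        # price impact):
+        #     cost = shares*price + shares*price*effect + commission
+        # For example (made-up numbers): 100 shares at $10.00 with a 0.02
+        # impact and a $5.00 commission require 1000 + 20 + 5 = $1025.00.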
Current cash: " + str (self.portfolio.currCash) + " total cost: "+ str (cost)+ ", cost of one share: "+str (self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_close', ts))) + + return None + if abs(newOrder['shares']) > maxVol4Day: + if newOrder['shares'] < 0: + newOrder['shares'] = -maxVol4Day + else: + newOrder['shares'] = maxVol4Day + newOrder.update() + self.order.order.flush() + newOrder['fill/timestamp'] = ts + newOrder['fill/quantity'] = newOrder['shares'] if (newOrder['task'].upper() == 'BUY') else -newOrder['shares'] + newOrder['fill/cashChange'] = -price + newOrder['fill/commission'] = self.calcCommission(newOrder['shares']) + newOrder['fill/impactCost'] = newOrder['shares'] * price * self.calcEffect(maxVol4Day, newOrder['shares']) # This is the CHANGE in the total cost - what effect the volume has + #add trade to portfolio + self.portfolio.buyTransaction(newOrder) + #add position + self.position.addPosition(ts,newOrder['symbol'],newOrder['fill/quantity'],price) + elif newOrder['order_type'] == 'limit': + #limit order + price = newOrder['limit_price'] + if price == None or np.isnan (price): + if noisy: + print("Price data unavailable for ts:",ts,'stock:',newOrder['symbol']) + return None + elif maxVol4Day == None or np.isnan(maxVol4Day): + if noisy: + print("Volume Data Not Available for ts:", ts, 'stock:', newOrder['symbol']) + return None + else: +# if ((newOrder['limit_price'] > self.strategyData.getPrice(ts, newOrder['symbol'], 'adj_high')) or ( newOrder['limit_price'] < self.strategyData.getPrice(ts, newOrder['symbol'], 'adj_low'))): + if ((newOrder['limit_price'] > self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_high', ts)) or ( newOrder['limit_price'] < self.dataAccess.getData(newOrder['symbol'], 'adj_low', ts))): + #limit price outside of daily range + return None + checkAmount = min(abs(newOrder['shares']),maxVol4Day) + # New is cost the original total price (price * shares) + effect*Total Price + # Basically, you raise the cost as you buy + cost = (checkAmount * price + (checkAmount * price * self.calcEffect(maxVol4Day, checkAmount))) + self.calcCommission(checkAmount) + if(cost>self.portfolio.currCash): + #Not enough cash to buy stock + return None + if abs(newOrder['shares']) > maxVol4Day: + if newOrder['shares'] < 0: + newOrder['shares'] = -maxVol4Day + else: + newOrder['shares'] = maxVol4Day + newOrder.update() + self.order.order.flush() + #__execute trade__ + #populate fill field in order + newOrder['fill/timestamp'] = ts + newOrder['fill/quantity'] = newOrder['shares'] if (newOrder['task'].upper() == 'BUY') else -newOrder['shares'] + newOrder['fill/cashChange'] = -price + newOrder['fill/commission'] = self.calcCommission(newOrder['shares']) + newOrder['fill/impactCost'] = newOrder['shares'] * price * self.calcEffect(maxVol4Day, newOrder['shares']) # This is the CHANGE in the total cost - what effect the volume has + #add trade to portfolio + self.portfolio.buyTransaction(newOrder) + #add position + self.position.addPosition(ts,newOrder['symbol'],newOrder['fill/quantity'],price) + elif newOrder['order_type'] == 'vwap': + #volume weighted average price +# price = strategyData.getPrice(ts, newOrder['symbol'], 'adj_open') +# price = self.dataAccess.getData(newOrder['symbol'], 'adj_open', ts, ts)[0]['adj_close'] + price = self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_open', ts) + if price == None or np.isnan (price): + if noisy: + print("Price data unavailable for ts:",ts,'stock:',newOrder['symbol']) + return None + elif maxVol4Day == None 
+        elif newOrder['order_type'] == 'vwap':
+            #volume weighted average price
+#            price = strategyData.getPrice(ts, newOrder['symbol'], 'adj_open')
+#            price = self.dataAccess.getData(newOrder['symbol'], 'adj_open', ts, ts)[0]['adj_close']
+            price = self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_open', ts)
+            if price == None or np.isnan(price):
+                if noisy:
+                    print("Price data unavailable for ts:",ts,'stock:',newOrder['symbol'])
+                return None
+            elif maxVol4Day == None or np.isnan(maxVol4Day):
+                if noisy:
+                    print("Volume Data Not Available for ts:", ts, 'stock:', newOrder['symbol'])
+                return None
+            else:
+                checkAmount = min(abs(newOrder['shares']),maxVol4Day)
+                # The new cost is the original total price (price * shares) plus effect * total price:
+                # basically, you raise the cost as you buy
+                price += self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_close', ts)#[0]['adj_close'] #strategyData.getPrice(ts, newOrder['symbol'], 'adj_close')
+                price += self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_high', ts)#[0]['adj_high'] #strategyData.getPrice(ts, newOrder['symbol'], 'adj_high')
+                price += self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_low', ts)#[0]['adj_low'] #strategyData.getPrice(ts, newOrder['symbol'], 'adj_low')
+                price = price / 4.
+                cost = (checkAmount * price + (checkAmount * price * self.calcEffect(maxVol4Day, checkAmount))) + self.calcCommission(checkAmount)
+                if(cost>self.portfolio.currCash):
+                    #Not enough cash to buy stock
+                    return None
+            if abs(newOrder['shares']) > maxVol4Day:
+                if newOrder['shares'] < 0:
+                    newOrder['shares'] = -maxVol4Day
+                else:
+                    newOrder['shares'] = maxVol4Day
+                newOrder.update()
+                self.order.order.flush()
+            #__execute trade__
+            #populate fill field in order
+            newOrder['fill/timestamp'] = ts
+            newOrder['fill/quantity'] = newOrder['shares'] if (newOrder['task'].upper() == 'BUY') else -newOrder['shares']
+            newOrder['fill/cashChange'] = -price
+            newOrder['fill/commission'] = self.calcCommission(newOrder['shares'])
+            newOrder['fill/impactCost'] = newOrder['shares'] * price * self.calcEffect(maxVol4Day, newOrder['shares']) # This is the CHANGE in the total cost - what effect the volume has
+            #add trade to portfolio
+            self.portfolio.buyTransaction(newOrder)
+            #add position
+            self.position.addPosition(ts,newOrder['symbol'],newOrder['fill/quantity'],price)
+        else:
+            #throw invalid type error
+            raise TypeError("Not an existing trade type '%s'."
% str(newOrder['order_type'])) + newOrder.update() + self.order.order.flush() + return price + + def sellStock(self,newOrder): + ''' + @summary: function takes in an instance of OrderDetails, executes the changes to the portfolio and adds the order to the order table + @param newOrder: an instance of OrderDetails representing the new order + @warning: The Order should not be added to the order table before calling this function + ''' + ts = self.getCurrentDataTimestamp() #need a function to get the next available time we can trade + maxVol4Day = self.dataAccess.getStockDataItem(newOrder['symbol'], 'volume', ts)#self.getVolumePerDay(newOrder['symbol'], ts) + if newOrder['order_type'] == 'moo': + #market order open + price = self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_open', ts)#[0]['adj_open'] #self.strategyData.getPrice(ts, newOrder['symbol'], 'adj_open') + if price == None or np.isnan (price): + if noisy: + print("Price data unavailable for",ts,newOrder['symbol']) + return None + elif maxVol4Day == None or np.isnan(maxVol4Day): + if noisy: + print("Volume Data Not Available for ts:", ts, 'stock:', newOrder['symbol']) + return None + else: + checkAmount = min(abs(newOrder['shares']),maxVol4Day) + if newOrder['task'].upper() == 'SELL': + if not (self.portfolio.hasStock(newOrder['symbol'],checkAmount)): # NEW + #Not enough shares owned to sell requested amount + print("Not enough shares owned to sell the requested amount") + return None + else: + if not (self.portfolio.hasStock(newOrder['symbol'],-checkAmount)): # NEW + #Not enough shares owned to sell requested amount + print("Not enough shares owned to sell the requested amount") + return None + cost = (checkAmount * price + (checkAmount * price * self.calcEffect(maxVol4Day, checkAmount))) + self.calcCommission(checkAmount) + if(cost>self.portfolio.currCash) and (newOrder['shares'] < 0): + #Not enough cash to cover stock + print("Not enough cash to cover stock") + return None + #__execute trade__ + #populate fill field in order + if abs(newOrder['shares']) > maxVol4Day: + if newOrder['shares'] < 0: + newOrder['shares'] = -maxVol4Day + else: + newOrder['shares'] = maxVol4Day + newOrder.update() + self.order.order.flush() + newOrder['fill/timestamp'] = ts + newOrder['fill/quantity'] = newOrder['shares'] if (newOrder['task'].upper() == 'SELL') else -newOrder['shares'] + newOrder['fill/cashChange'] = price #NEW + newOrder['fill/commission'] = self.calcCommission(newOrder['shares']) + newOrder['fill/impactCost'] = newOrder['shares'] * price * self.calcEffect(maxVol4Day, newOrder['shares']) # This is the CHANGE in the total cost - what effect the volume has + #add trade to portfolio + self.portfolio.sellTransaction(newOrder) + #remove positions according to lifo/fifo + self.position.removePosition(newOrder['symbol'],newOrder['shares'] if (newOrder['task'].upper() == 'SELL') else -newOrder['shares'],newOrder['close_type']) + elif newOrder['order_type'] == 'moc': + #market order close + price = self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_close', ts)#[0]['adj_close'] #strategyData.getPrice(ts, newOrder['symbol'], 'adj_close') + if price == None or np.isnan (price): + if noisy: + print("Price data unavailable for",ts,newOrder['symbol']) + return None + elif maxVol4Day == None or np.isnan(maxVol4Day): + if noisy: + print("Volume Data Not Available for ts:", ts, 'stock:', newOrder['symbol']) + return None + else: + checkAmount = min(abs(newOrder['shares']),maxVol4Day) + if newOrder['shares'] > 0: + if not 
(self.portfolio.hasStock(newOrder['symbol'],checkAmount)): # NEW + #Not enough shares owned to sell requested amount + print("Not enough shares owned to sell the requested amount") + return None + else: + if not (self.portfolio.hasStock(newOrder['symbol'],-checkAmount)): # NEW + #Not enough shares owned to sell requested amount + print("Not enough shares owned to sell the requested amount") + return None + cost = (checkAmount * price + (checkAmount * price * self.calcEffect(maxVol4Day, checkAmount))) + self.calcCommission(checkAmount) + if(cost>self.portfolio.currCash) and (newOrder['shares'] < 0): + #Not enough cash to cover stock + print("Not enough cash to cover stock") + return None + #__execute trade__ + #populate fill field in order + if abs(newOrder['shares']) > maxVol4Day: + if newOrder['shares'] < 0: + newOrder['shares'] = -maxVol4Day + else: + newOrder['shares'] = maxVol4Day + newOrder.update() + self.order.order.flush() + newOrder['fill/timestamp'] = ts + newOrder['fill/quantity'] = newOrder['shares'] if (newOrder['task'].upper() == 'SELL') else -newOrder['shares'] + newOrder['fill/cashChange'] = price + newOrder['fill/commission'] = self.calcCommission(newOrder['shares']) + newOrder['fill/impactCost'] = newOrder['shares'] * price * self.calcEffect(maxVol4Day, newOrder['shares']) # This is the CHANGE in the total cost - what effect the volume has + #add trade to portfolio + self.portfolio.sellTransaction(newOrder) + #remove positions according to lifo/fifo + self.position.removePosition(newOrder['symbol'],newOrder['shares'] if (newOrder['task'].upper() == 'SELL') else -newOrder['shares'],newOrder['close_type']) + elif newOrder['order_type'] == 'limit': + #limit order + price = newOrder['limit_price'] + if price == None or np.isnan (price): + if noisy: + print("Price data unavailable for",ts,newOrder['symbol']) + return None + elif maxVol4Day == None or np.isnan(maxVol4Day): + if noisy: + print("Volume Data Not Available for ts:", ts, 'stock:', newOrder['symbol']) + return None + else: + checkAmount = min(abs(newOrder['shares']),maxVol4Day) + if newOrder['shares'] > 0: + if not (self.portfolio.hasStock(newOrder['symbol'],checkAmount)): # NEW + #Not enough shares owned to sell requested amount + print("Not enough shares owned to sell the requested amount") + return None + else: + if not (self.portfolio.hasStock(newOrder['symbol'],-checkAmount)): # NEW + #Not enough shares owned to sell requested amount + return None + cost = (checkAmount * price + (checkAmount * price * self.calcEffect(maxVol4Day, checkAmount))) + self.calcCommission(checkAmount) + if(cost>self.portfolio.currCash) and (newOrder['shares'] < 0): + #Not enough cash to cover stock + print("Not enough cash to cover stock") + return None + #__execute trade__ + #populate fill field in order +# if ((newOrder['limit_price'] > strategyData.getPrice(ts, newOrder['symbol'], 'adj_high')) or ( newOrder['limit_price'] < strategyData.getPrice(ts, newOrder['symbol'], 'adj_low'))): + if ((newOrder['limit_price'] > self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_high', ts)) or ( newOrder['limit_price'] < self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_low', ts))): + #limit price outside of daily range + return None + if abs(newOrder['shares']) > maxVol4Day: + if newOrder['shares'] < 0: + newOrder['shares'] = -maxVol4Day + else: + newOrder['shares'] = maxVol4Day + newOrder.update() + self.order.order.flush() + #__execute trade__ + #populate fill field in order + newOrder['fill/timestamp'] = ts + 
newOrder['fill/quantity'] = newOrder['shares'] if (newOrder['task'].upper() == 'SELL') else -newOrder['shares']
+            newOrder['fill/cashChange'] = price
+            newOrder['fill/commission'] = self.calcCommission(newOrder['shares'])
+            newOrder['fill/impactCost'] = newOrder['shares'] * price * self.calcEffect(maxVol4Day, newOrder['shares']) # This is the CHANGE in the total cost - what effect the volume has
+            #add trade to portfolio
+            self.portfolio.sellTransaction(newOrder)
+            #remove positions according to lifo/fifo
+            self.position.removePosition(newOrder['symbol'],newOrder['shares'] if (newOrder['task'].upper() == 'SELL') else -newOrder['shares'],newOrder['close_type'])
+        elif newOrder['order_type'] == 'vwap':
+            #volume weighted average price
+            price = self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_open', ts)#[0]['adj_open'] #strategyData.getPrice(ts, newOrder['symbol'], 'adj_open')
+            if price == None or np.isnan(price):
+                if noisy:
+                    print("Price data unavailable for",ts,newOrder['symbol'])
+                return None
+            elif maxVol4Day == None or np.isnan(maxVol4Day):
+                if noisy:
+                    print("Volume Data Not Available for ts:", ts, 'stock:', newOrder['symbol'])
+                return None
+            else:
+                checkAmount = min(abs(newOrder['shares']),maxVol4Day)
+                if newOrder['shares'] > 0:
+                    if not (self.portfolio.hasStock(newOrder['symbol'],checkAmount)): # NEW
+                        #Not enough shares owned to sell requested amount
+                        print("Not enough shares owned to sell the requested amount")
+                        return None
+                else:
+                    if not (self.portfolio.hasStock(newOrder['symbol'],-checkAmount)): # NEW
+                        #Not enough shares owned to sell requested amount
+                        print("Not enough shares owned to sell the requested amount")
+                        return None
+                price += self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_close', ts)#[0]['adj_close'] #strategyData.getPrice(ts, newOrder['symbol'], 'adj_close')
+                price += self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_high', ts)#[0]['adj_high'] #strategyData.getPrice(ts, newOrder['symbol'], 'adj_high')
+                price += self.dataAccess.getStockDataItem(newOrder['symbol'], 'adj_low', ts)#[0]['adj_low'] #strategyData.getPrice(ts, newOrder['symbol'], 'adj_low')
+                price = price / 4.
+                cost = (checkAmount * price + (checkAmount * price * self.calcEffect(maxVol4Day, checkAmount))) + self.calcCommission(checkAmount)
+                if(cost>self.portfolio.currCash) and (newOrder['shares'] < 0):
+                    #Not enough cash to cover stock
+                    print("Not enough cash to cover stock")
+                    return None
+            #__execute trade__
+            #populate fill field in order
+            if abs(newOrder['shares']) > maxVol4Day:
+                if newOrder['shares'] < 0:
+                    newOrder['shares'] = -maxVol4Day
+                else:
+                    newOrder['shares'] = maxVol4Day
+                newOrder.update()
+                self.order.order.flush()
+            newOrder['fill/timestamp'] = ts
+            newOrder['fill/quantity'] = newOrder['shares'] if (newOrder['task'].upper() == 'SELL') else -newOrder['shares']
+            newOrder['fill/cashChange'] = price
+            newOrder['fill/commission'] = self.calcCommission(newOrder['shares'])
+            newOrder['fill/impactCost'] = newOrder['shares'] * price * self.calcEffect(maxVol4Day, newOrder['shares']) # This is the CHANGE in the total cost - what effect the volume has
+            #add trade to portfolio
+            self.portfolio.sellTransaction(newOrder)
+            #remove positions according to lifo/fifo
+            self.position.removePosition(newOrder['symbol'],newOrder['shares'] if (newOrder['task'].upper() == 'SELL') else -newOrder['shares'],newOrder['close_type'])
+        else:
+            #throw invalid type error
+            raise TypeError("Not an existing trade type '%s'." % str(newOrder['order_type']))
+        newOrder.update()
+        self.order.order.flush()
+        return price
+
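+    # Routing summary for execute() below: BUY and SHORT orders go through
+    # buyStock(), SELL and COVER orders through sellStock(). An order is only
+    # attempted while timestamp < currTimestamp <= timestamp + duration and
+    # its fill/timestamp is still 0 (i.e. it has not been filled yet).
+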
+    def execute(self):
+        '''
+        @summary: This function iterates through the orders and attempts to execute all the ones that are still valid and unfilled
+        '''
+        count = 0
+        for order in self.order.getOrders():
+            if (order['timestamp'] < self.currTimestamp):
+                if (order['duration'] + order['timestamp']) >= self.currTimestamp:
+                    if order['fill/timestamp'] == 0:
+                        #Have unfilled, valid orders
+                        if order['task'].upper() == "BUY":
+                            #is a buy
+                            if self.portfolio.hasStock(order['symbol'],1):
+                                if order['shares']>0:
+                                    result = self.buyStock(order)
+                                    if noisy:
+                                        if result is not None:
+                                            print("Succeeded in buying %d shares of %s for %.2f as %s, with close type %s. Placed at: %d. Current timestamp: %d, order #%d" % (order['shares'], order['symbol'], result, order['order_type'], order['close_type'], order['timestamp'], self.currTimestamp, count))
+                                        #else:
+                                            #print "THIS IS MOST LIKELY WRONG- Did not succeed in buying %d shares of %s as %s; not enough cash. Order valid until %d. Placed at: %d. Current timestamp: %d, order #%d" %(order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], order['timestamp'], self.currTimestamp, count)
+                                else:
+                                    if noisy:
+                                        print("Did not succeed in buying %d shares of %s as %s; negative values are not valid buy amounts. Order valid until %d. Placed at: %d. Current timestamp: %d, order #%d" %(order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], order['timestamp'], self.currTimestamp, count))
+                            elif self.portfolio.hasStock(order['symbol'],-1):
+                                if noisy:
+                                    print("Did not succeed in buying %d shares of %s as %s; you must cover your shortsell before you can buy. Order valid until %d. Placed at: %d. Current timestamp: %d, order #%d" %(order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], order['timestamp'], self.currTimestamp, count))
+                            else:
+                                result = self.buyStock(order)
+                                if noisy:
+                                    if result:
+                                        print("Succeeded in buying %d shares of %s for %.2f as %s. Placed at: %d. Current timestamp: %d, order #%d" % (order['shares'], order['symbol'], result, order['order_type'], order['timestamp'], self.currTimestamp, count))
+                                    else:
+                                        print("Did not succeed in buying %d shares of %s as %s. Order valid until %d. Placed at: %d. Current timestamp: %d, order #%d" %(order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], order['timestamp'], self.currTimestamp, count))
+                        elif order['task'].upper() == "SELL":
+                            # is a sell
+                            if order['shares']>0:
+                                result = self.sellStock(order)
+                                if noisy:
+                                    if result:
+                                        print("Succeeded in selling %d shares of %s for %.2f as %s, with close type %s. Current timestamp: %d" % (order['shares'], order['symbol'], result, order['order_type'], order['close_type'], self.currTimestamp))
+                                    #else:
+                                        #print "Did not succeed in selling %d shares of %s as %s; not enough owned. Order valid until %d. Current timestamp: %d" %(order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], self.currTimestamp)
+                            else:
+                                if noisy:
+                                    print("Did not succeed in selling %d shares of %s as %s; you cannot sell a non-positive amount. Order valid until %d. 
Current timestamp: %d" %(order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], self.currTimestamp)) + elif order['task'].upper() == "SHORT": + #is a short sell + if self.portfolio.hasStock(order['symbol'],-1): + if order['shares']>0: + result = self.buyStock(order) + if noisy: + if result: + print("Succeeded in short selling %d shares of %s for %.2f as %s, with close type %s. Placed at: %d. Current timestamp: %d, order #%d" % (-order['shares'], order['symbol'], -result, order['order_type'], order['close_type'], order['timestamp'], self.currTimestamp, count)) + else: + print("Did not succeed in short selling %d shares of %s as %s; not enough cash??? How do you not have enough cash for a short sell?. Order valid until %d. Placed at: %d. Current timestamp: %d, order #%d" %(order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], order['timestamp'], self.currTimestamp, count)) + else: + if noisy: + print("Did not succeed in short selling %d shares of %s as %s; negative values are not valid short sell amounts. Order valid until %d. Placed at: %d. Current timestamp: %d, order #%d" %(-order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], order['timestamp'], self.currTimestamp, count)) + elif self.portfolio.hasStock(order['symbol'],1): + if noisy: + print("Did not succeed in short selling %d shares of %s as %s; you cannot short sell a stock you already own. Order valid until %d. Placed at: %d. Current timestamp: %d, order #%d" %(-order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], order['timestamp'], self.currTimestamp, count)) + else: + result = self.buyStock(order) + if noisy: + if result: + print("Succeeded in short selling %d shares of %s for %.2f as %s, with close type %s. Placed at: %d. Current timestamp: %d, order #%d" % (-order['shares'], order['symbol'], result, order['order_type'], order['close_type'], order['timestamp'], self.currTimestamp, count)) + else: + print("Did not succeed in short selling %d shares of %s as %s; not enough cash??? How do you not have enough cash for a short sell?. Order valid until %d. Placed at: %d. Current timestamp: %d, order #%d" %(-order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], order['timestamp'], self.currTimestamp, count)) + elif order['task'].upper() == "COVER": + # is a cover + if order['shares']>0: + result = self.sellStock(order) + if noisy: + if result: + print("Succeeded in covering %d shares of %s for %.2f as %s, with close type %s. Current timestamp: %d" % (-order['shares'], order['symbol'], result, order['order_type'], order['close_type'], self.currTimestamp)) + else: + print("Did not succeed in covering %d shares of %s as %s; not short enough or not enough cash. Order valid until %d. Current timestamp: %d" %(-order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], self.currTimestamp)) + else: + if noisy: + print("Did not succeed in covering %d shares of %s as %s; you cannot cover a non-positive amount. Order valid until %d. Current timestamp: %d" %(-order['shares'], order['symbol'], order['order_type'], order['duration'] + order['timestamp'], self.currTimestamp)) + else: + if noisy: + print("'%s' is not a valid task. Order valid until %d. 
Current timestamp: %d" % (order['task'].upper(), order['duration'] + order['timestamp'], self.currTimestamp)) + count += 1 + + + def addOrders(self,commands): + ''' + @summary: takes in commands (return value of strategy), parses it, and adds it in the correct format to the order data storage + ''' + if self.isTable: + for stock in commands: + newOrder = self.order.addOrder(self.getExecutionTimestamp(),stock[0],stock[1],stock[2],stock[3],stock[4],stock[5],stock[6]) + newOrder.append() + self.order.order.flush() + else: + for stock in commands: + self.order.addOrder(self.getExecutionTimestamp(),stock[0],stock[1],stock[2],stock[3],stock[4],stock[5],stock[6]) + + def run(self): + ''' + @summary: Run the simulation + ''' + + optimizer= Optimizer.Optimizer(self.listOfStocks) + #optimizer= curveFittingOptimizer.Optimizer(self.listOfStocks) + timestamps= list(self.dataAccess.getTimestampArray()) + portfolioValList= list() + + ctr=0 + while (timestamps[ctr]< self.startTime): + ctr+=1 + #while loop done + + + self.currTimestamp = timestamps[ctr] #self.startTime + + ctr2= ctr + + while (timestamps[ctr2]< self.endTime): + ctr2+=1 + if (ctr2>= len(timestamps)): + break + + #while loop done + + if (ctr2>= len (timestamps)): + self.endTime= timestamps[ctr2-1] + else: + self.endTime= timestamps[ctr2] + + if timersActive: + print("Simulation timer started at "+ str(self.currTimestamp)) + totalTime = time.time() + cycTime = time.clock() + +# self.strategyData.currTimestamp = self.currTimestamp + i=1 + while self.currTimestamp < self.endTime and self.currTimestamp < time.time(): # and self.currTimestamp < self.strategyData.timestampIndex[len(self.strategyData.timestampIndex)-2]: ************POSSIBLE BUG***** JUST TRYING OUT + # While not yet reached the end timestamp AND not yet caught up to present AND not yet reached the end of the data + # execute the existing orders, then run the strategy and add the new orders + + + + beforeExec=time.clock() + self.execute() + afterExec= time.clock() +# self.addOrders(self.strategy(self.portfolio,self.position,self.currTimestamp,self.strategyData)) +# self.addOrders(optimizer.execute(self.portfolio,self.position,self.currTimestamp,self.strategyData)) + beforeAddOrders= time.clock() + self.addOrders(optimizer.execute(self.portfolio,self.position,self.currTimestamp,self.strategyData, self.dataAccess)) + afterAddOrders= time.clock() + + if noisy or timersActive: + print('') #newline + if mtm: + #portValue = self.portfolio.currCash + self.strategyData.calculatePortValue(self.portfolio.currStocks,self.currTimestamp) + portValue= float (0.0) + print("| %i %.2f |"%(self.currTimestamp,portValue) + " Value from portfolio class: " + str (self.portfolio.calcPortfolioValue(self.currTimestamp, self.dataAccess))) + if timersActive and not noisy: + print("Strategy at %i took %.4f secs"%(self.currTimestamp,(time.clock()-cycTime))) + i+=1 + cycTime = time.clock() + if noisy and not timersActive: + portValue = (self.portfolio.calcPortfolioValue(self.currTimestamp, self.dataAccess)) #self.portfolio.currCash + self.strategyData.calculatePortValue(self.portfolio.currStocks,self.currTimestamp) + portfolioValList.append(portValue) + + print("Strategy at %d completed successfully." 
% self.currTimestamp)
+                print("Current cash: " + str(self.portfolio.currCash))
+                print("Current stocks: %s."%self.portfolio.currStocks)
+                print("Current portfolio value: "+ str(portValue)+"\n\n")
+                #print "Current portfolio value: %.2f.\n\n"%(portValue)
+            if noisy and timersActive:
+                portValue = float(self.portfolio.calcPortfolioValue(self.currTimestamp, self.dataAccess)) #self.portfolio.currCash + self.strategyData.calculatePortValue(self.portfolio.currStocks,self.currTimestamp)
+                portfolioValList.append(portValue)
+
+                print("Strategy at %i took %.4f secs"%(self.currTimestamp,(time.clock()-cycTime)))
+                print("Exec function took: " + str(afterExec - beforeExec))
+                print("Time for addorders: " + str(afterAddOrders - beforeAddOrders))
+
+                print("Strategy at %d completed successfully." % self.currTimestamp)
+                #print "Current cash: %.2f."%(self.portfolio.currCash)
+                print("Current cash: " + str(self.portfolio.currCash))
+                print("Current stocks: %s."%self.portfolio.currStocks)
+                #print "Current portfolio value: %.2f.\n\n"%(portValue)
+                print("Current portfolio value: "+ str(portValue)+"\n\n")
+                i+=1
+                cycTime = time.clock()
+
+            #self.currTimestamp += self.interval -- Unfortunately this does not work because of daylight saving time complications
+            ctr+=1
+            self.currTimestamp = timestamps[ctr]
+            #self.strategyData.currTimestamp = self.currTimestamp
+        if noisy:
+            print("Simulation complete.")
+        if timersActive:
+            print("Simulation complete in %i seconds."%(time.time() - totalTime))
+
+        self.portfolio.close()
+        self.position.close()
+        self.order.close()
+        #self.strategyData.close()
+
+        #plotting the portfolio value
+        fig = Figure()
+        canvas = FigureCanvas(fig)
+        ax = fig.add_subplot(111)
+        ax.plot(portfolioValList)
+        ax.set_title('Portfolio value')
+        ax.grid(True)
+        ax.set_xlabel('time')
+        ax.set_ylabel('$')
+        canvas.print_figure('portfolio')
+
+        #def run ends
+
+
+cash = 0; comPerShare = 0.0; minCom = 0.; startTime = 0; endTime = 0; timeStep = 0; maxEffect = 0.; decayCycles = 0
+noisy = False; timersActive = False; mtm = False; isTable = False; arrayFile = 'datafiles/defaultArrayFile.pk'; listOfStocksFile="someRandomString"
+def main():
+    global cash,comPerShare,minCom,startTime,endTime,timeStep,maxEffect,decayCycles,noisy,timersActive,mtm,isTable,arrayFile,listOfStocksFile
+    # NOTE: the OptionParser class is currently not necessary, as we can just access sys.argv[1:], but if we
+    # want to implement optional arguments, this will make it considerably easier.
+    parser = OptionParser()
+
+    # parser.parse_args() returns a tuple of (options, args)
+    # As of right now, we don't have any options for our program, so we only care about the single
+    # argument: the name of the config file
+    args = parser.parse_args()[1]
+
+#    if len(args) != 3 and len(args) != 2:
+#        print "FAILURE TO INCLUDE THE CORRECT NUMBER OF ARGUMENTS; TERMINATING."
+#        return
+    if len(args) != 1:
+        print("FAILURE TO INCLUDE THE CORRECT NUMBER OF ARGUMENTS; TERMINATING.")
+        return
+
+    configFile = 'configfiles/'+args[0]
+#    if len(args) == 3:
+#        stratName = args[2]
+#    else:
+#        stratName = "strategyMain"
+    if noisy:
+        print("About to parse configuration files. Any invalid fields found in the user-specified file will use the relevant value from the default file instead.")
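+    # Config files are plain text with one KEY=value entry per line; flag
+    # commands such as NOISY, TIMER, and MTM take no value. Note that the
+    # parser skips any line containing '#', so a comment cannot share a line
+    # with a command. An illustrative example (all values made up):
+    #
+    #     CASH=100000
+    #     COMPERSHARE=0.01
+    #     MINCOM=1.0
+    #     STARTTIME=1167609600
+    #     ENDTIME=1199145600
+    #     TIMESTEP=86400
+    #     DATATYPE=TABLE
+    #     NOISY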
+    for fileName in ["configfiles/default.ini",configFile]:
+        if noisy:
+            print("Parsing %s now..." % fileName[12:])
+        thisFile = open(fileName,'r')
+        for line in thisFile.readlines():
+            # Separate the command in the config file from the arguments
+            if not ('#' in line):
+                line = line.strip().split('=')
+                command = line[0].strip().upper()
+                if(command == 'ARRAYFILE' or command =='PYTABLESFILE'):
+                    if len(line)>1:
+                        vals = line[1].split()
+                    else:
+                        vals = []
+                else:
+                    if len(line)>1:
+                        vals = line[1].upper().split()
+                    else:
+                        vals = []
+                # Parse commands, look for correct number of arguments, do rudimentary error checking, apply to simulator as appropriate
+                if command == 'CASH':
+                    if len(vals) != 1:
+                        print("WRONG NUMBER OF ARGUMENTS FOR CASH!")
+                    else:
+                        try:
+                            cash = float(vals[0])
+                        except ValueError:
+                            print("ARGUMENT FOR CASH IS NOT A FLOAT!")
+
+                # Code for handling stocks in a starting portfolio. Implementation not correct; removing for the time being.
+#                elif command == "STOCK":
+#                    if len(vals) != 2:
+#                        print "WRONG NUMBER OF ARGUMENTS FOR STOCK!! RAAAAWR! ALSO, I NEED TO LEARN TO THROW ERRORS!"
+#                    else:
+#                        try:
+#                            stocks.append([vals[0],int(vals[1])])
+#                        except:
+#                            print "STOCK TAKES IN A STOCK NAME AND AN INT! AND DON'T YOU FORGET IT!"
+                elif command == "COMPERSHARE":
+                    if len(vals) != 1:
+                        print("NEED EXACTLY ONE PARAMETER FOR COMMISSIONS PER SHARE.")
+                    else:
+                        try:
+                            comPerShare = float(vals[0])
+                        except ValueError:
+                            print("COMMISSIONS PER SHARE REQUIRES A FLOAT INPUT")
+                elif command == "MINCOM":
+                    if len(vals) != 1:
+                        print("NEED EXACTLY ONE PARAMETER FOR MINIMUM COMMISSION.")
+                    else:
+                        try:
+                            minCom = float(vals[0])
+                        except ValueError:
+                            print("MINIMUM COMMISSIONS REQUIRES A FLOAT INPUT")
+                elif command == "STARTTIME":
+                    if len(vals) != 1:
+                        print("NEED EXACTLY ONE PARAMETER FOR START TIME.")
+                    else:
+                        try:
+                            startTime = int(vals[0])
+                        except ValueError:
+                            print("START TIME REQUIRES AN INTEGER INPUT")
+                elif command == "ENDTIME":
+                    if len(vals) != 1:
+                        print("NEED EXACTLY ONE PARAMETER FOR END TIME.")
+                    else:
+                        try:
+                            endTime = int(vals[0])
+                        except ValueError:
+                            print("END TIME REQUIRES AN INTEGER INPUT")
+                elif command == "TIMESTEP":
+                    if len(vals) != 1:
+                        print("NEED EXACTLY ONE PARAMETER FOR TIME STEP.")
+                    else:
+                        try:
+                            timeStep = int(vals[0])
+                        except ValueError:
+                            print("TIME STEP REQUIRES AN INTEGER INPUT")
+                elif command == "MAXMARKETEFFECT":
+                    if len(vals) != 1:
+                        print("NEED EXACTLY ONE PARAMETER FOR MAX MARKET EFFECT.")
+                    else:
+                        try:
+                            maxEffect = float(vals[0])
+                        except ValueError:
+                            print("MAX MARKET EFFECT REQUIRES A FLOAT INPUT")
+                elif command == "DECAYCYCLES":
+                    if len(vals) != 1:
+                        print("NEED EXACTLY ONE PARAMETER FOR DECAY CYCLES.")
+                    else:
+                        try:
+                            decayCycles = int(vals[0])
+                        except ValueError:
+                            print("DECAY CYCLES REQUIRES AN INTEGER INPUT")
+                elif command == "DATATYPE":
+                    if len(vals) != 1:
+                        print("NEED EXACTLY ONE PARAMETER FOR DATATYPE.")
+                    else:
+                        if vals[0] == "TABLE":
+                            isTable = True
+                        elif vals[0] == "ARRAY":
+                            isTable = False
+                        else:
+                            print("%s IS NOT A VALID PARAMETER FOR DATATYPE." 
% vals[0]) + elif command == "ARRAYFILE": + if len(vals) != 1: + print("NEED EXACTLY ONE PARAMETER FOR ARRAYFILE.") + else: + try: + arrayFile = str(vals[0]) + except ValueError: + print("ARRAYFILE REQUIRES A STRING INPUT") + elif command == "PYTABLESFILE": + if len(vals) != 1: + print("NEED EXACTLY ONE PARAMETER FOR PYTABLESFILE.") + else: + try: + pytablesFile = str(vals[0]) + except ValueError: + print("PYTABLESFILE REQUIRES A STRING INPUT") + elif command == "NOISY": + noisy = True + elif command == "TIMER": + timersActive = True + elif command == "MTM": + mtm = True + elif command == "LISTOFSTOCKSFILE": + listOfStocksFile= str (vals[0]) + if not (os.path.exists(listOfStocksFile)): + print("File containing list of stocks does not exist. Will read in all files at specified paths.") +# raise ValueError + + elif command != '': + print("Unrecognized command '%s'." % command) + thisFile.close() + if noisy: + print("Config files finished parsing. Starting simulation.") + + + # Add the strategies subdirectory to the system path so Python can find the module + sys.path.append(sys.path[0] + '/strategies') +# myStrategy = eval("__import__('%s').%s" % (args[1],stratName) ) + mySim = Simulator(cash,{}, startTime, endTime, timeStep, minCom, comPerShare, isTable, maxEffect, arrayFile, listOfStocksFile) + # Add the timestamps + if isTable: + mySim.times = mySim.addTimeStamps() + #mySim.strategyData.timestampIndex = mySim.times + else: + pass + #mySim.times = mySim.strategyData.timestampIndex + mySim.run() + +# This ensures the main function runs automatically when the program is run from the command line, but +# not if the file somehow gets imported from something else. Nifty, eh? +if __name__ == "__main__": main() \ No newline at end of file diff --git a/Legacy/Legacy/simulator/StrategyData.py b/Legacy/Legacy/simulator/StrategyData.py index 3aef6f047..d23b889ed 100644 --- a/Legacy/Legacy/simulator/StrategyData.py +++ b/Legacy/Legacy/simulator/StrategyData.py @@ -1,447 +1,447 @@ -import tables as pt, numpy as np, pickle -from models.StrategyDataModel import StrategyDataModel -import numpy as np - -class StrategyData: - def __init__(self,dataFile, dataAccess, isTable = False): - ''' - @param dataFile: The filename of the data file (array or pytables) - @param isTable: The runtype, true for table false for array - @param dataAcess: is a DataAccess object that is used to access the stockData - ''' - #for pytables - self.isTable = isTable - self.currTimestamp = 0 - self.dataAccess=dataAccess - if(isTable): - isTable #do nothing.. 
just so that I don't have to remove the if/else -# self.strategyDataFile = pt.openFile(dataFile, mode = "r") -# self.strategyData = self.strategyDataFile.root.StrategyData.StrategyData -# self.timestampIndex = None -# self.stocksIndex = self.findStocks() - else: - self.prevTsIdx = 0 - f = open(dataFile,'r') - ts = pickle.load(f) - st = pickle.load(f) - pA = pickle.load(f) - f.close() - self.symbolIndex = st - self.timestampIndex = ts - self.priceArray = pA - - def findStocks(self): - ''' - @summary: Populates the symbolIndex for table run - ''' - temp = [] - for i in self.strategyData.iterrows(): - if i['symbol'] not in temp: - temp.append(self.cloneRow(i)['symbol']) - temp.sort() - return temp - - def calculatePortValue(self,stocks,timestamp): - ''' - @param stocks: the current stocks you hold as represented by currStocks in portfolio - @param timestamp: the timestamp used to calculate the present value of stocks - @summary: Calculates the current portfolio value: cash + stocks. If the value of a stock on a particular day is NaN then it keeps going back in time (upto 20 days) to find the first nonNan stock value. If a non NaN value is not found then the value of the portfolio is NaN - - ''' - - total=0 - DAY=86400 - for stock in stocks: - priceOfStock= self.dataAccess.getStockDataItem (stock, 'adj_close', timestamp- DAY) #close of previous day - if not(np.isnan(priceOfStock)): - total+= priceOfStock - else: - - #Keep looking back in time till we get a non NaN closing value - ctr=2 - while (np.isnan(priceOfStock) and ctr < 20): - priceOfStock= self.dataAccess.getStockDataItem (stock, 'adj_close', timestamp- (ctr*DAY)) - ctr+=1 - - if np.isnan(priceOfStock): - return np.NaN - - total+= priceOfStock - - return total - - - -# total = 0 -# for stock in stocks: -# prices = self.dataAccess.getStockDataList(stock, 'adj_close', timestamp- 86400, timestamp) #self.getPrices(timestamp - 86400, timestamp, stock, 'adj_close') -# i = 86400 -# count = 0 -# while(len(prices)==0 and count<10): -# prices = self.dataAccess.getStockDataList(stock, 'adj_close',timestamp - i - 86400, timestamp - i) #self.getPrices(timestamp - i, timestamp - i - 86400, stock, 'adj_close') -# i += 86400 -# count+=1 -# if(len(prices) != 0): -# total += prices[len(prices)-1] * stocks[stock] -# return total - #calculatePortValue -# def getStocks(self, startTime=None, endTime=None, ticker=None): -# ''' -# Returns a list of dictionaries that contain all of the valid stock data as keys -# or an empty list if no results are found -# Can be called independently or used as part of the getPrices function -# startTime: checks stocks >= startTime -# endTime: checks stocks <= endTime -# ticker: the ticker/symbol of the stock or a list of tickers -# ''' -# if self.isTable: -# if endTime == None: -# endTime = self.currTimestamp -# if endTime > self.currTimestamp: -# print 'Tried to access a future time %i, endTime set to %i' %(endTime, self.currTimestamp) -# endTime = self.currTimestamp -# tempList = [] -# if(ticker!=None): -# if(type(ticker)==str): -# for row in self.strategyData.where('symbol=="%s"'%ticker): -# if(startTime!=None and endTime!=None): -# if(row['timestamp']>=startTime and row['timestamp']=startTime and row['timestamp']=startTime and row['timestamp']=startTime and row['timestamp']=startTime and row['timestamp']=startTime and row['timestamp']= startTime -# endTime: checks stocks <= endTime -# ticker: the ticker/symbol of the stock or a list of tickers -# description: the field from data that is desired IE. 
adj_high -# ''' -# if self.isTable: -# rows = self.getStocks(startTime, endTime, ticker) -# result = [] -# if(description==None): -# for row in rows: -# row = self.cloneRow(row) -# result.append((row['adj_high'],row['adj_low'],row['adj_open'],row['adj_close'],row['close'])) -# else: -# for row in rows: -# result.append(self.cloneRow(row)[description]) -# return result -# else: -# return self.getPricesArray(startTime, endTime, ticker, description) - - def cloneRow(self,row): - ''' - @summary: Makes a copy of the row so that the correct information will be appended to the list - ''' - dct = {} - dct['symbol'] = row['symbol'] - dct['exchange'] = row['exchange'] - dct['adj_high'] = row['adj_high'] - dct['adj_low'] = row['adj_low'] - dct['adj_open'] = row['adj_open'] - dct['adj_close'] = row['adj_close'] - dct['close'] = row['close'] - dct['volume'] = row['volume'] - dct['timestamp'] = row['timestamp'] - dct['date'] = row['date'] - dct['interval'] = row['interval'] - return dct - -# def getStocksArray(self, startTime=None, endTime=None, ticker=None): -# ''' -# Returns a list of dictionaries that contain all of the valid stock data as keys -# or an empty list if no results are found -# Can be called independently or used as part of the getPrices function -# startTime: checks stocks >= startTime -# endTime: checks stocks <= endTime -# ticker: the ticker/symbol of the stock or a list of tickers -# ''' -# if endTime == None: -# endTime = self.currTimestamp -# if endTime > self.currTimestamp: -# print 'Tried to access a future time %i, endTime set to %i' %(endTime, self.currTimestamp) -# endTime = self.currTimestamp -# if ticker != None: -# if type(ticker)==str: -# tickIdxList = [] -# tickerIdx = self.symbolIndex.searchsorted(ticker) -# if tickerIdx < self.symbolIndex.size and self.symbolIndex[tickerIdx] == ticker: -# tickIdxList.append(tickerIdx) -# elif type(ticker)==list: -# for tick in tickerIdx: -# tickerIdx = self.symbolIndex.searchsorted(ticker) -# if tickerIdx < self.symbolIndex.size and self.symbolIndex[tickerIdx] == ticker: -# tickIdxList.append(tickerIdx) -# else: -# tickerIdx = None -# if startTime != None: -# startIdx = self.timestampIndex.searchsorted(startTime, 'left') -# else: -# startIdx = None -# if endTime != None: -# endIdx = self.timestampIndex.searchsorted(endTime, 'left') -# else: -# endIdx = None -# if tickerIdx != None: -# result = np.array([]) -# for tickerIdx in tickIdxList: -# result = np.append(result,self.priceArray[startIdx:endIdx,tickerIdx]) -# return result -# else: -# result = self.priceArray[startIdx:endIdx,:] -# if len(result) ==0: -# return [] -# else: -# return result[0] - - -# def getPriceArray(self, timestamp, ticker, description): -# ''' -# timestamp: the exact timestamp of the desired stock data -# ticker: the ticker/symbol of the stock -# description: the field from data that is desired IE. adj_high -# NOTE: If the data is incorrect or invalid, the function will return None -# ''' -# tsIdx = self.timestampIndex.searchsorted(timestamp) -# if tsIdx >= self.timestampIndex.size or self.timestampIndex[tsIdx] != timestamp: -# return None #NaN -# tickerIdx = self.symbolIndex.searchsorted(ticker) -# if tickerIdx >= self.symbolIndex.size or self.symbolIndex[tickerIdx] != ticker: -# return None #NaN -# return self.priceArray[tsIdx,tickerIdx][description] - -# def getPricesArray(self, startTime=None, endTime=None, ticker=None, description=None): -# ''' -# Returns a list of prices for the given description: [adj_high0, adj_high1, adj_high2...] 
-# or a tuple if no description is given: [ (adj_high0, adj_low0, adj_open0, adj_close0, close0), (adj_high1, adj_low1...), .... ] -# startTime: checks stocks >= startTime -# endTime: checks stocks <= endTime -# ticker: the ticker/symbol of the stock or a list of tickers -# description: the field from data that is desired IE. adj_high -# description: -# ''' -# rows = self.getStocksArray(startTime, endTime, ticker) -# result = [] -# if(description==None): -# for row in rows: -# result.append((row['adj_high'],row['adj_low'],row['adj_open'],row['adj_close'],row['close'])) -# else: -# for row in rows: -# result.append(row[description]) -# return result - - def close(self): - if self.isTable: - self.isTable -# self.strategyDataFile.close() - - class OutputOrder: - ''' - @summary: Subclass to make adding strategies easier - ''' - def __init__(self,symbol = "",volume = 0,task = "",duration = 0,closeType = "",orderType = "",limitPrice = 0): - self.symbol = symbol - self.volume = volume - self.task = task - self.duration = duration - self.closeType = closeType - self.orderType = orderType - self.limitPrice = limitPrice - - def getOutput(self): - if self.symbol == "" or type(self.symbol) != str: - print "Invalid symbol %s in output." % str(self.symbol) - return None - if self.volume == 0 or type(self.volume) != int: - print "Invalid volume %s in output." % str(self.volume) - return None - if self.task == "" or type(self.task) != str: - print "Invalid task %s in output." % str(self.task) - return None - if self.duration <= 0 or type(self.duration) != int: - print "Invalid duration %s in output." % str(self.duration) - return None - if self.orderType == "" or type(self.orderType) != str: - print "Invalid orderType %s in output." % str(self.orderType) - return None - if type(self.task) != str: - print "Invalid closeType %s specified." % str(self.task) - return None - if self.task.upper() == "SELL" or self.task.upper() == "COVER": - if self.closeType == "" or type(self.closeType) != str: - print "Invalid closeType %s specified for %s." % (str(self.closeType),self.task) - return None - if type(self.orderType) != str: - print "Invalid orderType %s specified." % str(self.orderType) - if self.orderType.upper() == "LIMIT": - if self.limitPrice == 0 or type(self.limitPrice) != int: - print "Invalid limitPrice specified." - return None - if self.task.upper() not in ["BUY","SELL","SHORT","COVER"]: - print "Invalid task %s specified." %self.task - return None - if self.orderType.upper() not in ["LIMIT","MOC","MOO","VWAP"]: - print "Invalid orderType %s specified." 
% self.orderType - return None - return (self.task,self.volume,self.symbol,self.orderType,self.duration,self.closeType,self.limitPrice) - - #END OutputOrder SUBLCLASS - - -def generateKnownArray(): - timestamps = np.array([]) - stocks = np.array([]) - for i in range(10,100): - timestamps = np.append(timestamps, i*86400) - for i in range(3): - stocks = np.append(stocks,'stock%i'%i) - priceArray = np.ndarray(shape=(timestamps.size,stocks.size),dtype=np.object) - for i in range(timestamps.size): - for j in range(stocks.size): - row = {} - row['exchange'] = 'NYSE' - row['symbol'] = stocks[j] - row['adj_open'] = (timestamps[i]/86400) * (j+1) - row['adj_close'] = (timestamps[i]/86400) * (j+1) - row['adj_high'] = (timestamps[i]/86400) * (j+1) - row['adj_low'] = (timestamps[i]/86400) * (j+1) - row['close'] = (timestamps[i]/86400) * (j+1) - row['volume'] = 200 - row['timestamp'] = timestamps[i] - row['when_available'] = timestamps[i] - row['interval'] = 86400 - priceArray[i,j] = row - return (timestamps, stocks, priceArray) -def generateRandomArray(): - import random - random.seed(1) - #86400 seconds in a day - timestamps = np.array([]) - stocks = np.array([]) - for i in range(10,100): #timestamps - timestamps = np.append(timestamps,i*86400) - for i in range(30): #stocks - stocks = np.append(stocks,'stock%.6i'%i) - - priceArray = np.ndarray( shape=(timestamps.size, stocks.size), dtype=np.object) - for i in range(timestamps.size): - for j in range(stocks.size): - - row = {} - if j ==0: - row['exchange'] = 'NYSE' - row['symbol'] = stocks[j] - row['adj_open'] = 10 - row['adj_close'] = 20 - row['adj_high'] = 22 - row['adj_low'] = 7 - row['close'] = 20 - row['volume'] = 200 - row['timestamp'] = timestamps[i] - row['when_available'] = timestamps[i] - row['interval'] = 86400 - else: - adjOpen = random.random() * random.randint(1,100) - adjClose = random.random() * random.randint(1,100) - row['exchange'] = 'NYSE' - row['symbol'] = stocks[j] - row['adj_open'] = adjOpen - row['adj_close'] = adjClose - row['adj_high'] = max(adjOpen,adjClose) * random.randint(1,5) - row['adj_low'] = min(adjOpen,adjClose) / random.randint(1,5) - row['close'] = adjClose - row['volume'] = random.randint(1000,10000) - row['timestamp'] = timestamps[i] - row['when_available'] = timestamps[i] - row['interval'] = 86400 - priceArray[i,j] = row - if i%10==0: - print i, - if i%100==0: - print '' - print '' - ''' - pickle_output = open('randomArrayFile.pkl','w') - pickler = pickle.dump(timestamps,pickle_output) - pickler = pickle.dump(stocks,pickle_output) - pickler = pickle.dump(priceArray,pickle_output) - pickle_output.close() - ''' - return (timestamps, stocks, priceArray) - -def methodTest(): - strat = StrategyData('models/PriceTestData.h5') - print strat.getStocks(startTime=0, ticker='KO') - -def classTest(): - ''' - Needs to be updated to reflect move from data class to interpreter class - ''' - rows = getStocks(ticker = 'KO') - rows = getStocks(1020, 1050) - for row in rows: - print row['symbol'], row['exchange'], row['timestamp'],\ - row['when_available'], row['interval'], row['data'] - - price = getPrice('adj_high', 1020, 'KO') - print price - prices = getPrices('adj_high',ticker='KO') - print prices \ No newline at end of file +import tables as pt, numpy as np, pickle +from models.StrategyDataModel import StrategyDataModel +import numpy as np + +class StrategyData: + def __init__(self,dataFile, dataAccess, isTable = False): + ''' + @param dataFile: The filename of the data file (array or pytables) + @param isTable: The runtype, 
true for table false for array
+        @param dataAccess: a DataAccess object that is used to access the stockData
+        '''
+        #for pytables
+        self.isTable = isTable
+        self.currTimestamp = 0
+        self.dataAccess = dataAccess
+        if(isTable):
+            pass #do nothing.. just so that I don't have to remove the if/else
+#            self.strategyDataFile = pt.openFile(dataFile, mode = "r")
+#            self.strategyData = self.strategyDataFile.root.StrategyData.StrategyData
+#            self.timestampIndex = None
+#            self.stocksIndex = self.findStocks()
+        else:
+            self.prevTsIdx = 0
+            f = open(dataFile,'rb')
+            ts = pickle.load(f)
+            st = pickle.load(f)
+            pA = pickle.load(f)
+            f.close()
+            self.symbolIndex = st
+            self.timestampIndex = ts
+            self.priceArray = pA
+
+    def findStocks(self):
+        '''
+        @summary: Populates the symbolIndex for table run
+        '''
+        temp = []
+        for i in self.strategyData.iterrows():
+            if i['symbol'] not in temp:
+                temp.append(self.cloneRow(i)['symbol'])
+        temp.sort()
+        return temp
+
+    def calculatePortValue(self,stocks,timestamp):
+        '''
+        @param stocks: the current stocks you hold as represented by currStocks in portfolio
+        @param timestamp: the timestamp used to calculate the present value of stocks
+        @summary: Calculates the current value of the stock holdings (the caller adds cash). If the value of a stock on a particular day is NaN then it keeps going back in time (up to 20 days) to find the first non-NaN stock value. If a non-NaN value is not found then the value of the portfolio is NaN
+        '''
+
+        total = 0
+        DAY = 86400
+        for stock in stocks:
+            priceOfStock = self.dataAccess.getStockDataItem(stock, 'adj_close', timestamp - DAY) #close of previous day
+            if not(np.isnan(priceOfStock)):
+                total += priceOfStock * stocks[stock] #price times shares held
+            else:
+                #Keep looking back in time till we get a non-NaN closing value
+                ctr = 2
+                while (np.isnan(priceOfStock) and ctr < 20):
+                    priceOfStock = self.dataAccess.getStockDataItem(stock, 'adj_close', timestamp - (ctr*DAY))
+                    ctr += 1
+
+                if np.isnan(priceOfStock):
+                    return np.NaN
+
+                total += priceOfStock * stocks[stock] #price times shares held
+
+        return total
+
+
+
+#        total = 0
+#        for stock in stocks:
+#            prices = self.dataAccess.getStockDataList(stock, 'adj_close', timestamp- 86400, timestamp) #self.getPrices(timestamp - 86400, timestamp, stock, 'adj_close')
+#            i = 86400
+#            count = 0
+#            while(len(prices)==0 and count<10):
+#                prices = self.dataAccess.getStockDataList(stock, 'adj_close',timestamp - i - 86400, timestamp - i) #self.getPrices(timestamp - i, timestamp - i - 86400, stock, 'adj_close')
+#                i += 86400
+#                count+=1
+#            if(len(prices) != 0):
+#                total += prices[len(prices)-1] * stocks[stock]
+#        return total
+    #calculatePortValue
+#    def getStocks(self, startTime=None, endTime=None, ticker=None):
+#        '''
+#        Returns a list of dictionaries that contain all of the valid stock data as keys
+#        or an empty list if no results are found
+#        Can be called independently or used as part of the getPrices function
+#        startTime: checks stocks >= startTime
+#        endTime: checks stocks <= endTime
+#        ticker: the ticker/symbol of the stock or a list of tickers
+#        '''
+#        if self.isTable:
+#            if endTime == None:
+#                endTime = self.currTimestamp
+#            if endTime > self.currTimestamp:
+#                print 'Tried to access a future time %i, endTime set to %i' %(endTime, self.currTimestamp)
+#                endTime = self.currTimestamp
+#            tempList = []
+#            if(ticker!=None):
+#                if(type(ticker)==str):
+#                    for row in self.strategyData.where('symbol=="%s"'%ticker):
+#                        if(startTime!=None and endTime!=None):
+#                            if(row['timestamp']>=startTime and row['timestamp']=startTime and 
row['timestamp']=startTime and row['timestamp']=startTime and row['timestamp']= startTime +# endTime: checks stocks <= endTime +# ticker: the ticker/symbol of the stock or a list of tickers +# description: the field from data that is desired IE. adj_high +# ''' +# if self.isTable: +# rows = self.getStocks(startTime, endTime, ticker) +# result = [] +# if(description==None): +# for row in rows: +# row = self.cloneRow(row) +# result.append((row['adj_high'],row['adj_low'],row['adj_open'],row['adj_close'],row['close'])) +# else: +# for row in rows: +# result.append(self.cloneRow(row)[description]) +# return result +# else: +# return self.getPricesArray(startTime, endTime, ticker, description) + + def cloneRow(self,row): + ''' + @summary: Makes a copy of the row so that the correct information will be appended to the list + ''' + dct = {} + dct['symbol'] = row['symbol'] + dct['exchange'] = row['exchange'] + dct['adj_high'] = row['adj_high'] + dct['adj_low'] = row['adj_low'] + dct['adj_open'] = row['adj_open'] + dct['adj_close'] = row['adj_close'] + dct['close'] = row['close'] + dct['volume'] = row['volume'] + dct['timestamp'] = row['timestamp'] + dct['date'] = row['date'] + dct['interval'] = row['interval'] + return dct + +# def getStocksArray(self, startTime=None, endTime=None, ticker=None): +# ''' +# Returns a list of dictionaries that contain all of the valid stock data as keys +# or an empty list if no results are found +# Can be called independently or used as part of the getPrices function +# startTime: checks stocks >= startTime +# endTime: checks stocks <= endTime +# ticker: the ticker/symbol of the stock or a list of tickers +# ''' +# if endTime == None: +# endTime = self.currTimestamp +# if endTime > self.currTimestamp: +# print 'Tried to access a future time %i, endTime set to %i' %(endTime, self.currTimestamp) +# endTime = self.currTimestamp +# if ticker != None: +# if type(ticker)==str: +# tickIdxList = [] +# tickerIdx = self.symbolIndex.searchsorted(ticker) +# if tickerIdx < self.symbolIndex.size and self.symbolIndex[tickerIdx] == ticker: +# tickIdxList.append(tickerIdx) +# elif type(ticker)==list: +# for tick in tickerIdx: +# tickerIdx = self.symbolIndex.searchsorted(ticker) +# if tickerIdx < self.symbolIndex.size and self.symbolIndex[tickerIdx] == ticker: +# tickIdxList.append(tickerIdx) +# else: +# tickerIdx = None +# if startTime != None: +# startIdx = self.timestampIndex.searchsorted(startTime, 'left') +# else: +# startIdx = None +# if endTime != None: +# endIdx = self.timestampIndex.searchsorted(endTime, 'left') +# else: +# endIdx = None +# if tickerIdx != None: +# result = np.array([]) +# for tickerIdx in tickIdxList: +# result = np.append(result,self.priceArray[startIdx:endIdx,tickerIdx]) +# return result +# else: +# result = self.priceArray[startIdx:endIdx,:] +# if len(result) ==0: +# return [] +# else: +# return result[0] + + +# def getPriceArray(self, timestamp, ticker, description): +# ''' +# timestamp: the exact timestamp of the desired stock data +# ticker: the ticker/symbol of the stock +# description: the field from data that is desired IE. 
adj_high +# NOTE: If the data is incorrect or invalid, the function will return None +# ''' +# tsIdx = self.timestampIndex.searchsorted(timestamp) +# if tsIdx >= self.timestampIndex.size or self.timestampIndex[tsIdx] != timestamp: +# return None #NaN +# tickerIdx = self.symbolIndex.searchsorted(ticker) +# if tickerIdx >= self.symbolIndex.size or self.symbolIndex[tickerIdx] != ticker: +# return None #NaN +# return self.priceArray[tsIdx,tickerIdx][description] + +# def getPricesArray(self, startTime=None, endTime=None, ticker=None, description=None): +# ''' +# Returns a list of prices for the given description: [adj_high0, adj_high1, adj_high2...] +# or a tuple if no description is given: [ (adj_high0, adj_low0, adj_open0, adj_close0, close0), (adj_high1, adj_low1...), .... ] +# startTime: checks stocks >= startTime +# endTime: checks stocks <= endTime +# ticker: the ticker/symbol of the stock or a list of tickers +# description: the field from data that is desired IE. adj_high +# description: +# ''' +# rows = self.getStocksArray(startTime, endTime, ticker) +# result = [] +# if(description==None): +# for row in rows: +# result.append((row['adj_high'],row['adj_low'],row['adj_open'],row['adj_close'],row['close'])) +# else: +# for row in rows: +# result.append(row[description]) +# return result + + def close(self): + if self.isTable: + self.isTable +# self.strategyDataFile.close() + + class OutputOrder: + ''' + @summary: Subclass to make adding strategies easier + ''' + def __init__(self,symbol = "",volume = 0,task = "",duration = 0,closeType = "",orderType = "",limitPrice = 0): + self.symbol = symbol + self.volume = volume + self.task = task + self.duration = duration + self.closeType = closeType + self.orderType = orderType + self.limitPrice = limitPrice + + def getOutput(self): + if self.symbol == "" or type(self.symbol) != str: + print("Invalid symbol %s in output." % str(self.symbol)) + return None + if self.volume == 0 or type(self.volume) != int: + print("Invalid volume %s in output." % str(self.volume)) + return None + if self.task == "" or type(self.task) != str: + print("Invalid task %s in output." % str(self.task)) + return None + if self.duration <= 0 or type(self.duration) != int: + print("Invalid duration %s in output." % str(self.duration)) + return None + if self.orderType == "" or type(self.orderType) != str: + print("Invalid orderType %s in output." % str(self.orderType)) + return None + if type(self.task) != str: + print("Invalid closeType %s specified." % str(self.task)) + return None + if self.task.upper() == "SELL" or self.task.upper() == "COVER": + if self.closeType == "" or type(self.closeType) != str: + print("Invalid closeType %s specified for %s." % (str(self.closeType),self.task)) + return None + if type(self.orderType) != str: + print("Invalid orderType %s specified." % str(self.orderType)) + if self.orderType.upper() == "LIMIT": + if self.limitPrice == 0 or type(self.limitPrice) != int: + print("Invalid limitPrice specified.") + return None + if self.task.upper() not in ["BUY","SELL","SHORT","COVER"]: + print("Invalid task %s specified." %self.task) + return None + if self.orderType.upper() not in ["LIMIT","MOC","MOO","VWAP"]: + print("Invalid orderType %s specified." 
% self.orderType) + return None + return (self.task,self.volume,self.symbol,self.orderType,self.duration,self.closeType,self.limitPrice) + + #END OutputOrder SUBLCLASS + + +def generateKnownArray(): + timestamps = np.array([]) + stocks = np.array([]) + for i in range(10,100): + timestamps = np.append(timestamps, i*86400) + for i in range(3): + stocks = np.append(stocks,'stock%i'%i) + priceArray = np.ndarray(shape=(timestamps.size,stocks.size),dtype=np.object) + for i in range(timestamps.size): + for j in range(stocks.size): + row = {} + row['exchange'] = 'NYSE' + row['symbol'] = stocks[j] + row['adj_open'] = (timestamps[i]/86400) * (j+1) + row['adj_close'] = (timestamps[i]/86400) * (j+1) + row['adj_high'] = (timestamps[i]/86400) * (j+1) + row['adj_low'] = (timestamps[i]/86400) * (j+1) + row['close'] = (timestamps[i]/86400) * (j+1) + row['volume'] = 200 + row['timestamp'] = timestamps[i] + row['when_available'] = timestamps[i] + row['interval'] = 86400 + priceArray[i,j] = row + return (timestamps, stocks, priceArray) +def generateRandomArray(): + import random + random.seed(1) + #86400 seconds in a day + timestamps = np.array([]) + stocks = np.array([]) + for i in range(10,100): #timestamps + timestamps = np.append(timestamps,i*86400) + for i in range(30): #stocks + stocks = np.append(stocks,'stock%.6i'%i) + + priceArray = np.ndarray( shape=(timestamps.size, stocks.size), dtype=np.object) + for i in range(timestamps.size): + for j in range(stocks.size): + + row = {} + if j ==0: + row['exchange'] = 'NYSE' + row['symbol'] = stocks[j] + row['adj_open'] = 10 + row['adj_close'] = 20 + row['adj_high'] = 22 + row['adj_low'] = 7 + row['close'] = 20 + row['volume'] = 200 + row['timestamp'] = timestamps[i] + row['when_available'] = timestamps[i] + row['interval'] = 86400 + else: + adjOpen = random.random() * random.randint(1,100) + adjClose = random.random() * random.randint(1,100) + row['exchange'] = 'NYSE' + row['symbol'] = stocks[j] + row['adj_open'] = adjOpen + row['adj_close'] = adjClose + row['adj_high'] = max(adjOpen,adjClose) * random.randint(1,5) + row['adj_low'] = min(adjOpen,adjClose) / random.randint(1,5) + row['close'] = adjClose + row['volume'] = random.randint(1000,10000) + row['timestamp'] = timestamps[i] + row['when_available'] = timestamps[i] + row['interval'] = 86400 + priceArray[i,j] = row + if i%10==0: + print(i, end=' ') + if i%100==0: + print('') + print('') + ''' + pickle_output = open('randomArrayFile.pkl','w') + pickler = pickle.dump(timestamps,pickle_output) + pickler = pickle.dump(stocks,pickle_output) + pickler = pickle.dump(priceArray,pickle_output) + pickle_output.close() + ''' + return (timestamps, stocks, priceArray) + +def methodTest(): + strat = StrategyData('models/PriceTestData.h5') + print(strat.getStocks(startTime=0, ticker='KO')) + +def classTest(): + ''' + Needs to be updated to reflect move from data class to interpreter class + ''' + rows = getStocks(ticker = 'KO') + rows = getStocks(1020, 1050) + for row in rows: + print(row['symbol'], row['exchange'], row['timestamp'],\ + row['when_available'], row['interval'], row['data']) + + price = getPrice('adj_high', 1020, 'KO') + print(price) + prices = getPrices('adj_high',ticker='KO') + print(prices) \ No newline at end of file diff --git a/Legacy/Visualizer/AccessData.py b/Legacy/Visualizer/AccessData.py index 76f6cdeb1..31704a1d4 100644 --- a/Legacy/Visualizer/AccessData.py +++ b/Legacy/Visualizer/AccessData.py @@ -73,8 +73,8 @@ def DataParameter(PandasObject, featureslist, symbols, timestamps): 
MinFeat.append(np.amin(np.min(PandasObject[feature], axis=0))) MaxFeat.append(np.amax(np.max(PandasObject[feature], axis=0))) - dMinFeat=dict(zip(featureslist, MinFeat)) - dMaxFeat=dict(zip(featureslist, MaxFeat)) + dMinFeat=dict(list(zip(featureslist, MinFeat))) + dMaxFeat=dict(list(zip(featureslist, MaxFeat))) return(dMinFeat, dMaxFeat, startday, endday) @@ -94,4 +94,4 @@ def GetData(directorylocation): if __name__ == '__main__': directorylocation = os.environ['QS']+'/Tools/Visualizer/Data/Dow_2009-01-01_2010-12-31/' GetData(directorylocation) - print "The access functions are working" + print("The access functions are working") diff --git a/Legacy/Visualizer/CsvData.py b/Legacy/Visualizer/CsvData.py index db01a49c2..c8b0e3ea5 100644 --- a/Legacy/Visualizer/CsvData.py +++ b/Legacy/Visualizer/CsvData.py @@ -30,7 +30,7 @@ def csv_Dataconverter(datadirectory, ip_path): f = open(ip_path) data = csv.reader(f) - fields = data.next() + fields = next(data) featureslist = fields[2:] timestamps=[] @@ -56,7 +56,7 @@ def csv_Dataconverter(datadirectory, ip_path): f.close() data = csv.reader(open(ip_path)) - data.next() + next(data) for row in data: timestamp = dt.datetime.strptime(row[0], "%Y-%m-%d") @@ -103,7 +103,7 @@ def csv_Dataconverter(datadirectory, ip_path): pickle.dump(Numpyarray_Final,open(op_folderpath +'ALLDATA.pkl', 'wb' ),-1) - print 'All data has been converted' + print('All data has been converted') def main(): datadirectory = 'TestCSV' diff --git a/Legacy/Visualizer/FormatData.py b/Legacy/Visualizer/FormatData.py index 06e200c88..a9b47a71b 100644 --- a/Legacy/Visualizer/FormatData.py +++ b/Legacy/Visualizer/FormatData.py @@ -31,7 +31,7 @@ def genData(): if not os.path.exists(op_folderpath): os.mkdir(op_folderpath) - print "Data was missing" + print("Data was missing") return op_folderpath = op_folderpath + '/' diff --git a/Legacy/Visualizer/GenerateData.py b/Legacy/Visualizer/GenerateData.py index 12e042abb..cbb87fe6d 100644 --- a/Legacy/Visualizer/GenerateData.py +++ b/Legacy/Visualizer/GenerateData.py @@ -55,13 +55,13 @@ def genData(startday, endday, datadirectory, symbols): all_symbols = dataobj.get_all_symbols() badsymbols=set(symbols)-set(all_symbols) if len(list(badsymbols))>0: - print "Some Symbols are not valid" + str(badsymbols) + print("Some Symbols are not valid" + str(badsymbols)) symbols=list(set(symbols)-badsymbols) lsKeys = ['open', 'high', 'low', 'close', 'volume'] ldfData = dataobj.get_data( timestamps, symbols, lsKeys ) - dData = dict(zip(lsKeys, ldfData)) + dData = dict(list(zip(lsKeys, ldfData))) # Creating the 3D Matrix diff --git a/Legacy/Visualizer/Visualizer.py b/Legacy/Visualizer/Visualizer.py index 0413b7681..e98c014fc 100644 --- a/Legacy/Visualizer/Visualizer.py +++ b/Legacy/Visualizer/Visualizer.py @@ -1680,7 +1680,7 @@ def make_movie(self): text, ok = QtGui.QInputDialog.getText(self, 'Input Dialog', 'Enter name of movie:') if ok: if len(text)<1: - print "Movie name Invalid" + print("Movie name Invalid") return folderpath= folderpath + str(text) else: diff --git a/Legacy/csvconverter/compustat_csv_to_pkl.py b/Legacy/csvconverter/compustat_csv_to_pkl.py index 67d851653..88a480b54 100644 --- a/Legacy/csvconverter/compustat_csv_to_pkl.py +++ b/Legacy/csvconverter/compustat_csv_to_pkl.py @@ -31,7 +31,7 @@ def _dumpFiles( dData, lSets, lsOutPaths ): @param lSets: List of symbol sets, each corresponds to a directory, e.g. NYSE, NASDAQ. @param lsOutPaths: List of path strings, same indexes as lSets. 
''' - lKeys = dData.keys() + lKeys = list(dData.keys()) for key in lKeys: for i, symSet in enumerate( lSets ): @@ -51,7 +51,7 @@ def _analyze(): try: rootdir = os.environ['QSDATA'] except KeyError: - print "Please be sure to set the value for QSDATA in config.sh or local.sh\n" + print("Please be sure to set the value for QSDATA in config.sh or local.sh\n") ''' Create lists of input and output paths ''' fFile = ( rootdir + "/Raw/Compustat/Compustat.csv") @@ -82,17 +82,17 @@ def _analyze(): used[j] = 1 if( i % 10000 == 0 ): - print (i / 1378625.0)*100, '%' + print((i / 1378625.0)*100, '%') - print 'Bad (non float) labels:' - print badSet + print('Bad (non float) labels:') + print(badSet) for i,label in enumerate(lsLabels): if label in badSet: del lsLabels[i] - print '\n\nGood (float) labels:' - print lsLabels + print('\n\nGood (float) labels:') + print(lsLabels) return @@ -102,7 +102,7 @@ def convert (): @summary: Converts a Compustat CSV file to pickle files of numpy arrays. ''' - print "Starting..."+ str(time.strftime("%H:%M:%S")) + print("Starting..."+ str(time.strftime("%H:%M:%S"))) ''' Write every so often to save memory, 20k lines is usually < .5GB ''' lSaveMem = 20000 @@ -110,7 +110,7 @@ def convert (): try: rootdir = os.environ['QSDATA'] except KeyError: - print "Please be sure to set the value for QSDATA in config.sh or local.sh\n" + print("Please be sure to set the value for QSDATA in config.sh or local.sh\n") ''' Create lists of input and output paths ''' fFile = ( rootdir + "/Raw/Compustat/Compustat.csv") @@ -148,10 +148,10 @@ def convert (): setAmex = set( Access.get_symbols_in_sublist("/US/AMEX") ) ''' If stock appears in more than one index, remove to avoid ambiguity ''' - print 'Ignoring duplicate stocks:', + print('Ignoring duplicate stocks:', end=' ') dup1 = setNyse.intersection( setNasdaq.union(setAmex)) dup2 = setNasdaq.intersection( setAmex ) - print dup1.union(dup2) + print(dup1.union(dup2)) setNyse = setNyse - dup1.union(dup2) setAmex = setAmex - dup1.union(dup2) @@ -193,7 +193,7 @@ def convert (): ''' Dict of ticker->numpy array mapping ''' dData = dict() - print '' + print('') ''' Main loop, iterate over the rows in the csv file ''' @@ -214,8 +214,8 @@ def convert (): ''' If the file exists (temporary memory saving measure), read it in and delete file from disk ''' if( os.path.isfile(sFilename)): - if dData.has_key(sTic): - print 'File should not be both on disk and in dict' + if sTic in dData: + print('File should not be both on disk and in dict') sys.exit("FAILURE") fIn = open( sFilename, 'rb' ) @@ -231,19 +231,19 @@ def convert (): row[i] = 'nan' ''' Add row if data exists, if not, create new array ''' - if dData.has_key(sTic): + if sTic in dData: dData[sTic] = np.vstack( (dData[sTic], np.array([fDate] + [row[i] for i in llUseCols], dtype=np.float)) ) else: dData[sTic]= np.array( [fDate] + [row[i] for i in llUseCols], dtype=np.float ) if( (j+1) % 1000 == 0): fDone = (j / 1378625.0) * 100 - print '\rApprox %.2lf%%'%((j / 1378625.0) * 100), + print('\rApprox %.2lf%%'%((j / 1378625.0) * 100), end=' ') if( (j+1) % lSaveMem == 0): ''' Write all the pickle files we currently have ''' - print '\nWriting %i lines to pickle files to save memory\n'%(lSaveMem) + print('\nWriting %i lines to pickle files to save memory\n'%(lSaveMem)) _dumpFiles( dData, lSets, lsOutPaths) ''' Remember to delete! 
''' del dData @@ -252,12 +252,12 @@ def convert (): # Done writing files # Done with main loop - print '' - print 'Writing final pickle files\n' + print('') + print('Writing final pickle files\n') _dumpFiles( dData, lSets, lsOutPaths) del dData - print "Finished..."+ str(time.strftime("%H:%M:%S")) + print("Finished..."+ str(time.strftime("%H:%M:%S"))) return diff --git a/Legacy/csvconverter/csvapi.py b/Legacy/csvconverter/csvapi.py index 6cc534894..ffbcde8c5 100644 --- a/Legacy/csvconverter/csvapi.py +++ b/Legacy/csvconverter/csvapi.py @@ -1,439 +1,439 @@ -import numpy as np -import dircache -from sets import Set -import time -import tables as pt -import sys -import time -import os -from optparse import OptionParser - -class TimestampsModel (pt.IsDescription): - timestamp = pt.Time64Col() -#class TimestampsModel ends - -class StrategyDataModel(pt.IsDescription): - symbol = pt.StringCol(30) #30 char string; Ticker - exchange = pt.StringCol(10) #10 char string; NYSE, NASDAQ, etc. - adj_high = pt.Float32Col() - adj_low = pt.Float32Col() - adj_open = pt.Float32Col() - adj_close = pt.Float32Col() - close = pt.Float32Col() - volume = pt.Float32Col() #Changing from Int32Col() - timestamp = pt.Time64Col() - date = pt.Int32Col() - interval = pt.Time64Col() -#class StrategyDataModel done - -class StockPriceData: - def __init__(self): - self.filt_list=[] - self.timestamps=[] - - def getSymbols(self, listOfPaths, fileExtensionToRemove): - ''' - @bug: This might not work if the path contains a folder whose name has .csv, or is some random file that has a .csv in it.. - So, lets assume that whoever is using this is not going to "cheat" us - ''' - listOflistOfStocks=list() - for path in listOfPaths: - stocksAtThisPath=list () - stocksAtThisPath= dircache.listdir(str(path)) - #Next, throw away everything that is not a .csv And these are our stocks! - stocksAtThisPath = filter (lambda x:(str(x).find(str(fileExtensionToRemove)) > -1), stocksAtThisPath) - #Now, we remove the .csv to get the name of the stock - stocksAtThisPath = map(lambda x:(x.partition(str(fileExtensionToRemove))[0]),stocksAtThisPath) - - #Then add that list to listOflistOfStocks - listOflistOfStocks.append(stocksAtThisPath) - return listOflistOfStocks - #getSymbols done - -#build the array - def getData(self, listOfListOfStocks, listOfInputPaths, startDate, endDate, listOfOutputPaths): - ''' - @summary: This is where all the work happens - @attention: Assumption here is that past data never changes - @bug: The exchange is currently set pretty randomly - ''' - - #Finding no. of stocks - noOfStocks=0 - for stock_list in listOfListOfStocks: - noOfStocks+= len (stock_list) - #for stock in stock_list: - #print str(stock) - - print "No. of stocks: " + str(noOfStocks) - print "No. 
of timestamps: " + str(len(self.timestamps)) - - - listIndex=-1 - ctr=1; - for inputFileFolder in listOfInputPaths: - listIndex+=1 - outputFileFolder= str(listOfOutputPaths[listIndex]) - stocks_list= listOfListOfStocks[listIndex] - for i in range(0, len(stocks_list)): # - self.count_of_non_existent_stocks): - print str(stocks_list[i]) +" "+str(ctr)+" of "+ str(noOfStocks)+" "+ str(time.strftime("%H:%M:%S")) - ctr= ctr+1 - - beginTS= startDate - - #Check if the file exists - if (os.path.exists(str(outputFileFolder) + str(stocks_list[i]+".h5"))): - - #Checking the last timestamp in the hdf file - h5f=pt.openFile(outputFileFolder + str(stocks_list[i]+".h5"), mode = "a") - print "Updating " +str(outputFileFolder + str(stocks_list[i]+".h5")) - table= h5f.getNode('/StrategyData', 'StrategyData') - beginTS= int(time.strftime("%Y%m%d", time.gmtime(table[table.nrows-1]['timestamp']))) #+ 1 #POSSIBLE BUG? - if (str(beginTS) >= self.timestamps[len(self.timestamps)-1]): #if (os.path.getmtime(str(outputFileFolder)+str(stocks_list[i])+".h5") > os.path.getmtime(str(self.dirname+ "/"+ str(stocks_list[i]+".CSV")))): - #The hdf5 file for this stock has been modified after the CSV file was modified. Ergo- no changes need to be made to it now.. - print str(stocks_list[i])+".h5 already is up to date. "+ str(time.strftime("%H:%M:%S")) - h5f.close() - continue - else: - #File is present but not upto date - beginTS= int(time.strftime("%Y%m%d", time.gmtime(table[table.nrows-1]['timestamp']))) - else: - #The only foreseeable reason why there might be an exception here is that the hdf file does not exist. So, creating it. - print "Creating file: " + str(outputFileFolder) + str(stocks_list[i]+".h5")+" "+ str(time.strftime("%H:%M:%S")) - - h5f = pt.openFile(str(outputFileFolder) + str(stocks_list[i]+".h5"), mode = "w") - group = h5f.createGroup("/", 'StrategyData') - table = h5f.createTable(group, 'StrategyData', StrategyDataModel) - beginTS= startDate - #else done - - f=open(str(inputFileFolder)+str(stocks_list[i]+str(".CSV"))) - jk=f.readlines() - f.close() - jk.pop(0) - - self.filt_list=list() - filt_list_temp=filter(lambda x: (int(x.split(',')[1])> int(beginTS)) ,jk) #Because we only want timestamps strictly greater than the last timestamp currently in the file. - filt_list_temp=filter(lambda x: (int(x.split(',')[1])<= int(endDate)) ,filt_list_temp) - filt_list_temp=map(lambda x:(x.split(',')[0],x.split(',')[1],x.split(',')[2],x.split(',')[3],x.split(',')[4],x.split(',')[5],x.split(',')[6],(x.split(',')[7]).strip()),filt_list_temp) - - self.filt_list.append(filt_list_temp) - - if (table.nrows > 0): - #we are appending to an old file and not creating a new file.. - - tsStartIndex= np.array(self.timestamps).searchsorted(beginTS) +1 - - else: - #creating a new file... 
- tsStartIndex =0 - #if (table.nrows > 0) done - - k = 0 - for j in range(tsStartIndex, len(self.timestamps)): - if (k< len(self.filt_list[0])): - - if((self.timestamps[j])< (self.filt_list[0][k][1])): - row=table.row - row['exchange'] = 'NYSE' - row['symbol'] = self.filt_list[0][k][0] - row['adj_open'] = np.NaN - row['adj_close'] = np.NaN - row['adj_high'] = np.NaN - row['adj_low'] = np.NaN - row['close'] = np.NaN - row['volume'] = np.NaN - parseddate = time.strptime(self.timestamps[j],'%Y%m%d') -# row['date'] = self.timestamps[j] - row['timestamp'] = time.mktime(parseddate) - row.append() - - elif(self.timestamps[j]==self.filt_list[0][k][1]): - row=table.row - row['exchange'] = 'NASDAQ' - row['symbol'] = self.filt_list[0][k][0] - row['adj_open'] = float(self.filt_list[0][k][2]) - row['adj_close'] = float(self.filt_list[0][k][5]) - row['adj_high'] = float(self.filt_list[0][k][3]) - row['adj_low'] = float(self.filt_list[0][k][4]) - row['close'] = float(self.filt_list[0][k][7]) - row['volume'] = int(self.filt_list[0][k][6]) - parseddate = time.strptime(self.timestamps[j],'%Y%m%d') -# row['date'] = self.timestamps[j] - row['timestamp'] = time.mktime(parseddate) - row.append() - - k=k+1 - else: - print"###############Something has gone wrong. A stock had a timestamp which was not in the timestamp list..." - print "TS: " + str(self.timestamps[j]) + ", Stock: " + str (self.filt_list[0][k][1]) - k=k+1 - #should stop executing here? Naah -# sys.exit() - - else: - row=table.row - row['exchange'] = 'NYSE' - row['symbol'] = stocks_list[i] #self.filt_list[0][len(self.filt_list[0])-1][0] ####NOTE. POSSIBLE BUG? - row['adj_open'] = np.NaN - row['adj_close'] = np.NaN - row['adj_high'] = np.NaN - row['adj_low'] = np.NaN - row['close'] = np.NaN - row['volume'] = np.NaN - parseddate = time.strptime(self.timestamps[j],'%Y%m%d') -# row['date'] = self.timestamps[j] - row['timestamp'] = time.mktime(parseddate) -# row['interval'] = 86400 - row.append() - - #for j in range(len(self.timestamps)) ends - table.flush() - h5f.close() - #for i in range(0, stocks.size) done - - print "Writing data done. "+ str(time.strftime("%H:%M:%S")) - - - def makeOrUpdateTimestampsFile(self, fileName, listOflistOfStocks, listOfInputPaths, startDate, endDate): - ''' - @bug: Formerly did not take care of DST - @attention: fixed DST bug. No known DST problems now. 
- ''' - - DAY=86400 - - if (os.path.exists(fileName)): - print "Updating timestamps" - h5f = pt.openFile(str(fileName), mode = "a") - table=h5f.getNode('/timestamps','timestamps') - - lastTSFromFile= str(time.strftime("%Y%m%d", time.gmtime(table[table.nrows-1]['timestamp']))) - - if (str(startDate)<= lastTSFromFile): - startDate=str(time.strftime("%Y%m%d", time.gmtime(table[table.nrows-1]['timestamp']))) # TO FIX DST BUG - - else: - print "Creating new timestamp file" - h5f = pt.openFile(str(fileName), mode = "w") - group = h5f.createGroup("/", 'timestamps') - table = h5f.createTable(group, 'timestamps', TimestampsModel) - - print "start: " + str(startDate)+", end: "+ str(endDate) - - tslist=list() - ctr=1 - if (str(startDate) <= str(endDate)): - listIndex=-1 - for path in listOfInputPaths: - listIndex+=1 - for stock in listOflistOfStocks[listIndex]: - #print str(stock)+" "+str(ctr)+" "+str(time.strftime("%H:%M:%S")) - ctr+=1 - - f=open(str(path)+str(stock+str(".CSV"))) - j=f.readlines() - j.pop(0) #To remove the "header" row - f.close() - - filt_list_temp=filter(lambda x: (int(x.split(',')[1])> int(startDate)) ,j) # To fix DST bug - filt_list_temp=filter(lambda x: (int(x.split(',')[1])<= int(endDate)) ,filt_list_temp) - - if not (filt_list_temp): - print str(stock.split('.')[0]) + " didn't exist in this period\n" - #ENHANCEMENT- CAN ALSO REMOVE STOCK FROM THE STOCKLIST - #This can not be done right now- because if a stock did not exist- but another stock did exist then NaNs have to be added to the stock that did not exist. - else: - #it existed and now we need the timestamps - filt_list_temp=map(lambda x:(x.split(',')[1]),filt_list_temp) - filt_list_temp= map(lambda item:(time.mktime(time.strptime(item,'%Y%m%d'))), filt_list_temp) - - for item in filt_list_temp: - try: - tslist.index(int(item)) - except: - tslist.append(int(item)) - - - if (len(tslist)>0): - if (self.continueChecking(tslist, startDate, endDate)== False): - break #All dates are covered.. - - #for stock in stocks_list done - - tslist.sort() #this should all fit into memory - - for ts in tslist: - row= table.row - row['timestamp']= ts - #print "Adding timestamp " + str (ts) - row.append() - #for ts in tslist ends - - table.flush() - h5f.close() - - #makeTimestampsFile ends - - - def continueChecking(self, tsList, beginTS, endTS): - ''' - @summary: This function basically checks if a day that we haven't found any trades on is a weekend. If so- we don't need to keep looking. The converter will work just fine even without this function- but it will take more time- because it will keep looking for timestamps that it is not going to find. - @bug: There is a Daylight savings time bug here too- but it won't adversely affect anything because DST always happens over the weekends! Though if the time change happens on a weekday sometime in the distant past/future this function may break. 
- ''' - - index=1 - DAY=86400 - - while (index < len(tsList)): - if (int(tsList[index])- int(tsList[index -1]) > DAY): - tempTS= tsList[index-1] + DAY - while (tempTS< tsList[index]): - timeStruct= time.gmtime(tempTS) - if not ((timeStruct[6] == 5) or (timeStruct[6] == 6)): - #Keep looking - return True #if its not a Saturday or a Sunday then keep looking - tempTS+=DAY - #while (tempTS< tsList[index]) ends - index+=1 - #while ends - #Checking from beginTS to start of list - tempTS=time.mktime(time.strptime(str(beginTS),'%Y%m%d')) - - while (int(tsList[0])- int(tempTS) > DAY): - timeStruct= time.gmtime((tempTS)) - if not ((timeStruct[6] == 5) or (timeStruct[6] == 6)): - return True - #if not... ends - tempTS+=DAY - #while (tsList[0]- tempTS > DAY) ends - #Checking from endTS to end of list - tempTS=time.mktime(time.strptime(str(endTS),'%Y%m%d')) - - while (int(tempTS)- int(tsList[len(tsList)-1]) > DAY): - timeStruct= time.gmtime(tempTS) - if not ((timeStruct[6] == 5) or (timeStruct[6] == 6)): - return True - #if not... ends - tempTS+=DAY - #while (tempTS- tsList[len(tsList)-1] > DAY) ends - - print "Smartly figured out that we don't need to continue" - return False #stop looking for more timestamps because all the timestamps that can be in the list are now there.. - #we will not get any more timestamps by looking for more..because there just aren't any left... cool huh? - #continueChecking ends - - - def readTimestampsFromFile(self, fileName, beginTS, endTS): - h5f = pt.openFile(str(fileName), mode = "a") - fileIterator= h5f.root.timestamps.timestamps - - tslist=[] - for row in fileIterator.iterrows(): - temp= str(time.strftime("%Y%m%d", time.gmtime(row['timestamp']))) - - if (temp>= str(beginTS)) and (temp <= str(endTS)): - tslist.append(temp) - - if (temp > str(endTS)): - break - - h5f.close() - - self.timestamps=tslist - #readTimestampsFromFile ends - - def keepHDFFilesInSyncWithCSV(self, listOfInputPaths, listOfOutputPaths): - ''' - @summary: This function removes HDF files that correspond to CSV files that existed in the past- but don't exist anymore. Possibly because the stock was delisted or something like that. - ''' - print "Removing HDF files for which there is no corresponding CSV file" - listOfListOfHdfFiles=self.getSymbols(listOfOutputPaths, ".h5") - listOfListOfCsvFiles=self.getSymbols(listOfInputPaths, ".csv") #I guess this isn't really necessary, we could just reuse the stock list or something - #but let's just keep things "proper" - ctr=-1 - for listofHDFFiles in listOfListOfHdfFiles: - ctr+=1 - for hdfFile in listofHDFFiles: - try: - #Check if the HDF file exists... - listOfListOfCsvFiles[ctr].index(hdfFile) - except: - print "Removing "+str(listOfOutputPaths[ctr]) + str(hdfFile)+".h5" - os.remove(str(listOfOutputPaths[ctr]) + str(hdfFile)+".h5") - #if ends - #for hdfFile in listOfListOfHdfFiles ends - #for listofHDFFiles in listOfListOfHdfFiles ends - - print "Done removing HDF files (if any)" - #keepHDFFilesInSyncWithCSV done - - - -if __name__ == "__main__": - ''' - @attention: The HDF file containing the timestamps should not be in any of the output paths because, if it is, then it will be deleted at the end. 
- ''' - - print "Starting..."+ str(time.strftime("%H:%M:%S")) - - parser = OptionParser() - args = parser.parse_args()[1] - endDate= args[0] - print "End date is: " + str (endDate) - - - #Date to start reading data Format: YYYYMMDD - startDate = 19840101 - - #Date to end reading data Format: YYYYMMDD - #endDate = 20100831 - - #The complete path to the file containing the list of timestamps. This should not be in the output folder because it will be removed by the keepHDFFilesInSyncWithCSV function! - timestampsFile="C:\\generated data files\\timestamp files\\timestamps.h5" - - - spd = StockPriceData() - - #Remember the '\\' at the end... - listOfInputPaths= list() - listOfInputPaths.append("C:\\Trading data text\\Stocks\\Delisted Securities\\US Recent\\") - listOfInputPaths.append ("C:\\Trading data text\\Stocks\\US\\AMEX\\") - listOfInputPaths.append ("C:\\Trading data text\\Stocks\\US\\Delisted Securities\\") - listOfInputPaths.append ("C:\\Trading data text\\Stocks\\US\OTC\\") - listOfInputPaths.append ("C:\\Trading data text\\Stocks\\US\\NASDAQ\\") - listOfInputPaths.append ("C:\\Trading data text\\Stocks\\US\NYSE\\") - listOfInputPaths.append ("C:\\Trading data text\\Stocks\\US\\NYSE Arca\\") - - listOfOutputPaths= list() - listOfOutputPaths.append("C:\\generated data files\\one stock per file\\maintain folder structure\\Delisted_US_Recent\\") - listOfOutputPaths.append("C:\\generated data files\\one stock per file\\maintain folder structure\\US_AMEX\\") - listOfOutputPaths.append("C:\\generated data files\\one stock per file\\maintain folder structure\\US_Delisted\\") - listOfOutputPaths.append("C:\\generated data files\\one stock per file\\maintain folder structure\OTC\\") - listOfOutputPaths.append("C:\\generated data files\\one stock per file\\maintain folder structure\\US_NASDAQ\\") - listOfOutputPaths.append("C:\\generated data files\\one stock per file\\maintain folder structure\\US_NYSE\\") - listOfOutputPaths.append("C:\\generated data files\\one stock per file\\maintain folder structure\\US_NYSE Arca\\") - - #If the output paths don't exist, then create them... - for path in listOfOutputPaths: - if not (os.access(path, os.F_OK)): - #Path does not exist, so create it - os.makedirs(path) - #done making all output paths! - - if (len(listOfInputPaths)!= len(listOfOutputPaths)): - print "No. of input paths not equal to the number of output paths.. quitting" - sys.exit("FAILURE") - listOfListOfStocks=spd.getSymbols(listOfInputPaths, ".csv") - - - if(endDate -1)] + #Now, we remove the .csv to get the name of the stock + stocksAtThisPath = [(x.partition(str(fileExtensionToRemove))[0]) for x in stocksAtThisPath] + + #Then add that list to listOflistOfStocks + listOflistOfStocks.append(stocksAtThisPath) + return listOflistOfStocks + #getSymbols done + +#build the array + def getData(self, listOfListOfStocks, listOfInputPaths, startDate, endDate, listOfOutputPaths): + ''' + @summary: This is where all the work happens + @attention: Assumption here is that past data never changes + @bug: The exchange is currently set pretty randomly + ''' + + #Finding no. of stocks + noOfStocks=0 + for stock_list in listOfListOfStocks: + noOfStocks+= len (stock_list) + #for stock in stock_list: + #print str(stock) + + print("No. of stocks: " + str(noOfStocks)) + print("No. 
of timestamps: " + str(len(self.timestamps))) + + + listIndex=-1 + ctr=1; + for inputFileFolder in listOfInputPaths: + listIndex+=1 + outputFileFolder= str(listOfOutputPaths[listIndex]) + stocks_list= listOfListOfStocks[listIndex] + for i in range(0, len(stocks_list)): # - self.count_of_non_existent_stocks): + print(str(stocks_list[i]) +" "+str(ctr)+" of "+ str(noOfStocks)+" "+ str(time.strftime("%H:%M:%S"))) + ctr= ctr+1 + + beginTS= startDate + + #Check if the file exists + if (os.path.exists(str(outputFileFolder) + str(stocks_list[i]+".h5"))): + + #Checking the last timestamp in the hdf file + h5f=pt.openFile(outputFileFolder + str(stocks_list[i]+".h5"), mode = "a") + print("Updating " +str(outputFileFolder + str(stocks_list[i]+".h5"))) + table= h5f.getNode('/StrategyData', 'StrategyData') + beginTS= int(time.strftime("%Y%m%d", time.gmtime(table[table.nrows-1]['timestamp']))) #+ 1 #POSSIBLE BUG? + if (str(beginTS) >= self.timestamps[len(self.timestamps)-1]): #if (os.path.getmtime(str(outputFileFolder)+str(stocks_list[i])+".h5") > os.path.getmtime(str(self.dirname+ "/"+ str(stocks_list[i]+".CSV")))): + #The hdf5 file for this stock has been modified after the CSV file was modified. Ergo- no changes need to be made to it now.. + print(str(stocks_list[i])+".h5 already is up to date. "+ str(time.strftime("%H:%M:%S"))) + h5f.close() + continue + else: + #File is present but not upto date + beginTS= int(time.strftime("%Y%m%d", time.gmtime(table[table.nrows-1]['timestamp']))) + else: + #The only foreseeable reason why there might be an exception here is that the hdf file does not exist. So, creating it. + print("Creating file: " + str(outputFileFolder) + str(stocks_list[i]+".h5")+" "+ str(time.strftime("%H:%M:%S"))) + + h5f = pt.openFile(str(outputFileFolder) + str(stocks_list[i]+".h5"), mode = "w") + group = h5f.createGroup("/", 'StrategyData') + table = h5f.createTable(group, 'StrategyData', StrategyDataModel) + beginTS= startDate + #else done + + f=open(str(inputFileFolder)+str(stocks_list[i]+str(".CSV"))) + jk=f.readlines() + f.close() + jk.pop(0) + + self.filt_list=list() + filt_list_temp=[x for x in jk if (int(x.split(',')[1])> int(beginTS))] #Because we only want timestamps strictly greater than the last timestamp currently in the file. + filt_list_temp=[x for x in filt_list_temp if (int(x.split(',')[1])<= int(endDate))] + filt_list_temp=[(x.split(',')[0],x.split(',')[1],x.split(',')[2],x.split(',')[3],x.split(',')[4],x.split(',')[5],x.split(',')[6],(x.split(',')[7]).strip()) for x in filt_list_temp] + + self.filt_list.append(filt_list_temp) + + if (table.nrows > 0): + #we are appending to an old file and not creating a new file.. + + tsStartIndex= np.array(self.timestamps).searchsorted(beginTS) +1 + + else: + #creating a new file... 
+ tsStartIndex =0 + #if (table.nrows > 0) done + + k = 0 + for j in range(tsStartIndex, len(self.timestamps)): + if (k< len(self.filt_list[0])): + + if((self.timestamps[j])< (self.filt_list[0][k][1])): + row=table.row + row['exchange'] = 'NYSE' + row['symbol'] = self.filt_list[0][k][0] + row['adj_open'] = np.NaN + row['adj_close'] = np.NaN + row['adj_high'] = np.NaN + row['adj_low'] = np.NaN + row['close'] = np.NaN + row['volume'] = np.NaN + parseddate = time.strptime(self.timestamps[j],'%Y%m%d') +# row['date'] = self.timestamps[j] + row['timestamp'] = time.mktime(parseddate) + row.append() + + elif(self.timestamps[j]==self.filt_list[0][k][1]): + row=table.row + row['exchange'] = 'NASDAQ' + row['symbol'] = self.filt_list[0][k][0] + row['adj_open'] = float(self.filt_list[0][k][2]) + row['adj_close'] = float(self.filt_list[0][k][5]) + row['adj_high'] = float(self.filt_list[0][k][3]) + row['adj_low'] = float(self.filt_list[0][k][4]) + row['close'] = float(self.filt_list[0][k][7]) + row['volume'] = int(self.filt_list[0][k][6]) + parseddate = time.strptime(self.timestamps[j],'%Y%m%d') +# row['date'] = self.timestamps[j] + row['timestamp'] = time.mktime(parseddate) + row.append() + + k=k+1 + else: + print("###############Something has gone wrong. A stock had a timestamp which was not in the timestamp list...") + print("TS: " + str(self.timestamps[j]) + ", Stock: " + str (self.filt_list[0][k][1])) + k=k+1 + #should stop executing here? Naah +# sys.exit() + + else: + row=table.row + row['exchange'] = 'NYSE' + row['symbol'] = stocks_list[i] #self.filt_list[0][len(self.filt_list[0])-1][0] ####NOTE. POSSIBLE BUG? + row['adj_open'] = np.NaN + row['adj_close'] = np.NaN + row['adj_high'] = np.NaN + row['adj_low'] = np.NaN + row['close'] = np.NaN + row['volume'] = np.NaN + parseddate = time.strptime(self.timestamps[j],'%Y%m%d') +# row['date'] = self.timestamps[j] + row['timestamp'] = time.mktime(parseddate) +# row['interval'] = 86400 + row.append() + + #for j in range(len(self.timestamps)) ends + table.flush() + h5f.close() + #for i in range(0, stocks.size) done + + print("Writing data done. "+ str(time.strftime("%H:%M:%S"))) + + + def makeOrUpdateTimestampsFile(self, fileName, listOflistOfStocks, listOfInputPaths, startDate, endDate): + ''' + @bug: Formerly did not take care of DST + @attention: fixed DST bug. No known DST problems now. 
+ ''' + + DAY=86400 + + if (os.path.exists(fileName)): + print("Updating timestamps") + h5f = pt.openFile(str(fileName), mode = "a") + table=h5f.getNode('/timestamps','timestamps') + + lastTSFromFile= str(time.strftime("%Y%m%d", time.gmtime(table[table.nrows-1]['timestamp']))) + + if (str(startDate)<= lastTSFromFile): + startDate=str(time.strftime("%Y%m%d", time.gmtime(table[table.nrows-1]['timestamp']))) # TO FIX DST BUG + + else: + print("Creating new timestamp file") + h5f = pt.openFile(str(fileName), mode = "w") + group = h5f.createGroup("/", 'timestamps') + table = h5f.createTable(group, 'timestamps', TimestampsModel) + + print("start: " + str(startDate)+", end: "+ str(endDate)) + + tslist=list() + ctr=1 + if (str(startDate) <= str(endDate)): + listIndex=-1 + for path in listOfInputPaths: + listIndex+=1 + for stock in listOflistOfStocks[listIndex]: + #print str(stock)+" "+str(ctr)+" "+str(time.strftime("%H:%M:%S")) + ctr+=1 + + f=open(str(path)+str(stock+str(".CSV"))) + j=f.readlines() + j.pop(0) #To remove the "header" row + f.close() + + filt_list_temp=[x for x in j if (int(x.split(',')[1])> int(startDate))] # To fix DST bug + filt_list_temp=[x for x in filt_list_temp if (int(x.split(',')[1])<= int(endDate))] + + if not (filt_list_temp): + print(str(stock.split('.')[0]) + " didn't exist in this period\n") + #ENHANCEMENT- CAN ALSO REMOVE STOCK FROM THE STOCKLIST + #This can not be done right now- because if a stock did not exist- but another stock did exist then NaNs have to be added to the stock that did not exist. + else: + #it existed and now we need the timestamps + filt_list_temp=[(x.split(',')[1]) for x in filt_list_temp] + filt_list_temp= [(time.mktime(time.strptime(item,'%Y%m%d'))) for item in filt_list_temp] + + for item in filt_list_temp: + try: + tslist.index(int(item)) + except: + tslist.append(int(item)) + + + if (len(tslist)>0): + if (self.continueChecking(tslist, startDate, endDate)== False): + break #All dates are covered.. + + #for stock in stocks_list done + + tslist.sort() #this should all fit into memory + + for ts in tslist: + row= table.row + row['timestamp']= ts + #print "Adding timestamp " + str (ts) + row.append() + #for ts in tslist ends + + table.flush() + h5f.close() + + #makeTimestampsFile ends + + + def continueChecking(self, tsList, beginTS, endTS): + ''' + @summary: This function basically checks if a day that we haven't found any trades on is a weekend. If so- we don't need to keep looking. The converter will work just fine even without this function- but it will take more time- because it will keep looking for timestamps that it is not going to find. + @bug: There is a Daylight savings time bug here too- but it won't adversely affect anything because DST always happens over the weekends! Though if the time change happens on a weekday sometime in the distant past/future this function may break. 
+ ''' + + index=1 + DAY=86400 + + while (index < len(tsList)): + if (int(tsList[index])- int(tsList[index -1]) > DAY): + tempTS= tsList[index-1] + DAY + while (tempTS< tsList[index]): + timeStruct= time.gmtime(tempTS) + if not ((timeStruct[6] == 5) or (timeStruct[6] == 6)): + #Keep looking + return True #if its not a Saturday or a Sunday then keep looking + tempTS+=DAY + #while (tempTS< tsList[index]) ends + index+=1 + #while ends + #Checking from beginTS to start of list + tempTS=time.mktime(time.strptime(str(beginTS),'%Y%m%d')) + + while (int(tsList[0])- int(tempTS) > DAY): + timeStruct= time.gmtime((tempTS)) + if not ((timeStruct[6] == 5) or (timeStruct[6] == 6)): + return True + #if not... ends + tempTS+=DAY + #while (tsList[0]- tempTS > DAY) ends + #Checking from endTS to end of list + tempTS=time.mktime(time.strptime(str(endTS),'%Y%m%d')) + + while (int(tempTS)- int(tsList[len(tsList)-1]) > DAY): + timeStruct= time.gmtime(tempTS) + if not ((timeStruct[6] == 5) or (timeStruct[6] == 6)): + return True + #if not... ends + tempTS+=DAY + #while (tempTS- tsList[len(tsList)-1] > DAY) ends + + print("Smartly figured out that we don't need to continue") + return False #stop looking for more timestamps because all the timestamps that can be in the list are now there.. + #we will not get any more timestamps by looking for more..because there just aren't any left... cool huh? + #continueChecking ends + + + def readTimestampsFromFile(self, fileName, beginTS, endTS): + h5f = pt.openFile(str(fileName), mode = "a") + fileIterator= h5f.root.timestamps.timestamps + + tslist=[] + for row in fileIterator.iterrows(): + temp= str(time.strftime("%Y%m%d", time.gmtime(row['timestamp']))) + + if (temp>= str(beginTS)) and (temp <= str(endTS)): + tslist.append(temp) + + if (temp > str(endTS)): + break + + h5f.close() + + self.timestamps=tslist + #readTimestampsFromFile ends + + def keepHDFFilesInSyncWithCSV(self, listOfInputPaths, listOfOutputPaths): + ''' + @summary: This function removes HDF files that correspond to CSV files that existed in the past- but don't exist anymore. Possibly because the stock was delisted or something like that. + ''' + print("Removing HDF files for which there is no corresponding CSV file") + listOfListOfHdfFiles=self.getSymbols(listOfOutputPaths, ".h5") + listOfListOfCsvFiles=self.getSymbols(listOfInputPaths, ".csv") #I guess this isn't really necessary, we could just reuse the stock list or something + #but let's just keep things "proper" + ctr=-1 + for listofHDFFiles in listOfListOfHdfFiles: + ctr+=1 + for hdfFile in listofHDFFiles: + try: + #Check if the HDF file exists... + listOfListOfCsvFiles[ctr].index(hdfFile) + except: + print("Removing "+str(listOfOutputPaths[ctr]) + str(hdfFile)+".h5") + os.remove(str(listOfOutputPaths[ctr]) + str(hdfFile)+".h5") + #if ends + #for hdfFile in listOfListOfHdfFiles ends + #for listofHDFFiles in listOfListOfHdfFiles ends + + print("Done removing HDF files (if any)") + #keepHDFFilesInSyncWithCSV done + + + +if __name__ == "__main__": + ''' + @attention: The HDF file containing the timestamps should not be in any of the output paths because, if it is, then it will be deleted at the end. 
+ ''' + + print("Starting..."+ str(time.strftime("%H:%M:%S"))) + + parser = OptionParser() + args = parser.parse_args()[1] + endDate= args[0] + print("End date is: " + str (endDate)) + + + #Date to start reading data Format: YYYYMMDD + startDate = 19840101 + + #Date to end reading data Format: YYYYMMDD + #endDate = 20100831 + + #The complete path to the file containing the list of timestamps. This should not be in the output folder because it will be removed by the keepHDFFilesInSyncWithCSV function! + timestampsFile="C:\\generated data files\\timestamp files\\timestamps.h5" + + + spd = StockPriceData() + + #Remember the '\\' at the end... + listOfInputPaths= list() + listOfInputPaths.append("C:\\Trading data text\\Stocks\\Delisted Securities\\US Recent\\") + listOfInputPaths.append ("C:\\Trading data text\\Stocks\\US\\AMEX\\") + listOfInputPaths.append ("C:\\Trading data text\\Stocks\\US\\Delisted Securities\\") + listOfInputPaths.append ("C:\\Trading data text\\Stocks\\US\OTC\\") + listOfInputPaths.append ("C:\\Trading data text\\Stocks\\US\\NASDAQ\\") + listOfInputPaths.append ("C:\\Trading data text\\Stocks\\US\NYSE\\") + listOfInputPaths.append ("C:\\Trading data text\\Stocks\\US\\NYSE Arca\\") + + listOfOutputPaths= list() + listOfOutputPaths.append("C:\\generated data files\\one stock per file\\maintain folder structure\\Delisted_US_Recent\\") + listOfOutputPaths.append("C:\\generated data files\\one stock per file\\maintain folder structure\\US_AMEX\\") + listOfOutputPaths.append("C:\\generated data files\\one stock per file\\maintain folder structure\\US_Delisted\\") + listOfOutputPaths.append("C:\\generated data files\\one stock per file\\maintain folder structure\OTC\\") + listOfOutputPaths.append("C:\\generated data files\\one stock per file\\maintain folder structure\\US_NASDAQ\\") + listOfOutputPaths.append("C:\\generated data files\\one stock per file\\maintain folder structure\\US_NYSE\\") + listOfOutputPaths.append("C:\\generated data files\\one stock per file\\maintain folder structure\\US_NYSE Arca\\") + + #If the output paths don't exist, then create them... + for path in listOfOutputPaths: + if not (os.access(path, os.F_OK)): + #Path does not exist, so create it + os.makedirs(path) + #done making all output paths! + + if (len(listOfInputPaths)!= len(listOfOutputPaths)): + print("No. of input paths not equal to the number of output paths.. 
quitting") + sys.exit("FAILURE") + listOfListOfStocks=spd.getSymbols(listOfInputPaths, ".csv") + + + if(endDate -1), stocks_at_this_path) + filtered_names= [x for x in stocks_at_this_path if (str(x).find(str(fileExtensionToRemove)) > -1)] #Now, we remove the .csv to get the name of the stock - filtered_names = map(lambda x:(x.partition(str(fileExtensionToRemove))[0]),filtered_names) + filtered_names = [(x.partition(str(fileExtensionToRemove))[0]) for x in filtered_names] stock_ctr = -1 for stock in filtered_names: stock_ctr = stock_ctr + 1 - print "csv_to_pkl: processing: " + str (path + stock) + print("csv_to_pkl: processing: " + str (path + stock)) #read in the stock date from the CSV file stock_data= np.loadtxt (path + stock+".csv", np.float, None, ",", None, 1, use_cols) stock_data_shape = stock_data.shape @@ -145,7 +145,7 @@ def main (): f.close() #for stock in stocks_at_this_path ends #for path in listOfInputPaths ends - print "Finished..."+ str(time.strftime("%H:%M:%S")) + print("Finished..."+ str(time.strftime("%H:%M:%S"))) #main ends diff --git a/Legacy/csvconverter/yahoo_csv_to_pkl.py b/Legacy/csvconverter/yahoo_csv_to_pkl.py index 09a4f2bca..2ac34d508 100644 --- a/Legacy/csvconverter/yahoo_csv_to_pkl.py +++ b/Legacy/csvconverter/yahoo_csv_to_pkl.py @@ -19,13 +19,13 @@ def main (): - print "Starting..."+ str(time.strftime("%H:%M:%S")) + print("Starting..."+ str(time.strftime("%H:%M:%S"))) try: rootdir = os.environ['QSDATA'] except KeyError: #rootdir = "/hzr71/research/QSData" - print "Please be sure to set the value for QSDATA in config.sh or local.sh\n" + print("Please be sure to set the value for QSDATA in config.sh or local.sh\n") fileExtensionToRemove = ".csv" @@ -61,23 +61,23 @@ def main (): if (len(listOfInputPaths)!= len(listOfOutputPaths)): - print "No. of input paths not equal to the number of output paths.. quitting" + print("No. of input paths not equal to the number of output paths.. quitting") sys.exit("FAILURE") #if ends path_ctr = -1; - use_cols = range (1, 7 + 1) # will now use cols 1 to 7 + use_cols = list(range(1, 7 + 1)) # will now use cols 1 to 7 for path in listOfInputPaths: path_ctr = path_ctr + 1; stocks_at_this_path = dircache.listdir(str(path)) #Next, throw away everything that is not a .csv And these are our stocks! 
Example: this should throw away the '$' folder in the NYSE folder - filtered_names= filter (lambda x:(str(x).find(str(fileExtensionToRemove)) > -1), stocks_at_this_path) + filtered_names= [x for x in stocks_at_this_path if (str(x).find(str(fileExtensionToRemove)) > -1)] #Now, we remove the .csv to get the name of the stock - filtered_names = map(lambda x:(x.partition(str(fileExtensionToRemove))[0]),filtered_names) + filtered_names = [(x.partition(str(fileExtensionToRemove))[0]) for x in filtered_names] stock_ctr = -1 for stock in filtered_names: stock_ctr = stock_ctr + 1 - print "Reading file: " + str (path + stock) + print("Reading file: " + str (path + stock)) #read in the stock date from the CSV file stock_data= np.loadtxt (path + stock+".csv", np.float, None, ",", None, 1, use_cols) @@ -94,7 +94,7 @@ def main (): f.close() #for stock in stocks_at_this_path ends #for path in listOfInputPaths ends - print "Finished..."+ str(time.strftime("%H:%M:%S")) + print("Finished..."+ str(time.strftime("%H:%M:%S"))) #main ends diff --git a/Legacy/csvconverter/yahoo_data_getter.py b/Legacy/csvconverter/yahoo_data_getter.py index 78ed70c65..41e2d4ed2 100644 --- a/Legacy/csvconverter/yahoo_data_getter.py +++ b/Legacy/csvconverter/yahoo_data_getter.py @@ -1,168 +1,168 @@ -''' -Created on Apr 8, 2011 - -@author: sjoshi42 -@summary: This module get stock data from yahoo finance -''' - -import qstkutil.DataAccess as da -import qstkutil.utils as utils -import urllib2 -import urllib -import datetime -import sys -import os - -def adjust (_str): - _list= list() -# print str ("Before" + _str) -# symbol= _str.partition (",")[0] -# rest= _str.partition (",")[2] #Everything but symbol - - _date= _str.partition(",")[0] - rest= _str.partition(",")[2] #Everything but symbol and date -# print str ("DT: " + _date) - - _list.append(rest.partition(",")[0]) #open - rest= rest.partition (",")[2] #Removed open -# print str ("OP: " + _list[-1]) - - _list.append(rest.partition(",")[0]) #high - rest= rest.partition (",")[2] #Removed high -# print str ("HI: " + _list[-1]) - - _list.append (rest.partition(",")[0]) #low - rest= rest.partition(",")[2] #Removed low -# print str ("LO: " + _list[-1]) - - _list.append (rest.partition(",")[0]) #close - rest= rest.partition (",")[2] #Removed close -# print str ("CL: " + _list[-1]) - - - vol= rest.partition(",")[0] #volume -# print str ("VOL: " + vol) - - adj_close= float (rest.partition (",")[2]) #Removed volume, and what we have left is adjusted close.. 
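(Editor's sketch: the urllib2/urllib imports replaced a few lines above are the one Python 2 to 3 rewrite in this patch that renames modules rather than just wrapping calls. A minimal, runnable summary of the correspondence, assuming only the standard library; the URL is illustrative:)

    import urllib.request, urllib.parse, urllib.error

    # was: urllib.urlencode(...)
    params = urllib.parse.urlencode({'exchange': 'NYSE', 'render': 'download'})
    # was: urllib2.urlopen(...) -- left commented out so the sketch runs offline
    # response = urllib.request.urlopen('http://example.com/?%s' % params)
    # was: urllib2.HTTPError / urllib2.URLError
    # except urllib.error.HTTPError: ...
    # except urllib.error.URLError: ...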
-# print str ("ADJ_CL: " + str(adj_close)) - - _list = map (lambda x: (float(x)), _list) #convert strings to floats - - try: - ratio= adj_close/ _list[-1] #Adjusted close / close - _list= map (lambda x: (x * ratio), _list) #Updating the values - _list= map (lambda x: (str(x)), _list) - _list_concat= ",".join(_list) #Concat all the elements in the list using `,' as the separator - _str= _date + "," + _list_concat + "," + vol + "," + str (adj_close) + "\n" - except ZeroDivisionError: - print ("Warning: Close value is Zero.") - - - return _str - #adjust ends - -def main(): - #Getting path - path= os.environ['QSDATA'] - - get_data_for_exchange("NASDAQ", path) - get_data_for_exchange("NYSE", path) - get_data_for_exchange("AMEX", path) - - #main ends -def get_data_for_exchange (exchange, data_path): - - #data_access= da.DataAccess('norgate') - #symbol_list= data_access.get_all_symbols() - data_path= data_path + "/Raw/Yahoo/US/" + str (exchange) + "/" - - #Create path if it doesn't exist - if not (os.access(data_path, os.F_OK)): - os.makedirs(data_path) - - utils.clean_paths(data_path) - - symbol_list= list() - - print "Getting list of stocks.." - - try: - nasdaq_params= urllib.urlencode ({'exchange':str(exchange), 'render':'download'}) - nasdaq_get= urllib2.urlopen ('http://www.nasdaq.com/screening/companies-by-name.aspx?%s' % nasdaq_params) - symbol_list.append (nasdaq_get.readline()) #Now we have all the data in a list- but we need only the symbols so we remove the rest - while (len (symbol_list[-1]) > 0): - symbol_list.append (nasdaq_get.readline()) - #while ends - symbol_list.pop(0) #This is just the word "symbol" and not a symbol itself - symbol_list.pop(-1) #Remove the last element because its going to be blank anyway - #symbol_list = map(lambda x:(x.partition(str(","))[0]),symbol_list) #Get the stuff before the first comma- which is the symbol - - #Unfortunately this symbol is in quotes. So we have to remove them now - symbol_list = map(lambda x:(x.partition(str("\""))[2]),symbol_list) #Keep the stuff only after the first " - symbol_list = map(lambda x:(x.partition(str("\""))[0]),symbol_list) #Keep the stuff before the second " - - except urllib2.HTTPError: - print "Unable to get list of stocks from server. Please check your internet connection and retry." - except: - print"Unknown error occoured when getting list of stocks from server." - - print "Got " + str (len(symbol_list)) + " symbols. Now getting symbol data..." - - _now =datetime.datetime.now(); - miss_ctr=0; #Counts how many symbols we could get - for symbol in symbol_list: - symbol_data=list() - print "Getting " + str (symbol) - - try: - params= urllib.urlencode ({'a':03, 'b':12, 'c':2000, 'd':_now.month, 'e':_now.day, 'f':_now.year, 's': str(symbol)}) - url_get= urllib2.urlopen("http://ichart.finance.yahoo.com/table.csv?%s" % params) - - header= url_get.readline() - symbol_data.append (url_get.readline()) - while (len(symbol_data[-1]) > 0): - symbol_data.append(url_get.readline()) -# print str(symbol_data[-1]) - - symbol_data.pop(-1) #The last element is going to be the string of length zero. We don't want to write that to file. 
- - #To change adjusted close so that Yahoo data is same as Norgate data - symbol_data= map (adjust, symbol_data) - - #Following changes so that the data looks like Norgate data and the change to cav_to_pkl.csv is minimized - symbol_data = map(lambda x:(x.replace("-", "")),symbol_data) - symbol_data = map(lambda x:(str(symbol) + "," + str(x)) ,symbol_data) #This means that the header is wrong but since it is ignored later anyways- this will work - - #now writing data to file - f= open (data_path + symbol + ".csv", 'w') - - #Writing the header - f.write (header) - - while (len(symbol_data) > 0): - f.write (symbol_data.pop()) - - f.close(); -# print url_get.readline() -# f= open (data_path + symbol + ".csv", 'w') -# f.write (url_get.read()) -# f.close() - - except urllib2.HTTPError: - miss_ctr= miss_ctr+1 - print "Unable to fetch data for stock: " + str (symbol) - except urllib2.URLError: - print "URL Error for stock: " + str (symbol) - -# except: -# print "Some error occurred" - #except ends - - - #for ends - print "All done. Got " + str (len(symbol_list) - miss_ctr) + " stocks. Could not get " + str (miss_ctr) + " stocks." - #main ends - - -if __name__ == '__main__': - main() +''' +Created on Apr 8, 2011 + +@author: sjoshi42 +@summary: This module get stock data from yahoo finance +''' + +import qstkutil.DataAccess as da +import qstkutil.utils as utils +import urllib.request, urllib.error, urllib.parse +import urllib.request, urllib.parse, urllib.error +import datetime +import sys +import os + +def adjust (_str): + _list= list() +# print str ("Before" + _str) +# symbol= _str.partition (",")[0] +# rest= _str.partition (",")[2] #Everything but symbol + + _date= _str.partition(",")[0] + rest= _str.partition(",")[2] #Everything but symbol and date +# print str ("DT: " + _date) + + _list.append(rest.partition(",")[0]) #open + rest= rest.partition (",")[2] #Removed open +# print str ("OP: " + _list[-1]) + + _list.append(rest.partition(",")[0]) #high + rest= rest.partition (",")[2] #Removed high +# print str ("HI: " + _list[-1]) + + _list.append (rest.partition(",")[0]) #low + rest= rest.partition(",")[2] #Removed low +# print str ("LO: " + _list[-1]) + + _list.append (rest.partition(",")[0]) #close + rest= rest.partition (",")[2] #Removed close +# print str ("CL: " + _list[-1]) + + + vol= rest.partition(",")[0] #volume +# print str ("VOL: " + vol) + + adj_close= float (rest.partition (",")[2]) #Removed volume, and what we have left is adjusted close.. 
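(Editor's sketch of the price adjustment that adjust() performs on each CSV row, pulled out of the string-splitting code so the arithmetic is visible. The function name and argument list are illustrative, not part of the module:)

    def adjust_prices(open_, high, low, close, volume, adj_close):
        # Scale open/high/low/close by adj_close/close so the series matches
        # the adjusted data; volume and adj_close pass through unchanged.
        # A close of 0 raises ZeroDivisionError, which adjust() catches and
        # reports as a warning, leaving the row unmodified.
        ratio = adj_close / close
        return [p * ratio for p in (open_, high, low, close)] + [volume, adj_close]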
+# print str ("ADJ_CL: " + str(adj_close)) + + _list = [(float(x)) for x in _list] #convert strings to floats + + try: + ratio= adj_close/ _list[-1] #Adjusted close / close + _list= [(x * ratio) for x in _list] #Updating the values + _list= [(str(x)) for x in _list] + _list_concat= ",".join(_list) #Concat all the elements in the list using `,' as the separator + _str= _date + "," + _list_concat + "," + vol + "," + str (adj_close) + "\n" + except ZeroDivisionError: + print ("Warning: Close value is Zero.") + + + return _str + #adjust ends + +def main(): + #Getting path + path= os.environ['QSDATA'] + + get_data_for_exchange("NASDAQ", path) + get_data_for_exchange("NYSE", path) + get_data_for_exchange("AMEX", path) + + #main ends +def get_data_for_exchange (exchange, data_path): + + #data_access= da.DataAccess('norgate') + #symbol_list= data_access.get_all_symbols() + data_path= data_path + "/Raw/Yahoo/US/" + str (exchange) + "/" + + #Create path if it doesn't exist + if not (os.access(data_path, os.F_OK)): + os.makedirs(data_path) + + utils.clean_paths(data_path) + + symbol_list= list() + + print("Getting list of stocks..") + + try: + nasdaq_params= urllib.parse.urlencode ({'exchange':str(exchange), 'render':'download'}) + nasdaq_get= urllib.request.urlopen ('http://www.nasdaq.com/screening/companies-by-name.aspx?%s' % nasdaq_params) + symbol_list.append (nasdaq_get.readline()) #Now we have all the data in a list- but we need only the symbols so we remove the rest + while (len (symbol_list[-1]) > 0): + symbol_list.append (nasdaq_get.readline()) + #while ends + symbol_list.pop(0) #This is just the word "symbol" and not a symbol itself + symbol_list.pop(-1) #Remove the last element because its going to be blank anyway + #symbol_list = map(lambda x:(x.partition(str(","))[0]),symbol_list) #Get the stuff before the first comma- which is the symbol + + #Unfortunately this symbol is in quotes. So we have to remove them now + symbol_list = [(x.partition(str("\""))[2]) for x in symbol_list] #Keep the stuff only after the first " + symbol_list = [(x.partition(str("\""))[0]) for x in symbol_list] #Keep the stuff before the second " + + except urllib.error.HTTPError: + print("Unable to get list of stocks from server. Please check your internet connection and retry.") + except: + print("Unknown error occoured when getting list of stocks from server.") + + print("Got " + str (len(symbol_list)) + " symbols. Now getting symbol data...") + + _now =datetime.datetime.now(); + miss_ctr=0; #Counts how many symbols we could get + for symbol in symbol_list: + symbol_data=list() + print("Getting " + str (symbol)) + + try: + params= urllib.parse.urlencode ({'a':0o3, 'b':12, 'c':2000, 'd':_now.month, 'e':_now.day, 'f':_now.year, 's': str(symbol)}) + url_get= urllib.request.urlopen("http://ichart.finance.yahoo.com/table.csv?%s" % params) + + header= url_get.readline() + symbol_data.append (url_get.readline()) + while (len(symbol_data[-1]) > 0): + symbol_data.append(url_get.readline()) +# print str(symbol_data[-1]) + + symbol_data.pop(-1) #The last element is going to be the string of length zero. We don't want to write that to file. 
+ + #To change adjusted close so that Yahoo data is same as Norgate data + symbol_data= list(map (adjust, symbol_data)) + + #Following changes so that the data looks like Norgate data and the change to cav_to_pkl.csv is minimized + symbol_data = [(x.replace("-", "")) for x in symbol_data] + symbol_data = [(str(symbol) + "," + str(x)) for x in symbol_data] #This means that the header is wrong but since it is ignored later anyways- this will work + + #now writing data to file + f= open (data_path + symbol + ".csv", 'w') + + #Writing the header + f.write (header) + + while (len(symbol_data) > 0): + f.write (symbol_data.pop()) + + f.close(); +# print url_get.readline() +# f= open (data_path + symbol + ".csv", 'w') +# f.write (url_get.read()) +# f.close() + + except urllib.error.HTTPError: + miss_ctr= miss_ctr+1 + print("Unable to fetch data for stock: " + str (symbol)) + except urllib.error.URLError: + print("URL Error for stock: " + str (symbol)) + +# except: +# print "Some error occurred" + #except ends + + + #for ends + print("All done. Got " + str (len(symbol_list) - miss_ctr) + " stocks. Could not get " + str (miss_ctr) + " stocks.") + #main ends + + +if __name__ == '__main__': + main() diff --git a/Legacy/epydoc-3.0.1/epydoc/apidoc.py b/Legacy/epydoc-3.0.1/epydoc/apidoc.py index 7eac12069..f6b3703d0 100644 --- a/Legacy/epydoc-3.0.1/epydoc/apidoc.py +++ b/Legacy/epydoc-3.0.1/epydoc/apidoc.py @@ -41,7 +41,7 @@ import types, re, os.path, pickle from epydoc import log import epydoc -import __builtin__ +import builtins from epydoc.compat import * # Backwards compatibility from epydoc.util import decode_with_backslashreplace, py_src_filename import epydoc.markup.pyval_repr @@ -111,7 +111,7 @@ def __init__(self, *pieces, **options): for piece in pieces: if isinstance(piece, DottedName): self._identifiers += piece._identifiers - elif isinstance(piece, basestring): + elif isinstance(piece, str): for subpiece in piece.split('.'): if piece not in self._ok_identifiers: if not self._IDENTIFIER_RE.match(subpiece): @@ -129,7 +129,7 @@ def __init__(self, *pieces, **options): self._identifiers = tuple(self._identifiers) def __repr__(self): - idents = [`ident` for ident in self._identifiers] + idents = [repr(ident) for ident in self._identifiers] return 'DottedName(' + ', '.join(idents) + ')' def __str__(self): @@ -147,7 +147,7 @@ def __add__(self, other): Return a new C{DottedName} whose identifier sequence is formed by adding C{other}'s identifier sequence to C{self}'s. """ - if isinstance(other, (basestring, DottedName)): + if isinstance(other, (str, DottedName)): return DottedName(self, other) else: return DottedName(self, *other) @@ -157,7 +157,7 @@ def __radd__(self, other): Return a new C{DottedName} whose identifier sequence is formed by adding C{self}'s identifier sequence to C{other}'s. """ - if isinstance(other, (basestring, DottedName)): + if isinstance(other, (str, DottedName)): return DottedName(other, self) else: return DottedName(*(list(other)+[self])) @@ -169,7 +169,7 @@ def __getitem__(self, i): identifiers selected by the slice. If C{i} is an empty slice, return an empty list (since empty C{DottedName}s are not valid). 
""" - if isinstance(i, types.SliceType): + if isinstance(i, slice): pieces = self._identifiers[i.start:i.stop] if pieces: return DottedName(pieces) else: return [] @@ -277,7 +277,7 @@ def __init__(self, name): self.name = name def __repr__(self): return '<%s>' % self.name - def __nonzero__(self): + def __bool__(self): raise ValueError('Sentinel value <%s> can not be used as a boolean' % self.name) @@ -974,17 +974,17 @@ def apidoc_links(self, **filters): imports = filters.get('imports', True) private = filters.get('private', True) if variables and imports and private: - return self.variables.values() # list the common case first. + return list(self.variables.values()) # list the common case first. elif not variables: return [] elif not imports and not private: - return [v for v in self.variables.values() if + return [v for v in list(self.variables.values()) if v.is_imported != True and v.is_public != False] elif not private: - return [v for v in self.variables.values() if + return [v for v in list(self.variables.values()) if v.is_public != False] elif not imports: - return [v for v in self.variables.values() if + return [v for v in list(self.variables.values()) if v.is_imported != True] assert 0, 'this line should be unreachable' @@ -1008,7 +1008,7 @@ def init_sorted_variables(self): elif '*' in ident: regexp = re.compile('^%s$' % ident.replace('*', '(.*)')) # sort within matching group? - for name, var_doc in unsorted.items(): + for name, var_doc in list(unsorted.items()): if regexp.match(name): self.sorted_variables.append(unsorted.pop(name)) unused_idents.discard(ident) @@ -1019,7 +1019,7 @@ def init_sorted_variables(self): # Add any remaining variables in alphabetical order. - var_docs = unsorted.items() + var_docs = list(unsorted.items()) var_docs.sort() for name, var_doc in var_docs: self.sorted_variables.append(var_doc) @@ -1096,7 +1096,7 @@ def report_unused_groups(self): Issue a warning for any @group items that were not used by L{_init_grouping()}. """ - for (group, unused_idents) in self._unused_groups.items(): + for (group, unused_idents) in list(self._unused_groups.items()): for ident in unused_idents: log.warning("@group %s: %s.%s not found" % (group, self.canonical_name, ident)) @@ -1322,7 +1322,7 @@ def _c3_mro(self, warn_about_bad_bases): base.proxy_for is not None): self._report_bad_base(base) w = [warn_about_bad_bases]*len(bases) - return self._c3_merge([[self]] + map(ClassDoc._c3_mro, bases, w) + + return self._c3_merge([[self]] + list(map(ClassDoc._c3_mro, bases, w)) + [list(bases)]) def _report_bad_base(self, base): @@ -1866,7 +1866,7 @@ def find(self, name, context): @type name: C{str} or L{DottedName} @type context: L{APIDoc} """ - if isinstance(name, basestring): + if isinstance(name, str): name = re.sub(r'\(.*\)$', '', name.strip()) if re.match('^([a-zA-Z_]\w*)(\.[a-zA-Z_]\w*)*$', name): name = DottedName(name) @@ -1942,7 +1942,7 @@ def _get_module_classes(self, docs): if not isinstance(doc, ModuleDoc): continue - for var in doc.variables.values(): + for var in list(doc.variables.values()): if not isinstance(var.value, ClassDoc): continue @@ -2026,7 +2026,7 @@ def read_profiling_info(self, profile_stats): # from these `funcid`s to `RoutineDoc`s. 
self._update_funcid_to_doc(profile_stats) - for callee, (cc, nc, tt, ct, callers) in profile_stats.stats.items(): + for callee, (cc, nc, tt, ct, callers) in list(profile_stats.stats.items()): callee = self._funcid_to_doc.get(callee) if callee is None: continue for caller in callers: @@ -2106,7 +2106,7 @@ def pp_apidoc(api_doc, doublespace=0, depth=5, exclude=(), include=(), s = '%s [%s]' % (name, backpointers[pyid]) # Only print non-empty fields: - fields = [field for field in api_doc.__dict__.keys() + fields = [field for field in list(api_doc.__dict__.keys()) if (field in include or (getattr(api_doc, field) is not UNKNOWN and field not in exclude))] @@ -2114,7 +2114,7 @@ def pp_apidoc(api_doc, doublespace=0, depth=5, exclude=(), include=(), fields = [field for field in dir(api_doc) if field in include] else: - fields = [field for field in api_doc.__dict__.keys() + fields = [field for field in list(api_doc.__dict__.keys()) if (getattr(api_doc, field) is not UNKNOWN and field not in exclude)] fields.sort() @@ -2124,15 +2124,15 @@ def pp_apidoc(api_doc, doublespace=0, depth=5, exclude=(), include=(), if doublespace: s += '\n |' s += '\n +- %s' % field - if (isinstance(fieldval, types.ListType) and + if (isinstance(fieldval, list) and len(fieldval)>0 and isinstance(fieldval[0], APIDoc)): s += _pp_list(api_doc, fieldval, doublespace, depth, exclude, include, backpointers, (field is fields[-1])) - elif (isinstance(fieldval, types.DictType) and + elif (isinstance(fieldval, dict) and len(fieldval)>0 and - isinstance(fieldval.values()[0], APIDoc)): + isinstance(list(fieldval.values())[0], APIDoc)): s += _pp_dict(api_doc, fieldval, doublespace, depth, exclude, include, backpointers, (field is fields[-1])) @@ -2162,7 +2162,7 @@ def _pp_list(api_doc, items, doublespace, depth, exclude, include, def _pp_dict(api_doc, dict, doublespace, depth, exclude, include, backpointers, is_last): - items = dict.items() + items = list(dict.items()) items.sort() line1 = (is_last and ' ') or '|' s = '' @@ -2193,7 +2193,7 @@ def _pp_val(api_doc, val, doublespace, depth, exclude, include, backpointers): return pp_apidoc(val, doublespace, depth-1, exclude, include, backpointers) elif isinstance(val, markup.ParsedDocstring): - valrepr = `val.to_plaintext(None)` + valrepr = repr(val.to_plaintext(None)) if len(valrepr) < 40: return valrepr else: return valrepr[:37]+'...' 
 else:
diff --git a/Legacy/epydoc-3.0.1/epydoc/checker.py b/Legacy/epydoc-3.0.1/epydoc/checker.py
index 3bc41d05f..b708f8395 100644
--- a/Legacy/epydoc-3.0.1/epydoc/checker.py
+++ b/Legacy/epydoc-3.0.1/epydoc/checker.py
@@ -189,7 +189,7 @@ def check(self, *check_sets):
 self._check(checks)
 log.end_progress()
 
- for (warning, docs) in self._warnings.items():
+ for (warning, docs) in list(self._warnings.items()):
 docs = sorted(docs)
 docnames = '\n'.join([' - %s' % self._name(d) for d in docs])
 log.warning('%s:\n%s' % (warning, docnames))
@@ -206,7 +206,7 @@ def _check(self, checks):
 if not isinstance(d, GenericValueDoc): docs.add(d)
 for doc in valdocs:
 if isinstance(doc, NamespaceDoc):
- for d in doc.variables.values():
+ for d in list(doc.variables.values()):
 if isinstance(d.value, GenericValueDoc): docs.add(d)
 for i, doc in enumerate(sorted(docs)):
@@ -333,7 +333,7 @@ def _check_func(self, doc):
 else:
 args_with_descr = []
 for arg, descr in doc.arg_descrs:
- if isinstance(arg, basestring):
+ if isinstance(arg, str):
 args_with_descr.append(arg)
 else:
 args_with_descr += arg
diff --git a/Legacy/epydoc-3.0.1/epydoc/cli.py b/Legacy/epydoc-3.0.1/epydoc/cli.py
index fe2359ad9..d0294ed29 100644
--- a/Legacy/epydoc-3.0.1/epydoc/cli.py
+++ b/Legacy/epydoc-3.0.1/epydoc/cli.py
@@ -73,7 +73,7 @@ from epydoc.util import plaintext_to_html
 from epydoc.apidoc import UNKNOWN
 from epydoc.compat import *
-import ConfigParser
+import configparser as ConfigParser #keep the old name: parse_configfiles() below binds a local called `configparser`, which would shadow a plain module import
 from epydoc.docwriter.html_css import STYLESHEETS as CSS_STYLESHEETS
 
 # This module is only available if Docutils are in the system
@@ -113,7 +113,7 @@
 'The following built-in CSS stylesheets are available:\n' +
 '\n'.join([' %10s: %s' % (key, descr)
 for (key, (sheet, descr))
- in CSS_STYLESHEETS.items()])),
+ in list(CSS_STYLESHEETS.items())])),
 #'checks': textwrap.dedent('''\
 #
 # '''),
@@ -122,7 +122,7 @@
 HELP_TOPICS['topics'] = wordwrap(
 'Epydoc can provide additional help for the following topics: ' +
- ', '.join(['%r' % topic for topic in HELP_TOPICS.keys()]))
+ ', '.join(['%r' % topic for topic in list(HELP_TOPICS.keys())]))
 
 ######################################################################
 #{ Argument & Config File Parsing
@@ -423,16 +423,16 @@ def parse_arguments():
 # --help [topic]
 if options.action == 'help':
 names = set([n.lower() for n in names])
- for (topic, msg) in HELP_TOPICS.items():
+ for (topic, msg) in list(HELP_TOPICS.items()):
 if topic.lower() in names:
- print '\n' + msg.rstrip() + '\n'
+ print('\n' + msg.rstrip() + '\n')
 sys.exit(0)
 optparser.print_help()
 sys.exit(0)
 
 # Print version message, if requested.
 if options.action == 'version':
- print version
+ print(version)
 sys.exit(0)
 
 # Process any config files.
@@ -440,7 +440,7 @@
 try:
 parse_configfiles(options.configfiles, options, names)
 except (KeyboardInterrupt,SystemExit): raise
- except Exception, e:
+ except Exception as e:
 if len(options.configfiles) == 1:
 cf_name = 'config file %s' % options.configfiles[0]
 else:
@@ -504,7 +504,7 @@
 return options, names
 
 def parse_configfiles(configfiles, options, names):
- configparser = ConfigParser.ConfigParser()
+ configparser = ConfigParser.ConfigParser() #with the aliased import above this local no longer shadows the module; a plain `import configparser` plus this assignment would raise UnboundLocalError here
 # ConfigParser.read() silently ignores errors, so open the files
 # manually (since we want to notify the user of any errors).
for configfile in configfiles: @@ -681,7 +681,7 @@ def main(options, names): elif options.action == 'pdf': stages += [60,50] elif options.action == 'check': stages += [10] elif options.action == 'pickle': stages += [10] - else: raise ValueError, '%r not supported' % options.action + else: raise ValueError('%r not supported' % options.action) if options.parse and not options.introspect: del stages[1] # no merging if options.introspect and not options.parse: @@ -712,7 +712,7 @@ def main(options, names): if xlink is not None: try: xlink.ApiLinkReader.read_configuration(options, problematic=False) - except Exception, exc: + except Exception as exc: log.error("Error while configuring external API linking: %s: %s" % (exc.__class__.__name__, exc)) @@ -772,7 +772,7 @@ def main(options, names): for filename in options.pstat_files[1:]: profile_stats.add(filename) except KeyboardInterrupt: raise - except Exception, e: + except Exception as e: log.error("Error reading pstat file: %s" % e) profile_stats = None if profile_stats is not None: @@ -790,7 +790,7 @@ def main(options, names): elif options.action == 'pickle': write_pickle(docindex, options) else: - print >>sys.stderr, '\nUnsupported action %s!' % options.action + print('\nUnsupported action %s!' % options.action, file=sys.stderr) # If we suppressed docstring warnings, then let the user know. if logger is not None and logger.suppressed_docstring_warning: @@ -852,7 +852,7 @@ def pickle_persistent_load(identifier): """Helper for pickling, which allows us to save and restore UNKNOWN, which is required to be identical to apidoc.UNKNOWN.""" if identifier == 'UNKNOWN': return UNKNOWN - else: raise pickle.UnpicklingError, 'Invalid persistent id' + else: raise pickle.UnpicklingError('Invalid persistent id') _RERUN_LATEX_RE = re.compile(r'(?im)^LaTeX\s+Warning:\s+Label\(s\)\s+may' r'\s+have\s+changed.\s+Rerun') @@ -925,13 +925,13 @@ def write_latex(docindex, options, format): 'ps2pdf -sPAPERSIZE#letter -dMaxSubsetPct#100 ' '-dSubsetFonts#true -dCompatibilityLevel#1.2 ' '-dEmbedAllFonts#true api.ps api.pdf') - except RunSubprocessError, e: + except RunSubprocessError as e: if running == 'latex': e.out = re.sub(r'(?sm)\A.*?!( LaTeX Error:)?', r'', e.out) e.out = re.sub(r'(?sm)\s*Type X to quit.*', '', e.out) e.out = re.sub(r'(?sm)^! Emergency stop.*', '', e.out) log.error("%s failed: %s" % (running, (e.out+e.err).lstrip())) - except OSError, e: + except OSError as e: log.error("%s failed: %s" % (running, e)) finally: os.chdir(oldpath) @@ -945,9 +945,9 @@ def write_text(docindex, options): for apidoc in docindex.root: s += plaintext_writer.write(apidoc) log.end_progress() - if isinstance(s, unicode): + if isinstance(s, str): s = s.encode('ascii', 'backslashreplace') - print s + print(s) def check_docs(docindex, options): from epydoc.checker import DocChecker @@ -968,17 +968,17 @@ def cli(): except SystemExit: raise except KeyboardInterrupt: - print '\n\n' - print >>sys.stderr, 'Keyboard interrupt.' + print('\n\n') + print('Keyboard interrupt.', file=sys.stderr) except: if options.debug: raise - print '\n\n' + print('\n\n') exc_info = sys.exc_info() - if isinstance(exc_info[0], basestring): e = exc_info[0] + if isinstance(exc_info[0], str): e = exc_info[0] else: e = exc_info[1] - print >>sys.stderr, ('\nUNEXPECTED ERROR:\n' - '%s\n' % (str(e) or e.__class__.__name__)) - print >>sys.stderr, 'Use --debug to see trace information.' 
+ print(('\nUNEXPECTED ERROR:\n'
+ '%s\n' % (str(e) or e.__class__.__name__)), file=sys.stderr)
+ print('Use --debug to see trace information.', file=sys.stderr)
 sys.exit(3)
 
 def _profile():
@@ -986,7 +986,7 @@
 if PROFILER == 'hotshot':
 try: import hotshot, hotshot.stats
 except ImportError:
- print >>sys.stderr, "Could not import profile module!"
+ print("Could not import profile module!", file=sys.stderr)
 return
 try:
 prof = hotshot.Profile('hotshot.out')
@@ -995,7 +995,7 @@
 pass
 prof.close()
 # Convert profile.hotshot -> profile.out
- print 'Consolidating hotshot profiling info...'
+ print('Consolidating hotshot profiling info...')
 hotshot.stats.load('hotshot.out').dump_stats('profile.out')
 
 # Standard 'profile' profiler.
@@ -1006,7 +1006,7 @@
 except ImportError:
 try: from profile import Profile
 except ImportError:
- print >>sys.stderr, "Could not import profile module!"
+ print("Could not import profile module!", file=sys.stderr)
 return
 
 # There was a bug in Python 2.4's profiler. Check if it's
@@ -1015,8 +1015,8 @@
 # 2005-September/047099.html>)
 if (hasattr(Profile, 'dispatch') and
 Profile.dispatch['c_exception'] is
- Profile.trace_dispatch_exception.im_func):
- trace_dispatch_return = Profile.trace_dispatch_return.im_func
+ Profile.trace_dispatch_exception): #in Python 3 class attribute access already yields the plain function; there is no im_func/__func__ to unwrap
+ trace_dispatch_return = Profile.trace_dispatch_return
 Profile.dispatch['c_exception'] = trace_dispatch_return
 try:
 prof = Profile()
@@ -1026,7 +1026,7 @@
 prof.dump_stats('profile.out')
 
 else:
- print >>sys.stderr, 'Unknown profiler %s' % PROFILER
+ print('Unknown profiler %s' % PROFILER, file=sys.stderr)
 return
 
 ######################################################################
@@ -1094,11 +1094,11 @@ def __init__(self, term_stream=sys.stdout):
 # Colors
 set_fg = self._tigetstr('setf')
 if set_fg:
- for i,color in zip(range(len(self._COLORS)), self._COLORS):
+ for i,color in zip(list(range(len(self._COLORS))), self._COLORS):
 setattr(self, color, curses.tparm(set_fg, i) or '')
 set_fg_ansi = self._tigetstr('setaf')
 if set_fg_ansi:
- for i,color in zip(range(len(self._ANSICOLORS)), self._ANSICOLORS):
+ for i,color in zip(list(range(len(self._ANSICOLORS))), self._ANSICOLORS):
 setattr(self, color, curses.tparm(set_fg_ansi, i) or '')
 
 def _tigetstr(self, cap_name):
@@ -1217,7 +1217,7 @@ def _report(self, message):
 # then make room for the message.
 if self._progress_mode == 'simple-bar':
 if self._progress is not None:
- print
+ print()
 self._progress = None
 if self._progress_mode == 'bar':
 sys.stdout.write(self.term.CLEAR_LINE)
@@ -1235,7 +1235,7 @@ def progress(self, percent, message=''):
 
 if self._progress_mode == 'list':
 if message:
- print '[%3d%%] %s' % (100*percent, message)
+ print('[%3d%%] %s' % (100*percent, message))
 sys.stdout.flush()
 
 elif self._progress_mode == 'bar':
@@ -1307,7 +1307,7 @@ def start_progress(self, header=None):
 self._progress_start_time = time.time()
 self._progress_header = header
 if self._progress_mode != 'hide' and header:
- print self.term.BOLD + header + self.term.NORMAL
+ print(self.term.BOLD + header + self.term.NORMAL)
 
 def end_progress(self):
 self.progress(1.)
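# Background for the trace_dispatch hunk above: Python 2 wrapped functions
# looked up on a class as "unbound methods", and .im_func unwrapped them;
# Python 3 dropped unbound methods, so class attribute access returns the
# plain function and there is no __func__ to take. A minimal sketch:
#
#     class C:
#         def m(self): pass
#
#     C.m              # plain function in Python 3 (unbound method in 2)
#     C().m.__func__   # bound methods still wrap the underlying function
#     C.m.__func__     # AttributeError in Python 3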
@@ -1317,24 +1317,24 @@ def end_progress(self): sys.stdout.write((self.term.CLEAR_EOL + '\n')*2 + self.term.CLEAR_EOL + self.term.UP*2) if self._progress_mode == 'simple-bar': - print ']' + print(']') self._progress = None self._task_times.append( (time.time()-self._progress_start_time, self._progress_header) ) def print_times(self): - print - print 'Timing summary:' + print() + print('Timing summary:') total = sum([time for (time, task) in self._task_times]) max_t = max([time for (time, task) in self._task_times]) for (time, task) in self._task_times: task = task[:31] - print ' %s%s %7.1fs' % (task, '.'*(35-len(task)), time), + print(' %s%s %7.1fs' % (task, '.'*(35-len(task)), time), end=' ') if self.term.COLS > 55: - print '|'+'=' * int((self.term.COLS-53) * time / max_t) + print('|'+'=' * int((self.term.COLS-53) * time / max_t)) else: - print - print + print() + print() class UnifiedProgressConsoleLogger(ConsoleLogger): def __init__(self, verbosity, stages, progress_mode=None): diff --git a/Legacy/epydoc-3.0.1/epydoc/docbuilder.py b/Legacy/epydoc-3.0.1/epydoc/docbuilder.py index 1e6918d6d..19d602b81 100644 --- a/Legacy/epydoc-3.0.1/epydoc/docbuilder.py +++ b/Legacy/epydoc-3.0.1/epydoc/docbuilder.py @@ -67,7 +67,7 @@ ## Imports ###################################################################### -import sys, os, os.path, __builtin__, imp, re, inspect +import sys, os, os.path, builtins, imp, re, inspect from epydoc.apidoc import * from epydoc.docintrospecter import introspect_docs from epydoc.docparser import parse_docs, ParseError @@ -99,7 +99,7 @@ def __init__(self, introspect=True, parse=True, and re.compile(exclude_introspect) or None) self._parse_regexp = (exclude_parse and re.compile(exclude_parse) or None) - except Exception, exc: + except Exception as exc: log.error('Error in regular expression pattern: %s' % exc) raise @@ -198,7 +198,7 @@ def build_doc_index(items, introspect=True, parse=True, add_submodules=True, options = BuildOptions(parse=parse, introspect=introspect, exclude_introspect=exclude_introspect, exclude_parse=exclude_parse, add_submodules=add_submodules) - except Exception, e: + except Exception as e: # log.error already reported by constructor. return None @@ -276,7 +276,7 @@ def build_doc_index(items, introspect=True, parse=True, add_submodules=True, # the value's variables' docstrings if (isinstance(val_doc, NamespaceDoc) and val_doc.variables not in (None, UNKNOWN)): - for var_doc in val_doc.variables.values(): + for var_doc in list(val_doc.variables.values()): # Now we have a chance to propagate the defining module # to objects for which introspection is not possible, # such as properties. @@ -343,7 +343,7 @@ def _get_docs_from_items(items, options): # Collect (introspectdoc, parsedoc) pairs for each item. 
doc_pairs = [] for item in items: - if isinstance(item, basestring): + if isinstance(item, str): if is_module_file(item): doc_pairs.append(_get_docs_from_module_file( item, options, progress_estimator)) @@ -412,7 +412,7 @@ def _get_docs_from_pyobject(obj, options, progress_estimator): introspect_error = parse_error = None try: introspect_doc = introspect_docs(value=obj) - except ImportError, e: + except ImportError as e: log.error(e) return (None, None) if options.parse: @@ -447,14 +447,14 @@ def _get_docs_from_pyname(name, options, progress_estimator, if options.must_introspect(name): try: introspect_doc = introspect_docs(name=name) - except ImportError, e: + except ImportError as e: introspect_error = str(e) if options.must_parse(name): try: parse_doc = parse_docs(name=name) - except ParseError, e: + except ParseError as e: parse_error = str(e) - except ImportError, e: + except ImportError as e: # If we get here, then there' probably no python source # available; don't bother to generate a warnining. pass @@ -478,14 +478,14 @@ def _get_docs_from_pyscript(filename, options, progress_estimator): introspect_doc = introspect_docs(filename=filename, is_script=True) if introspect_doc.canonical_name is UNKNOWN: introspect_doc.canonical_name = munge_script_name(filename) - except ImportError, e: + except ImportError as e: introspect_error = str(e) if options.parse: try: parse_doc = parse_docs(filename=filename, is_script=True) - except ParseError, e: + except ParseError as e: parse_error = str(e) - except ImportError, e: + except ImportError as e: parse_error = str(e) # Report any errors we encountered. @@ -541,15 +541,15 @@ def _get_docs_from_module_file(filename, options, progress_estimator, filename=filename, context=parent_docs[0]) if introspect_doc.canonical_name is UNKNOWN: introspect_doc.canonical_name = modulename - except ImportError, e: + except ImportError as e: introspect_error = str(e) if src_file_available and options.must_parse(modulename): try: parse_doc = parse_docs( filename=filename, context=parent_docs[1]) - except ParseError, e: + except ParseError as e: parse_error = str(e) - except ImportError, e: + except ImportError as e: parse_error = str(e) # Report any errors we encountered. @@ -586,11 +586,11 @@ def _get_docs_from_submodules(item, pkg_docs, options, progress_estimator): subpackage_dirs.add(filename) # Update our estimate of the number of modules in this package. - progress_estimator.revise_estimate(item, module_filenames.items(), + progress_estimator.revise_estimate(item, list(module_filenames.items()), subpackage_dirs) docs = [pkg_docs] - for module_filename in module_filenames.values(): + for module_filename in list(module_filenames.values()): d = _get_docs_from_module_file( module_filename, options, progress_estimator, pkg_docs) docs.append(d) @@ -868,8 +868,8 @@ def merge_docs(introspect_doc, parse_doc, cyclecheck=None, path=None): _merge_posargs_and_defaults(introspect_doc, parse_doc, path) # Merge the two api_doc's attributes. - for attrib in set(introspect_doc.__dict__.keys() + - parse_doc.__dict__.keys()): + for attrib in set(list(introspect_doc.__dict__.keys()) + + list(parse_doc.__dict__.keys())): # Be sure not to merge any private attributes (especially # __mergeset or __has_been_hashed!) if attrib.startswith('_'): continue @@ -934,7 +934,7 @@ def merge_attribute(attrib, introspect_doc, parse_doc, cyclecheck, path): def merge_variables(varlist1, varlist2, precedence, cyclecheck, path): # Merge all variables that are in both sets. 
- for varname, var1 in varlist1.items(): + for varname, var1 in list(varlist1.items()): var2 = varlist2.get(varname) if var2 is not None: var = merge_docs(var1, var2, cyclecheck, path+'.'+varname) @@ -942,7 +942,7 @@ def merge_variables(varlist1, varlist2, precedence, cyclecheck, path): varlist2[varname] = var # Copy any variables that are not in varlist1 over. - for varname, var in varlist2.items(): + for varname, var in list(varlist2.items()): varlist1.setdefault(varname, var) return varlist1 @@ -1135,7 +1135,7 @@ def assign_canonical_names(val_doc, name, docindex, score=0): val_doc.canonical_name is not UNKNOWN): # If this is the first time we've seen val_doc, and it # already has a name, then don't change that name. - _name_scores[val_doc] = sys.maxint + _name_scores[val_doc] = sys.maxsize name = val_doc.canonical_name score = 0 else: @@ -1148,7 +1148,7 @@ def assign_canonical_names(val_doc, name, docindex, score=0): # Recurse to any contained values. if isinstance(val_doc, NamespaceDoc): - for var_doc in val_doc.variables.values(): + for var_doc in list(val_doc.variables.values()): # Set the variable's canonical name. varname = DottedName(name, var_doc.name) var_doc.canonical_name = varname @@ -1252,7 +1252,7 @@ def find_overrides(class_doc): for base_class in list(class_doc.mro(warn_about_bad_bases=True)): if base_class == class_doc: continue if base_class.variables is UNKNOWN: continue - for name, var_doc in base_class.variables.items(): + for name, var_doc in list(base_class.variables.items()): if ( not (name.startswith('__') and not name.endswith('__')) and base_class == var_doc.container and name in class_doc.variables and @@ -1274,7 +1274,7 @@ def inherit_docs(class_doc): # Inherit any variables. if base_class.variables is UNKNOWN: continue - for name, var_doc in base_class.variables.items(): + for name, var_doc in list(base_class.variables.items()): # If it's a __private variable, then don't inherit it. if name.startswith('__') and not name.endswith('__'): continue diff --git a/Legacy/epydoc-3.0.1/epydoc/docintrospecter.py b/Legacy/epydoc-3.0.1/epydoc/docintrospecter.py index cbbbb5610..f4c176a77 100644 --- a/Legacy/epydoc-3.0.1/epydoc/docintrospecter.py +++ b/Legacy/epydoc-3.0.1/epydoc/docintrospecter.py @@ -37,7 +37,7 @@ # For extracting encoding for docstrings: import epydoc.docparser # Builtin values -import __builtin__ +import builtins # Backwards compatibility from epydoc.compat import * @@ -196,11 +196,11 @@ def introspect_module(module, module_doc, module_name=None, preliminary=False): # Record the module's docformat if hasattr(module, '__docformat__'): - module_doc.docformat = unicode(module.__docformat__) + module_doc.docformat = str(module.__docformat__) # Record the module's filename if hasattr(module, '__file__'): - try: module_doc.filename = unicode(module.__file__) + try: module_doc.filename = str(module.__file__) except KeyboardInterrupt: raise except: pass if module_doc.filename is not UNKNOWN: @@ -222,7 +222,7 @@ def introspect_module(module, module_doc, module_name=None, preliminary=False): # package; so set is_package=True and record its __path__. 
 if hasattr(module, '__path__'):
 module_doc.is_package = True
- try: module_doc.path = [unicode(p) for p in module.__path__]
+ try: module_doc.path = [str(p) for p in module.__path__]
 except KeyboardInterrupt: raise
 except: pass
 else:
@@ -388,7 +388,7 @@ def introspect_class(cls, class_doc, module_name=None):
 class_doc.variables = {}
 if hasattr(cls, '__dict__'):
 private_prefix = '_%s__' % getattr(cls, '__name__', '')
- for child_name, child in cls.__dict__.items():
+ for child_name, child in list(cls.__dict__.items()):
 if (child_name in base_children
 and base_children[child_name] == child):
 continue
@@ -418,11 +418,11 @@ def introspect_routine(routine, routine_doc, module_name=None):
 # Extract the underying function
 if isinstance(routine, MethodType):
- func = routine.im_func
+ func = routine.__func__
 elif isinstance(routine, staticmethod):
 func = routine.__get__(0)
 elif isinstance(routine, classmethod):
- func = routine.__get__(0).im_func
+ func = routine.__get__(0).__func__
 else:
 func = routine
 
@@ -447,13 +447,13 @@
 routine_doc.posarg_defaults[i+offset] = default_val
 
 # If it's a bound method, then strip off the first argument.
- if isinstance(routine, MethodType) and routine.im_self is not None:
+ if isinstance(routine, MethodType) and routine.__self__ is not None:
 routine_doc.posargs = routine_doc.posargs[1:]
 routine_doc.posarg_defaults = routine_doc.posarg_defaults[1:]
 
 # Set the routine's line number.
- if hasattr(func, 'func_code'):
- routine_doc.lineno = func.func_code.co_firstlineno
+ if hasattr(func, '__code__'): #Python 3 renamed func_code to __code__; testing the old name would leave lineno forever unset
+ routine_doc.lineno = func.__code__.co_firstlineno
 else:
 # [XX] I should probably use UNKNOWN here??
@@ -561,10 +561,10 @@ def get_docstring(value, module_name=None):
 docstring = getattr(value, '__doc__', None)
 if docstring is None:
 return None
- elif isinstance(docstring, unicode):
+ elif isinstance(docstring, str):
 return docstring
- elif isinstance(docstring, str):
- try: return unicode(docstring, 'ascii')
+ elif isinstance(docstring, bytes): #after unicode->str above, a second str branch could never be reached; bytes docstrings are what still need decoding
+ try: return str(docstring, 'ascii')
 except UnicodeDecodeError:
 if module_name is None:
 module_name = get_containing_module(value)
@@ -573,7 +573,7 @@
 module = get_value_from_name(module_name)
 filename = py_src_filename(module.__file__)
 encoding = epydoc.docparser.get_module_encoding(filename)
- return unicode(docstring, encoding)
+ return str(docstring, encoding)
 except KeyboardInterrupt: raise
 except Exception: pass
 if hasattr(value, '__name__'): name = value.__name__
@@ -581,7 +581,7 @@
 log.warning("%s's docstring is not a unicode string, but it "
 "contains non-ascii data -- treating it as "
 "latin-1." % name)
- return unicode(docstring, 'latin-1')
+ return str(docstring, 'latin-1')
 return None
 elif value is BuiltinMethodType:
 # Don't issue a warning for this special case.
@@ -630,15 +630,15 @@ def get_canonical_name(value, strict=False):
 dotted_name = DottedName(value.__module__, value.__name__,
 strict=strict)
- elif (inspect.ismethod(value) and value.im_self is not None and
- value.im_class is ClassType and
+ elif (inspect.ismethod(value) and value.__self__ is not None and
+ value.__self__.__class__ is ClassType and
 not value.__name__.startswith('<')): # class method.
- class_name = get_canonical_name(value.im_self)
+ class_name = get_canonical_name(value.__self__)
 if class_name is UNKNOWN: return UNKNOWN
 dotted_name = DottedName(class_name, value.__name__, strict=strict)
 elif (inspect.ismethod(value) and not value.__name__.startswith('<')):
- class_name = get_canonical_name(value.im_class)
+ class_name = get_canonical_name(value.__self__.__class__)
 if class_name is UNKNOWN: return UNKNOWN
 dotted_name = DottedName(class_name, value.__name__, strict=strict)
 elif (isinstance(value, FunctionType) and
@@ -689,11 +689,11 @@ def get_containing_module(value):
 return DottedName(value.__name__)
 elif isclass(value):
 return DottedName(value.__module__)
- elif (inspect.ismethod(value) and value.im_self is not None and
- value.im_class is ClassType): # class method.
- return DottedName(value.im_self.__module__)
+ elif (inspect.ismethod(value) and value.__self__ is not None and
+ value.__self__.__class__ is ClassType): # class method.
+ return DottedName(value.__self__.__module__)
 elif inspect.ismethod(value):
- return DottedName(value.im_class.__module__)
+ return DottedName(value.__self__.__class__.__module__)
 elif inspect.isroutine(value):
 module = _find_function_module(value)
 if module is None: return None
@@ -720,10 +720,10 @@ def _find_function_module(func):
 # a couple special cases (including using epydoc to document
 # itself). In particular, if a module gets loaded twice, using
 # two different names for the same file, then this helps.
- for module in sys.modules.values():
+ for module in list(sys.modules.values()):
 if (hasattr(module, '__dict__') and
- hasattr(func, 'func_globals') and
- func.func_globals is module.__dict__):
+ hasattr(func, '__globals__') and #same Python 3 rename as func_code -> __code__; the old name would make this test always fail
+ func.__globals__ is module.__dict__):
 return module.__name__
 return None
@@ -863,8 +863,8 @@ def get_value_from_name(name, globs=None):
 # the requested name refers to a builtin.
 try:
 module = _import(name[0])
- except ImportError, e:
- if globs is None: globs = __builtin__.__dict__
+ except ImportError as e:
+ if globs is None: globs = builtins.__dict__
 if name[0] in globs:
 try: return _lookup(globs[name[0]], name[1:])
 except: raise e
@@ -900,7 +900,7 @@ def _import(name, filename=None):
 # explicitly store sys.path.
 old_sys = sys.__dict__.copy()
 old_sys_path = sys.path[:]
- old_builtins = __builtin__.__dict__.copy()
+ old_builtins = builtins.__dict__.copy()
 # Add the current directory to sys.path, in case they're trying to
 # import a module by name that resides in the current directory.
@@ -935,8 +935,8 @@
 raise ImportError(estr)
 finally:
 # Restore the important values that we saved.
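+        # (The save/restore dance here exists because _import() executes
+        # arbitrary module code: whatever the imported module did to sys,
+        # sys.path or the builtins namespace is undone afterwards.)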
- __builtin__.__dict__.clear() - __builtin__.__dict__.update(old_builtins) + builtins.__dict__.clear() + builtins.__dict__.update(old_builtins) sys.__dict__.clear() sys.__dict__.update(old_sys) sys.path = old_sys_path @@ -983,7 +983,7 @@ def read(self, size=0): return '' def readline(self, size=0): return '' def readlines(self, sizehint=0): return [] def seek(self, offset, whence=0): pass - def tell(self): return 0L + def tell(self): return 0 def truncate(self, size=0): pass def write(self, str): pass def writelines(self, sequence): pass diff --git a/Legacy/epydoc-3.0.1/epydoc/docparser.py b/Legacy/epydoc-3.0.1/epydoc/docparser.py index b52e226d5..f93762d72 100644 --- a/Legacy/epydoc-3.0.1/epydoc/docparser.py +++ b/Legacy/epydoc-3.0.1/epydoc/docparser.py @@ -64,7 +64,7 @@ # API documentation encoding: from epydoc.apidoc import * # For looking up the docs of builtins: -import __builtin__, exceptions +import builtins, exceptions import epydoc.docintrospecter # Misc utility functions: from epydoc.util import * @@ -227,7 +227,7 @@ def parse_docs(filename=None, name=None, context=None, is_script=False): # Use a python source version, if possible. if not is_script: try: filename = py_src_filename(filename) - except ValueError, e: raise ImportError('%s' % e) + except ValueError as e: raise ImportError('%s' % e) # Check the cache, first. if filename in _moduledoc_cache: @@ -276,12 +276,12 @@ def parse_docs(filename=None, name=None, context=None, is_script=False): # Tokenize & process the contents of the module's source file. try: process_file(module_doc) - except tokenize.TokenError, e: + except tokenize.TokenError as e: msg, (srow, scol) = e.args raise ParseError('Error during parsing: %s ' '(%s, line %d, char %d)' % (msg, module_doc.filename, srow, scol)) - except IndentationError, e: + except IndentationError as e: raise ParseError('Error during parsing: %s (%s)' % (e, module_doc.filename)) @@ -322,7 +322,7 @@ def handle_special_module_vars(module_doc): if toktree is not None: try: public_names = set(parse_string_list(toktree)) - for name, var_doc in module_doc.variables.items(): + for name, var_doc in list(module_doc.variables.items()): if name in public_names: var_doc.is_public = True if not isinstance(var_doc, ModuleDoc): @@ -331,7 +331,7 @@ def handle_special_module_vars(module_doc): var_doc.is_public = False except ParseError: # If we couldn't parse the list, give precedence to introspection. - for name, var_doc in module_doc.variables.items(): + for name, var_doc in list(module_doc.variables.items()): if not isinstance(var_doc, ModuleDoc): var_doc.is_imported = UNKNOWN del module_doc.variables['__all__'] @@ -442,7 +442,7 @@ def _get_filename(identifier, path=None): fp, filename, (s,m,typ) = imp.find_module(identifier, path) if fp is not None: fp.close() except ImportError: - raise ImportError, 'No Python source file found.' + raise ImportError('No Python source file found.') if typ == imp.PY_SOURCE: return filename @@ -450,21 +450,21 @@ def _get_filename(identifier, path=None): # See if we can find a corresponding non-compiled version. filename = re.sub('.py\w$', '.py', filename) if not os.path.exists(filename): - raise ImportError, 'No Python source file found.' + raise ImportError('No Python source file found.') return filename elif typ == imp.PKG_DIRECTORY: filename = os.path.join(filename, '__init__.py') if not os.path.exists(filename): filename = os.path.join(filename, '__init__.pyw') if not os.path.exists(filename): - raise ImportError, 'No package file found.' 
+ raise ImportError('No package file found.') return filename elif typ == imp.C_BUILTIN: - raise ImportError, 'No Python source file for builtin modules.' + raise ImportError('No Python source file for builtin modules.') elif typ == imp.C_EXTENSION: - raise ImportError, 'No Python source file for c extensions.' + raise ImportError('No Python source file for c extensions.') else: - raise ImportError, 'No Python source file found.' + raise ImportError('No Python source file found.') #///////////////////////////////////////////////////////////////// #{ File tokenization loop @@ -536,7 +536,7 @@ def process_file(module_doc): for toktype, toktext, (srow,scol), (erow,ecol), line_str in tok_iter: # BOM encoding marker: ignore. if (toktype == token.ERRORTOKEN and - (toktext == u'\ufeff' or + (toktext == '\ufeff' or toktext.encode(encoding) == '\xef\xbb\xbf')): pass @@ -629,12 +629,12 @@ def process_file(module_doc): prev_line_doc = process_line( shallow_parse(line_toks), parent_docs, prev_line_doc, lineno, comments, decorators, encoding) - except ParseError, e: + except ParseError as e: raise ParseError('Error during parsing: invalid ' 'syntax (%s, line %d) -- %s' % (module_doc.filename, lineno, e)) - except KeyboardInterrupt, e: raise - except Exception, e: + except KeyboardInterrupt as e: raise + except Exception as e: log.error('Internal error during parsing (%s, line ' '%s):\n%s' % (module_doc.filename, lineno, e)) raise @@ -673,7 +673,7 @@ def add_to_group(container, api_doc, group_name): if isinstance(api_doc, VariableDoc): var_name = api_doc.name else: - if api_doc.canonical_name is UNKNOWN: log.debug('ouch', `api_doc`) + if api_doc.canonical_name is UNKNOWN: log.debug('ouch', repr(api_doc)) var_name = api_doc.canonical_name[-1] for (name, group_vars) in container.group_specs: @@ -919,7 +919,7 @@ def _process_fromstar_import(src, parent_docs): try: module_doc = _find(src) except ImportError: module_doc = None if isinstance(module_doc, ModuleDoc): - for name, imp_var in module_doc.variables.items(): + for name, imp_var in list(module_doc.variables.items()): # [xx] this is not exactly correct, but close. It # does the wrong thing if a __var__ is explicitly # listed in __all__. @@ -1460,7 +1460,7 @@ def apply_decorator(decorator_name, func_doc): elif DEFAULT_DECORATOR_BEHAVIOR == 'opaque': return GenericValueDoc(docs_extracted_by='parser') else: - raise ValueError, 'Bad value for DEFAULT_DECORATOR_BEHAVIOR' + raise ValueError('Bad value for DEFAULT_DECORATOR_BEHAVIOR') def init_arglist(func_doc, arglist): if not isinstance(arglist, list) or arglist[0] != (token.OP, '('): @@ -1547,7 +1547,7 @@ def process_classdef(line, parent_docs, prev_line_doc, lineno, try: for base_name in parse_classdef_bases(line[2]): class_doc.bases.append(find_base(base_name, parent_docs)) - except ParseError, e: + except ParseError as e: log.warning("Unable to extract the base list for %s: %s" % (canonical_name, e)) class_doc.bases = UNKNOWN @@ -1849,7 +1849,7 @@ def set_variable(namespace, var_doc, preserve_docstring=False): # This happens when the class definition has not been parsed, e.g. 
in # sf bug #1693253 on ``Exception.x = y`` if namespace.sort_spec is UNKNOWN: - namespace.sort_spec = namespace.variables.keys() + namespace.sort_spec = list(namespace.variables.keys()) # If we already have a variable with this name, then remove the # old VariableDoc from the sort_spec list; and if we gave its @@ -1900,7 +1900,7 @@ def lookup_name(identifier, parent_docs): # nested scopes, because nested scope lookup does not apply to # nested class definitions, and we're not worried about variables # in nested functions. - if not isinstance(identifier, basestring): + if not isinstance(identifier, str): raise TypeError('identifier must be a string') # Locals diff --git a/Legacy/epydoc-3.0.1/epydoc/docstringparser.py b/Legacy/epydoc-3.0.1/epydoc/docstringparser.py index b609bc9d2..e416d9639 100644 --- a/Legacy/epydoc-3.0.1/epydoc/docstringparser.py +++ b/Legacy/epydoc-3.0.1/epydoc/docstringparser.py @@ -37,7 +37,7 @@ from epydoc.util import py_src_filename from epydoc import log import epydoc.docparser -import __builtin__, exceptions +import builtins, exceptions ###################################################################### # Docstring Fields @@ -242,14 +242,14 @@ def parse_docstring(api_doc, docindex, suppress_warnings=[]): try: process_field(init_api_doc, docindex, field.tag(), field.arg(), field.body()) - except ValueError, e: field_warnings.append(str(e)) + except ValueError as e: field_warnings.append(str(e)) # Process fields for field in fields: try: process_field(api_doc, docindex, field.tag(), field.arg(), field.body()) - except ValueError, e: field_warnings.append(str(e)) + except ValueError as e: field_warnings.append(str(e)) # Check to make sure that all type parameters correspond to # some documented parameter. @@ -303,7 +303,7 @@ def add_metadata_from_var(api_doc, field): value = [] # Try extracting the value from the pyval. 
- ok_types = (basestring, int, float, bool, type(None))
+ ok_types = (str, int, float, bool, type(None))
 if val_doc.pyval is not UNKNOWN:
 if isinstance(val_doc.pyval, ok_types):
 value = [val_doc.pyval]
@@ -329,7 +329,7 @@
 if isinstance(elt, str):
 elt = decode_with_backslashreplace(elt)
 else:
- elt = unicode(elt)
+ elt = str(elt)
 elt = epytext.ParsedEpytextDocstring(
 epytext.parse_as_para(elt), inline=True)
 
@@ -480,7 +480,7 @@ def report_errors(api_doc, docindex, parse_errors, field_warnings):
 if isinstance(api_doc, ValueDoc) and api_doc != module:
 if module not in (None, UNKNOWN) and module.pyval is exceptions:
 return
- for builtin_val in __builtin__.__dict__.values():
+ for builtin_val in list(builtins.__dict__.values()):
 if builtin_val is api_doc.pyval:
 return
 
@@ -516,7 +516,7 @@
 error.set_linenum_offset(startline)
 message = error.descr()
 messages.setdefault(message, []).append(error.linenum())
- message_items = messages.items()
- message_items.sort(lambda a,b:cmp(min(a[1]), min(b[1])))
+ message_items = list(messages.items())
+ message_items.sort(key=lambda item: min(item[1])) #cmp() and the positional comparator are gone in Python 3; sort by the smallest line number instead
 for message, linenums in message_items:
 linenums = [n for n in linenums if n is not None]
@@ -667,7 +667,7 @@ def process_undocumented_field(api_doc, docindex, tag, arg, descr):
 _check(api_doc, tag, arg, context=NamespaceDoc, expect_arg=False)
 for ident in _descr_to_identifiers(descr):
 var_name_re = re.compile('^%s$' % ident.replace('*', '(.*)'))
- for var_name, var_doc in api_doc.variables.items():
+ for var_name, var_doc in list(api_doc.variables.items()):
 if var_name_re.match(var_name):
 # Remove the variable from `variables`.
 api_doc.variables.pop(var_name, None)
@@ -707,7 +707,7 @@ def process_deffield_field(api_doc, docindex, tag, arg, descr):
 docstring_field = _descr_to_docstring_field(arg, descr)
 docstring_field.varnames.append("__%s__" % arg)
 api_doc.extra_docstring_fields.append(docstring_field)
- except ValueError, e:
+ except ValueError as e:
 raise ValueError('Bad %s: %s' % (tag, e))
 
 def process_raise_field(api_doc, docindex, tag, arg, descr):
@@ -936,7 +936,7 @@ def unindent_docstring(docstring):
 lines = docstring.expandtabs().split('\n')
 
 # Find minimum indentation of any non-blank lines after first line.
- margin = sys.maxint
+ margin = sys.maxsize
 for line in lines[1:]:
 content = len(line.lstrip())
 if content:
@@ -945,7 +945,7 @@
 # Remove indentation.
 if lines:
 lines[0] = lines[0].lstrip()
- if margin < sys.maxint:
+ if margin < sys.maxsize:
 for i in range(1, len(lines)):
 lines[i] = lines[i][margin:]
 # Remove any trailing (but not leading!) blank lines.
while lines and not lines[-1]: @@ -976,7 +976,7 @@ def _descr_to_identifiers(descr): idents = descr.to_plaintext(None).strip() idents = re.sub(r'\s+', ' ', idents) if not _IDENTIFIER_LIST_REGEXP.match(idents): - raise ValueError, 'Bad Identifier list: %r' % idents + raise ValueError('Bad Identifier list: %r' % idents) rval = re.split('[:;, ] *', idents) return rval @@ -985,7 +985,7 @@ def _descr_to_docstring_field(arg, descr): descr = descr.to_plaintext(None).strip() args = re.split('[:;,] *', descr) if len(args) == 0 or len(args) > 3: - raise ValueError, 'Wrong number of arguments' + raise ValueError('Wrong number of arguments') singular = args[0] if len(args) >= 2: plural = args[1] else: plural = None diff --git a/Legacy/epydoc-3.0.1/epydoc/docwriter/dotgraph.py b/Legacy/epydoc-3.0.1/epydoc/docwriter/dotgraph.py index b7128d31f..fbe4a615c 100644 --- a/Legacy/epydoc-3.0.1/epydoc/docwriter/dotgraph.py +++ b/Legacy/epydoc-3.0.1/epydoc/docwriter/dotgraph.py @@ -115,7 +115,7 @@ def __init__(self, title, body='', node_defaults=None, have the same uid.""" # Encode the title, if necessary. - if isinstance(self.title, unicode): + if isinstance(self.title, str): self.title = self.title.encode('ascii', 'xmlcharrefreplace') # Make sure the UID isn't too long. @@ -247,7 +247,7 @@ def _run_dot(self, *options): result, err = run_subprocess((DOT_COMMAND,)+options, self.to_dotfile()) if err: log.warning("Graphviz dot warning(s):\n%s" % err) - except OSError, e: + except OSError as e: log.warning("Unable to render Graphviz dot graph:\n%s" % e) #log.debug(self.to_dotfile()) return None @@ -261,9 +261,9 @@ def to_dotfile(self): """ lines = ['digraph %s {' % self.uid, 'node [%s]' % ','.join(['%s="%s"' % (k,v) for (k,v) - in self.node_defaults.items()]), + in list(self.node_defaults.items())]), 'edge [%s]' % ','.join(['%s="%s"' % (k,v) for (k,v) - in self.edge_defaults.items()])] + in list(self.edge_defaults.items())])] if self.body: lines.append(self.body) lines.append('/* Nodes */') @@ -275,7 +275,7 @@ def to_dotfile(self): lines.append('}') # Default dot input encoding is UTF-8 - return u'\n'.join(lines).encode('utf-8') + return '\n'.join(lines).encode('utf-8') class DotGraphNode: _next_id = 0 @@ -304,7 +304,7 @@ def to_dotfile(self): """ Return the dot commands that should be used to render this node. """ - attribs = ['%s="%s"' % (k,v) for (k,v) in self._attribs.items() + attribs = ['%s="%s"' % (k,v) for (k,v) in list(self._attribs.items()) if v is not None] if self._html_label: attribs.insert(0, 'label=<%s>' % (self._html_label,)) @@ -341,7 +341,7 @@ def to_dotfile(self): if (self.end.port is not None and 'tailport' not in attribs): attribs['tailport'] = self.end.port # Convert attribs to a string - attribs = ','.join(['%s="%s"' % (k,v) for (k,v) in attribs.items() + attribs = ','.join(['%s="%s"' % (k,v) for (k,v) in list(attribs.items()) if v is not None]) if attribs: attribs = ' [%s]' % attribs # Return the dotfile edge. 
@@ -791,7 +791,7 @@ def _get_html_label(self): attributes, operations, qualifiers) def to_dotfile(self): - attribs = ['%s="%s"' % (k,v) for (k,v) in self._attribs.items()] + attribs = ['%s="%s"' % (k,v) for (k,v) in list(self._attribs.items())] attribs.append('label=<%s>' % self._get_html_label()) s = 'node%d%s' % (self.id, ' [%s]' % (','.join(attribs))) if not self.collapsed: @@ -929,7 +929,7 @@ def _color(self, package, depth): return '#%06x' % ((red<<16)+(green<<8)+blue) def to_dotfile(self): - attribs = ['%s="%s"' % (k,v) for (k,v) in self._attribs.items()] + attribs = ['%s="%s"' % (k,v) for (k,v) in list(self._attribs.items())] label, depth, width = self._get_html_label(self.module_doc) attribs.append('label=<%s>' % label) return 'node%d%s' % (self.id, ' [%s]' % (','.join(attribs))) @@ -1100,7 +1100,7 @@ def uml_class_tree_graph(class_doc, linker, context=None, **options): # Only show variables in the class where they're defined for # *class_doc*. mro = class_doc.mro() - for name, var in class_doc.variables.items(): + for name, var in list(class_doc.variables.items()): i = mro.index(var.container) for base in mro[i+1:]: if base.pyval is object: continue # don't include `object`. @@ -1120,7 +1120,7 @@ def uml_class_tree_graph(class_doc, linker, context=None, **options): # Turn attributes into links. if options.get('link_attributes', True): - for node in nodes.values(): + for node in list(nodes.values()): node.link_attributes(nodes) # Make sure that none of the new attribute edges break the # rank ordering assigned by inheritance. @@ -1131,7 +1131,7 @@ def uml_class_tree_graph(class_doc, linker, context=None, **options): # Construct the graph. graph = DotGraph('UML class diagram for %s' % class_doc.canonical_name, body='ranksep=.2\n;nodesep=.3\n') - graph.nodes = nodes.values() + graph.nodes = list(nodes.values()) # Add inheritance edges. for node in inheritance_nodes: @@ -1200,7 +1200,7 @@ def call_graph(api_docs, docindex, linker, context=None, **options): if isinstance(api_doc, RoutineDoc): functions.append(api_doc) elif isinstance(api_doc, NamespaceDoc): - for vardoc in api_doc.variables.values(): + for vardoc in list(api_doc.variables.values()): if isinstance(vardoc.value, RoutineDoc): functions.append(vardoc.value) @@ -1258,7 +1258,7 @@ def get_dot_version(): _dot_version = [int(x) for x in m.group(1).split('.')] else: _dot_version = (0,) - except OSError, e: + except OSError as e: _dot_version = (0,) log.info('Detected dot version %s' % _dot_version) return _dot_version diff --git a/Legacy/epydoc-3.0.1/epydoc/docwriter/html.py b/Legacy/epydoc-3.0.1/epydoc/docwriter/html.py index e7791ef90..40d25c601 100644 --- a/Legacy/epydoc-3.0.1/epydoc/docwriter/html.py +++ b/Legacy/epydoc-3.0.1/epydoc/docwriter/html.py @@ -15,8 +15,8 @@ __docformat__ = 'epytext en' import re, os, sys, codecs, sre_constants, pprint, base64 -import urllib -import __builtin__ +import urllib.request, urllib.parse, urllib.error +import builtins from epydoc.apidoc import * import epydoc.docstringparser import time, epydoc, epydoc.markup, epydoc.markup.epytext @@ -147,7 +147,7 @@ def compile_template(docstring, template_string, #log.debug(pysrc) if debug: localdict = {'__debug': (pysrc_lines, func_name)} else: localdict = {} - try: exec pysrc in globals(), localdict + try: exec(pysrc, globals(), localdict) except SyntaxError: log.error('Error in script:\n' + pysrc + '\n') raise @@ -162,7 +162,7 @@ def strip_indent(s): that amount of indentation from all lines in C{s}. """ # Strip indentation from the template. 
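+    # sys.maxint no longer exists in Python 3; sys.maxsize plays the same
+    # role here: a sentinel larger than any plausible indentation width.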
- minindent = sys.maxint
+ minindent = sys.maxsize
 lines = s.split('\n')
 for line in lines:
 stripline = line.lstrip()
@@ -378,7 +378,7 @@ def __init__(self, docindex, **kwargs):
 
 # Make sure inheritance has a sane value.
 if self._inheritance not in ('listed', 'included', 'grouped'):
- raise ValueError, 'Bad value for inheritance'
+ raise ValueError('Bad value for inheritance')
 
 # Create the project homepage link, if it was not specified.
 if (self._prj_name or self._prj_url) and not self._prj_link:
@@ -422,7 +422,7 @@ def __init__(self, docindex, **kwargs):
 if isinstance(doc, NamespaceDoc):
 # add any vars with generic values; but don't include
 # inherited vars.
- self.indexed_docs += [d for d in doc.variables.values() if
+ self.indexed_docs += [d for d in list(doc.variables.values()) if
 isinstance(d.value, GenericValueDoc)
 and d.container == doc]
 self.indexed_docs.sort()
@@ -629,15 +629,15 @@ def write(self, directory=None):
 self._write(self.write_toc, directory, 'toc.html')
 self._write(self.write_project_toc, directory, 'toc-everything.html')
 for doc in self.module_list:
- filename = 'toc-%s' % urllib.unquote(self.url(doc))
+ filename = 'toc-%s' % urllib.parse.unquote(self.url(doc))
 self._write(self.write_module_toc, directory, filename, doc)
 
 # Write the object documentation.
 for doc in self.module_list:
- filename = urllib.unquote(self.url(doc))
+ filename = urllib.parse.unquote(self.url(doc))
 self._write(self.write_module, directory, filename, doc)
 for doc in self.class_list:
- filename = urllib.unquote(self.url(doc))
+ filename = urllib.parse.unquote(self.url(doc))
 self._write(self.write_class, directory, filename, doc)
 
 # Write source code files.
@@ -651,11 +651,11 @@ def write(self, directory=None):
 name = api_doc.canonical_name[-1]
 name_to_docs.setdefault(name, []).append(api_doc)
 # Sort each entry of the name_to_docs list.
- for doc_list in name_to_docs.values():
+ for doc_list in list(name_to_docs.values()):
 doc_list.sort()
 # Write the source code for each module.
 for doc in self.modules_with_sourcecode:
- filename = urllib.unquote(self.pysrc_url(doc))
+ filename = urllib.parse.unquote(self.pysrc_url(doc))
 self._write(self.write_sourcecode, directory, filename, doc,
 name_to_docs)
@@ -672,17 +672,17 @@ def write(self, directory=None):
 self.write_homepage(directory)
 
 # Don't report references to builtins as missing
- for k in self._failed_xrefs.keys(): # have a copy of keys
- if hasattr(__builtin__, k): del self._failed_xrefs[k]
+ for k in list(self._failed_xrefs.keys()): # have a copy of keys
+ if hasattr(builtins, k): del self._failed_xrefs[k] #the import at the top is now `builtins`; the old __builtin__ name would raise NameError here
 
 # Report any failed crossreferences
 if self._failed_xrefs:
 estr = 'Failed identifier crossreference targets:\n'
- failed_identifiers = self._failed_xrefs.keys()
+ failed_identifiers = list(self._failed_xrefs.keys())
 failed_identifiers.sort()
 for identifier in failed_identifiers:
- names = self._failed_xrefs[identifier].keys()
+ names = list(self._failed_xrefs[identifier].keys())
 names.sort()
 estr += '- %s' % identifier
 estr += '\n'
@@ -1292,7 +1292,7 @@ def write_project_toc(self, out):
 # List the functions.
 funcs = [d for d in self.routine_list
 if not isinstance(self.docindex.container(d),
- (ClassDoc, types.NoneType))]
+ (ClassDoc, type(None)))]
 self.write_toc_section(out, "All Functions", funcs)
 
 # List the variables.
@@ -1444,15 +1444,15 @@ def write_css(self, directory, cssname): def write_javascript(self, directory): jsfile = open(os.path.join(directory, 'epydoc.js'), 'w') - print >> jsfile, self.TOGGLE_PRIVATE_JS - print >> jsfile, self.SHOW_PRIVATE_JS - print >> jsfile, self.GET_COOKIE_JS - print >> jsfile, self.SET_FRAME_JS - print >> jsfile, self.HIDE_PRIVATE_JS - print >> jsfile, self.TOGGLE_CALLGRAPH_JS - print >> jsfile, html_colorize.PYSRC_JAVASCRIPTS - print >> jsfile, self.GET_ANCHOR_JS - print >> jsfile, self.REDIRECT_URL_JS + print(self.TOGGLE_PRIVATE_JS, file=jsfile) + print(self.SHOW_PRIVATE_JS, file=jsfile) + print(self.GET_COOKIE_JS, file=jsfile) + print(self.SET_FRAME_JS, file=jsfile) + print(self.HIDE_PRIVATE_JS, file=jsfile) + print(self.TOGGLE_CALLGRAPH_JS, file=jsfile) + print(html_colorize.PYSRC_JAVASCRIPTS, file=jsfile) + print(self.GET_ANCHOR_JS, file=jsfile) + print(self.REDIRECT_URL_JS, file=jsfile) jsfile.close() #: A javascript that is used to show or hide the API documentation @@ -1651,7 +1651,7 @@ def render_callgraph(self, callgraph, token=""): """ if callgraph is None: return "" - if isinstance(callgraph, basestring): + if isinstance(callgraph, str): uid = callgraph rv = self._callgraph_cache.get(callgraph, "") @@ -1692,7 +1692,7 @@ def callgraph_link(self, callgraph, token=""): # Use class=codelink, to match style w/ the source code link. if callgraph is None: return '' - if isinstance(callgraph, basestring): + if isinstance(callgraph, str): uid = callgraph else: uid = callgraph.uid @@ -1718,7 +1718,7 @@ def callgraph_link(self, callgraph, token=""): } def write_images(self, directory): - for (name, data) in self.IMAGES.items(): + for (name, data) in list(self.IMAGES.items()): f = open(os.path.join(directory, name), 'wb') f.write(base64.decodestring(data)) f.close() @@ -2701,7 +2701,7 @@ def func_arg(self, name, default, css_class): return s def _arg_name(self, arg): - if isinstance(arg, basestring): + if isinstance(arg, str): return arg elif len(arg) == 1: return '(%s,)' % self._arg_name(arg[0]) @@ -2841,7 +2841,7 @@ def write_standard_fields(self, out, doc): out('
') for field in fields: if field.takes_arg: - for arg, descrs in field_values[field].items(): + for arg, descrs in list(field_values[field].items()): self.write_standard_field(out, doc, field, descrs, arg) else: @@ -2963,7 +2963,7 @@ def build_metadata_index(self, field_name): for (field, arg, descr) in doc.metadata: if field.tags[0] == field_name: descrs.setdefault(arg, []).append(descr) - for (arg, descr_list) in descrs.iteritems(): + for (arg, descr_list) in descrs.items(): index.setdefault(arg, []).append( (doc, descr_list) ) return index @@ -3069,13 +3069,13 @@ def write_api_list(self, out): skip = (ModuleDoc, ClassDoc, type(UNKNOWN)) for val_doc in self.module_list: self.write_url_record(out, val_doc) - for var in val_doc.variables.itervalues(): + for var in val_doc.variables.values(): if not isinstance(var.value, skip): self.write_url_record(out, var) for val_doc in self.class_list: self.write_url_record(out, val_doc) - for var in val_doc.variables.itervalues(): + for var in val_doc.variables.values(): self.write_url_record(out, var) def write_url_record(self, out, obj): @@ -3091,7 +3091,7 @@ def _val_is_public(self, valdoc): """Make a best-guess as to whether the given class is public.""" container = self.docindex.container(valdoc) if isinstance(container, NamespaceDoc): - for vardoc in container.variables.values(): + for vardoc in list(container.variables.values()): if vardoc in (UNKNOWN, None): continue if vardoc.value is valdoc: return vardoc.is_public @@ -3176,11 +3176,11 @@ def _url(self, obj): # Module: -module.html if isinstance(obj, ModuleDoc): if obj not in self.module_set: return None - return urllib.quote('%s'%obj.canonical_name) + '-module.html' + return urllib.parse.quote('%s'%obj.canonical_name) + '-module.html' # Class: -class.html elif isinstance(obj, ClassDoc): if obj not in self.class_set: return None - return urllib.quote('%s'%obj.canonical_name) + '-class.html' + return urllib.parse.quote('%s'%obj.canonical_name) + '-class.html' # Variable elif isinstance(obj, VariableDoc): val_doc = obj.value @@ -3197,7 +3197,7 @@ def _url(self, obj): else: container_url = self.url(obj.container) if container_url is None: return None - return '%s#%s' % (container_url, urllib.quote('%s'%obj.name)) + return '%s#%s' % (container_url, urllib.parse.quote('%s'%obj.name)) # Value (other than module or class) elif isinstance(obj, ValueDoc): container = self.docindex.container(obj) @@ -3206,7 +3206,7 @@ def _url(self, obj): else: container_url = self.url(container) if container_url is None: return None - anchor = urllib.quote('%s'%obj.canonical_name[-1]) + anchor = urllib.parse.quote('%s'%obj.canonical_name[-1]) return '%s#%s' % (container_url, anchor) # Dotted name: look up the corresponding APIDoc elif isinstance(obj, DottedName): @@ -3221,7 +3221,7 @@ def _url(self, obj): elif obj == 'trees': return self._trees_url else: - raise ValueError, "Don't know what to do with %r" % obj + raise ValueError("Don't know what to do with %r" % obj) def pysrc_link(self, api_doc): if not self._incl_sourcecode: @@ -3242,7 +3242,7 @@ def pysrc_url(self, api_doc): elif isinstance(api_doc, ModuleDoc): if api_doc in self.modules_with_sourcecode: return ('%s-pysrc.html' % - urllib.quote('%s' % api_doc.canonical_name)) + urllib.parse.quote('%s' % api_doc.canonical_name)) else: return None else: @@ -3257,7 +3257,7 @@ def pysrc_url(self, api_doc): return module_pysrc_url mname_len = len(module.canonical_name) anchor = '%s' % api_doc.canonical_name[mname_len:] - return '%s#%s' % (module_pysrc_url, 
urllib.quote(anchor))
+        return '%s#%s' % (module_pysrc_url, urllib.parse.quote(anchor))
 
         # We didn't find it:
         return None
@@ -3463,7 +3463,7 @@ def translate_identifier_xref(self, identifier, label=None):
     # [xx] Should this be added to the DocstringLinker interface???
     # Currently, this is *only* used by dotgraph.
     def url_for(self, identifier):
-        if isinstance(identifier, (basestring, DottedName)):
+        if isinstance(identifier, (str, DottedName)):
             doc = self.docindex.find(identifier, self.container)
             if doc:
                 return self.htmlwriter.url(doc)
diff --git a/Legacy/epydoc-3.0.1/epydoc/docwriter/html_colorize.py b/Legacy/epydoc-3.0.1/epydoc/docwriter/html_colorize.py
index 38d0758c5..0ba7a1c12 100644
--- a/Legacy/epydoc-3.0.1/epydoc/docwriter/html_colorize.py
+++ b/Legacy/epydoc-3.0.1/epydoc/docwriter/html_colorize.py
@@ -18,8 +18,7 @@
 from epydoc.util import py_src_filename
 from epydoc.apidoc import *
 import tokenize, token, cgi, keyword
-try: from cStringIO import StringIO
-except: from StringIO import StringIO
+from io import StringIO
 
 ######################################################################
 ## Python source colorizer
@@ -460,7 +459,7 @@ def colorize(self):
         self.find_line_offsets()
 
         num_lines = self.text.count('\n')+1
-        self.linenum_size = len(`num_lines+1`)
+        self.linenum_size = len(repr(num_lines+1))
 
         # Call the tokenizer, and send tokens to our `tokeneater()`
         # method.  If anything goes wrong, then fall-back to using
@@ -472,7 +471,7 @@ def colorize(self):
             html = output.getvalue()
             if self.has_decorators:
                 html = self._FIX_DECORATOR_RE.sub(r'\2\1', html)
-        except tokenize.TokenError, ex:
+        except tokenize.TokenError as ex:
             html = self.text
 
         # Check for a unicode encoding declaration.
@@ -494,16 +493,18 @@ def colorize(self):
 
         return html
 
-    def tokeneater(self, toktype, toktext, (srow,scol), (erow,ecol), line):
+    def tokeneater(self, toktype, toktext, start, end, line):
         """
         A callback function used by C{tokenize.tokenize} to handle
         each token in the module.  C{tokeneater} collects tokens into
         the C{self.cur_line} list until a complete logical line has
         been formed; and then calls L{handle_line} to process that line.
         """
-        # If we encounter any errors, then just give up.
+        # Tuple parameters are gone in Python 3 (PEP 3113); unpack instead.
+        (srow, scol) = start
+        (erow, ecol) = end
+        # If we encounter any errors, then just give up.
         if toktype == token.ERRORTOKEN:
-            raise tokenize.TokenError, toktype
+            raise tokenize.TokenError(toktype)
 
         # Did we skip anything whitespace?  If so, add a pseudotoken
         # for it, with toktype=None.  (Note -- this skipped string
@@ -568,7 +569,7 @@ def handle_line(self, line):
         # Loop through each token, and colorize it appropriately.
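# PEP 3113 removed tuple parameters, which is why 2to3 rewrites callback
# signatures like tokeneater() above: the (row, col) pairs become plain
# positional arguments that are unpacked in the body. A minimal standalone
# sketch (hypothetical helper) driven by the Python 3 tokenize API:

import io
import token
import tokenize

def print_token(toktype, toktext, start, end, line):
    # Unpack the position pairs inside the body instead of the signature.
    (srow, scol) = start
    (erow, ecol) = end
    print(token.tok_name[toktype], repr(toktext), srow, scol, erow, ecol)

# tokenize.generate_tokens() yields 5-tuples; it replaces the Python 2
# callback style tokenize.tokenize(readline, tokeneater) used by this module.
for tok in tokenize.generate_tokens(io.StringIO('x = 1\n').readline):
    print_token(*tok)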
         for i, (toktype, toktext) in enumerate(line):
             if type(s) is not str:
-                if type(s) is unicode:
+                if type(s) is bytes:
                     log.error('While colorizing %s -- got unexpected '
-                              'unicode string' % self.module_name)
-                    s = s.encode('ascii', 'xmlcharrefreplace')
+                              'byte string' % self.module_name)
+                    s = s.decode('ascii', 'replace')
@@ -729,7 +730,7 @@ def handle_line(self, line):
                     (uid, css_class_html, targets_html, tooltip_html,
                      css_class_html, onclick))
             elif url:
-                if isinstance(url, unicode):
-                    url = url.encode('ascii', 'xmlcharrefreplace')
+                if isinstance(url, bytes):
+                    url = url.decode('ascii', 'replace')
                 s += ('<a%s%s href="%s">' %
                       (tooltip_html, css_class_html, url))
@@ -741,8 +742,8 @@ def handle_line(self, line):
         else:
             try:
                 s += self.add_line_numbers(cgi.escape(toktext), css_class)
-            except Exception, e:
-                print (toktext, css_class, toktext.encode('ascii'))
+            except Exception as e:
+                print((toktext, css_class, toktext.encode('ascii')))
                 raise
         if onclick: s += "</a>"
diff --git a/Legacy/epydoc-3.0.1/epydoc/docwriter/latex.py b/Legacy/epydoc-3.0.1/epydoc/docwriter/latex.py
index 18a88cf45..8eddb3fc4 100644
--- a/Legacy/epydoc-3.0.1/epydoc/docwriter/latex.py
+++ b/Legacy/epydoc-3.0.1/epydoc/docwriter/latex.py
@@ -217,7 +217,7 @@ def _write(self, write_func, directory, filename, *args):
         else:
             result = []
             write_func(result.append, *args)
-            s = u''.join(result)
+            s = ''.join(result)
         try:
             s = s.encode(self._encoding)
         except UnicodeError:
@@ -846,7 +846,7 @@ def func_arg(self, name, default):
         return s
 
     def _arg_name(self, arg):
-        if isinstance(arg, basestring):
+        if isinstance(arg, str):
             return arg
         elif len(arg) == 1:
             return '(%s,)' % self._arg_name(arg[0])
@@ -1040,7 +1040,7 @@ def write_standard_fields(self, out, doc):
 
         for field in fields:
             if field.takes_arg:
-                for arg, descrs in field_values[field].items():
+                for arg, descrs in list(field_values[field].items()):
                     self.write_standard_field(out, doc, field, descrs, arg)
 
             else:
diff --git a/Legacy/epydoc-3.0.1/epydoc/docwriter/plaintext.py b/Legacy/epydoc-3.0.1/epydoc/docwriter/plaintext.py
index 00baa70ae..4c6fbef28 100644
--- a/Legacy/epydoc-3.0.1/epydoc/docwriter/plaintext.py
+++ b/Legacy/epydoc-3.0.1/epydoc/docwriter/plaintext.py
@@ -30,9 +30,9 @@ def write(self, api_doc, **options):
                 self.write_function(out, api_doc)
             else:
                 assert 0, ('%s not handled yet' % api_doc.__class__)
-        except Exception, e:
-            print '\n\n'
-            print ''.join(result)
+        except Exception as e:
+            print('\n\n')
+            print(''.join(result))
             raise
         return ''.join(result)
diff --git a/Legacy/epydoc-3.0.1/epydoc/docwriter/xlink.py b/Legacy/epydoc-3.0.1/epydoc/docwriter/xlink.py
index e15bb7f86..49b33f6ce 100644
--- a/Legacy/epydoc-3.0.1/epydoc/docwriter/xlink.py
+++ b/Legacy/epydoc-3.0.1/epydoc/docwriter/xlink.py
@@ -247,7 +247,7 @@ def load_index(self, f):
         """
         self._filename = str(f)
 
-        if isinstance(f, basestring):
+        if isinstance(f, str):
             f = open(f)
         self.load_records(self._iter_tuples(f))
 
@@ -391,7 +391,7 @@ def resolve_api_name(n, rawtext, text, lineno, inliner,
 
     # Get the resolver from the register and create an url from it.
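# The unicode -> str rewrites above are purely mechanical, and wherever the
# Python 2 code branched on str versus unicode the result is a branch that
# can never fire (both arms now test str). The working mapping is: Python 2
# str becomes bytes, Python 2 unicode becomes str, and basestring becomes
# str. A minimal normalization helper in that spirit (hypothetical name,
# assumes ASCII-compatible input):

def ensure_text(s):
    if isinstance(s, bytes):   # the only non-text string type left in Python 3
        return s.decode('ascii', 'replace')
    return s

assert ensure_text(b'abc') == ensure_text('abc') == 'abc'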
     try:
         url = api_register[name].get_url(text)
-    except IndexError, exc:
+    except IndexError as exc:
         msg = inliner.reporter.warning(str(exc), line=lineno)
         if problematic:
             prb = inliner.problematic(rawtext, text, msg)
@@ -498,8 +498,8 @@ def read_configuration(self, settings, problematic=True):
             for name, root in map(split_name, settings.external_api_root):
                 set_api_root(name, root)
 
-        except OptionValueError, exc:
-            print >>sys.stderr, "%s: %s" % (exc.__class__.__name__, exc)
+        except OptionValueError as exc:
+            print("%s: %s" % (exc.__class__.__name__, exc), file=sys.stderr)
             sys.exit(2)
 
     read_configuration = classmethod(read_configuration)
diff --git a/Legacy/epydoc-3.0.1/epydoc/gui.py b/Legacy/epydoc-3.0.1/epydoc/gui.py
index dbd388a7a..460d245ab 100644
--- a/Legacy/epydoc-3.0.1/epydoc/gui.py
+++ b/Legacy/epydoc-3.0.1/epydoc/gui.py
@@ -31,14 +31,14 @@
 __docformat__ = 'epytext en'
 
 import sys, os.path, re, glob
-from Tkinter import *
-from tkFileDialog import askopenfilename, asksaveasfilename
-from thread import start_new_thread, exit_thread
+from tkinter import *
+from tkinter.filedialog import askopenfilename, asksaveasfilename
+from _thread import start_new_thread, exit as exit_thread
 from pickle import dump, load
 
 # askdirectory is only defined in python 2.2+; fall back on
 # asksaveasfilename if it's not available.
-try: from tkFileDialog import askdirectory
+try: from tkinter.filedialog import askdirectory
 except: askdirectory = None
 
 # Include support for Zope, if it's available.
@@ -238,7 +238,7 @@ def document(options, cancel, done):
         log.error('Cancelled!')
         done[0] ='cancel'
         raise
-    except Exception, e:
+    except Exception as e:
         # We failed.
         log.error('Internal error: %s' % e)
         done[0] ='cancel'
@@ -270,7 +270,7 @@ def __init__(self):
         # *not* reload the modules that are present when the EpydocGUI
         # is created, but that should only contain some builtins, some
         # epydoc modules, Tkinter, pickle, and thread..
-        self._old_modules = sys.modules.keys()
+        self._old_modules = list(sys.modules.keys())
 
         # Create the main window.
         self._root = Tk()
@@ -662,7 +662,7 @@ def _init_options(self, optsframe, ctrlframe):
         self._help_browse.grid(row=row, column=3, sticky='ew', padx=2)
 
         from epydoc.docwriter.html_css import STYLESHEETS
-        items = STYLESHEETS.items()
+        items = list(STYLESHEETS.items())
         def _css_sort(css1, css2):
             if css1[0] == 'default': return -1
             elif css2[0] == 'default': return 1
@@ -805,7 +805,7 @@ def destroy(self, *e):
         if self._root is None: return
 
         # Unload any modules that we've imported
-        for m in sys.modules.keys():
+        for m in list(sys.modules.keys()):
             if m not in self._old_modules: del sys.modules[m]
         self._root.destroy()
         self._root = None
@@ -825,7 +825,7 @@ def add_module(self, name, check=0):
                 get_value_from_scriptname(name)
             else:
                 get_value_from_name(name)
-        except ImportError, e:
+        except ImportError as e:
             log.error(e)
             self._update_messages()
             self._root.bell()
@@ -891,7 +891,7 @@ def _go(self, *e):
 
         # Restore the module list.  This will force re-loading of
         # anything that we're documenting.
-        for m in sys.modules.keys():
+        for m in list(sys.modules.keys()):
             if m not in self._old_modules:
                 del sys.modules[m]
 
@@ -1044,7 +1044,7 @@ def open(self, prjfile):
             self._imports_var.set(opts.get('show_imports', 0))
 
             self._css_entry.delete(0, 'end')
-            if opts.get('css', 'default') in STYLESHEETS.keys():
+            if opts.get('css', 'default') in list(STYLESHEETS.keys()):
                 self._css_var.set(opts.get('css', 'default'))
             else:
                 self._css_var.set('-other-')
@@ -1056,7 +1056,7 @@ def open(self, prjfile):
 #                 self._private_css_var.set('-other-')
 #                 self._css_entry.insert(0, opts.get('private_css', 'default'))
 
-        except Exception, e:
+        except Exception as e:
             log.error('Error opening %s: %s' % (prjfile, e))
             self._root.bell()
 
@@ -1065,7 +1065,7 @@ def _save(self, *e):
         try:
             opts = self._getopts()
-            dump(opts, open(self._filename, 'w'))
-        except Exception, e:
+            dump(opts, open(self._filename, 'wb'))
+        except Exception as e:
             if self._filename is None:
                 log.error('Error saving: %s' % e)
             else:
@@ -1087,22 +1087,22 @@ def _version():
     @rtype: C{None}
     """
     import epydoc
-    print "Epydoc version %s" % epydoc.__version__
+    print("Epydoc version %s" % epydoc.__version__)
     sys.exit(0)
 
 # At some point I could add:
 # --show-messages, --hide-messages
 # --show-options, --hide-options
 def _usage():
-    print
-    print 'Usage: epydocgui [OPTIONS] [FILE.prj | MODULES...]'
-    print
-    print '    FILE.prj                  An epydoc GUI project file.'
-    print '    MODULES...                A list of Python modules to document.'
-    print '    -V, --version             Print the version of epydoc.'
-    print '    -h, -?, --help, --usage   Display this usage message'
-    print '    --debug                   Do not suppress error messages'
-    print
+    print()
+    print('Usage: epydocgui [OPTIONS] [FILE.prj | MODULES...]')
+    print()
+    print('    FILE.prj                  An epydoc GUI project file.')
+    print('    MODULES...                A list of Python modules to document.')
+    print('    -V, --version             Print the version of epydoc.')
+    print('    -h, -?, --help, --usage   Display this usage message')
+    print('    --debug                   Do not suppress error messages')
+    print()
     sys.exit(0)
 
 def _error(s):
@@ -1110,7 +1110,7 @@ def _error(s):
     if len(s) > 80:
         i = s.rfind(' ', 0, 80)
         if i>0: s = s[:i]+'\n'+s[i+1:]
-    print >>sys.stderr, s
+    print(s, file=sys.stderr)
     sys.exit(1)
 
 def gui():
diff --git a/Legacy/epydoc-3.0.1/epydoc/log.py b/Legacy/epydoc-3.0.1/epydoc/log.py
index e6fae6839..5591e4e82 100644
--- a/Legacy/epydoc-3.0.1/epydoc/log.py
+++ b/Legacy/epydoc-3.0.1/epydoc/log.py
@@ -122,7 +122,7 @@ class SimpleLogger(Logger):
     def __init__(self, threshold=WARNING):
         self.threshold = threshold
     def log(self, level, message):
-        if level >= self.threshold: print message
+        if level >= self.threshold: print(message)
 
 ######################################################################
 # Logger Registry
diff --git a/Legacy/epydoc-3.0.1/epydoc/markup/__init__.py b/Legacy/epydoc-3.0.1/epydoc/markup/__init__.py
index 2b9565cbc..fd026901e 100644
--- a/Legacy/epydoc-3.0.1/epydoc/markup/__init__.py
+++ b/Legacy/epydoc-3.0.1/epydoc/markup/__init__.py
@@ -161,9 +161,12 @@ def parse(docstring, markup='plaintext', errors=None, **options):
     parse_docstring = _markup_language_registry[markup]
 
     # If it's a string, then it names a function to import.
-    if isinstance(parse_docstring, basestring):
-        try: exec('from %s import parse_docstring' % parse_docstring)
-        except ImportError, e:
+    if isinstance(parse_docstring, str):
+        # exec() cannot rebind a function local in Python 3; import explicitly.
+        try:
+            import importlib
+            parse_docstring = importlib.import_module(
+                parse_docstring).parse_docstring
+        except ImportError as e:
             _parse_warn('Error importing %s for markup language %s: %s' %
                         (parse_docstring, markup, e))
             import epydoc.markup.plaintext as plaintext
@@ -176,7 +179,7 @@ def parse(docstring, markup='plaintext', errors=None, **options):
     # Parse the docstring.
     try: parsed_docstring = parse_docstring(docstring, errors, **options)
     except KeyboardInterrupt: raise
-    except Exception, e:
+    except Exception as e:
         if epydoc.DEBUG: raise
         log.error('Internal error while parsing a docstring: %s; '
                   'treating docstring as plaintext' % e)
@@ -322,7 +325,7 @@ def to_plaintext(self, docstring_linker, **options):
         @return: A plaintext fragment that encodes this docstring.
         @rtype: C{string}
         """
-        raise NotImplementedError, 'ParsedDocstring.to_plaintext()'
+        raise NotImplementedError('ParsedDocstring.to_plaintext()')
 
     def index_terms(self):
         """
@@ -453,7 +456,7 @@ def translate_indexterm(self, indexterm):
         @rtype: C{string}
         @return: The translated index term.
         """
-        raise NotImplementedError, 'DocstringLinker.translate_indexterm()'
+        raise NotImplementedError('DocstringLinker.translate_indexterm()')
 
     def translate_identifier_xref(self, identifier, label=None):
         """
@@ -470,7 +473,7 @@ def translate_identifier_xref(self, identifier, label=None):
         @rtype: C{string}
         @return: The translated crossreference link.
         """
-        raise NotImplementedError, 'DocstringLinker.translate_xref()'
+        raise NotImplementedError('DocstringLinker.translate_xref()')
 
 ##################################################
 ## ParseError exceptions
diff --git a/Legacy/epydoc-3.0.1/epydoc/markup/epytext.py b/Legacy/epydoc-3.0.1/epydoc/markup/epytext.py
index 058c5fa55..e70a020c2 100644
--- a/Legacy/epydoc-3.0.1/epydoc/markup/epytext.py
+++ b/Legacy/epydoc-3.0.1/epydoc/markup/epytext.py
@@ -143,13 +143,13 @@ def __str__(self):
         notation.
 
         @bug: Doesn't escape '<' or '&' or '>'.
         """
-        attribs = ''.join([' %s=%r' % t for t in self.attribs.items()])
+        attribs = ''.join([' %s=%r' % t for t in list(self.attribs.items())])
         return ('<%s%s>' % (self.tag, attribs) +
                 ''.join([str(child) for child in self.children]) +
                 '</%s>' % self.tag)
 
     def __repr__(self):
-        attribs = ''.join([', %s=%r' % t for t in self.attribs.items()])
+        attribs = ''.join([', %s=%r' % t for t in list(self.attribs.items())])
         args = ''.join([', %r' % c for c in self.children])
         return 'Element(%s%s%s)' % (self.tag, args, attribs)
 
@@ -1085,7 +1085,7 @@ def _colorize(doc, token, errors, tagName='para'):
     # Special handling for symbols:
     if stack[-1].tag == 'symbol':
         if (len(stack[-1].children) != 1 or
-            not isinstance(stack[-1].children[0], basestring)):
+            not isinstance(stack[-1].children[0], str)):
             estr = "Invalid symbol code."
             errors.append(ColorizingError(estr, token, end))
         else:
@@ -1100,7 +1100,7 @@ def _colorize(doc, token, errors, tagName='para'):
     # Special handling for escape elements:
     if stack[-1].tag == 'escape':
         if (len(stack[-1].children) != 1 or
-            not isinstance(stack[-1].children[0], basestring)):
+            not isinstance(stack[-1].children[0], str)):
             estr = "Invalid escape code."
errors.append(ColorizingError(estr, token, end)) else: @@ -1157,7 +1157,7 @@ def _colorize_graph(doc, graph, token, end, errors): children = graph.children[:] graph.children = [] - if len(children) != 1 or not isinstance(children[0], basestring): + if len(children) != 1 or not isinstance(children[0], str): bad_graph_spec = "Bad graph specification" else: pieces = children[0].split(None, 1) @@ -1188,7 +1188,7 @@ def _colorize_link(doc, link, token, end, errors): variables = link.children[:] # If the last child isn't text, we know it's bad. - if len(variables)==0 or not isinstance(variables[-1], basestring): + if len(variables)==0 or not isinstance(variables[-1], str): estr = "Bad %s target." % link.tag errors.append(ColorizingError(estr, token, end)) return @@ -1259,7 +1259,7 @@ def to_epytext(tree, indent=0, seclevel=0): @return: The epytext string corresponding to C{tree}. @rtype: C{string} """ - if isinstance(tree, basestring): + if isinstance(tree, str): str = re.sub(r'\{', '\0', tree) str = re.sub(r'\}', '\1', str) return str @@ -1318,7 +1318,7 @@ def to_epytext(tree, indent=0, seclevel=0): elif tree.tag == 'graph': return 'G{%s}' % ' '.join(variables) else: - for (tag, name) in _COLORIZING_TAGS.items(): + for (tag, name) in list(_COLORIZING_TAGS.items()): if name == tree.tag: return '%s{%s}' % (tag, childstr) raise ValueError('Unknown DOM element %r' % tree.tag) @@ -1346,7 +1346,7 @@ def to_plaintext(tree, indent=0, seclevel=0): @return: The epytext string corresponding to C{tree}. @rtype: C{string} """ - if isinstance(tree, basestring): return tree + if isinstance(tree, str): return tree if tree.tag == 'section': seclevel += 1 @@ -1427,7 +1427,7 @@ def to_debug(tree, indent=4, seclevel=0): @return: The epytext string corresponding to C{tree}. 
@rtype: C{string} """ - if isinstance(tree, basestring): + if isinstance(tree, str): str = re.sub(r'\{', '\0', tree) str = re.sub(r'\}', '\1', str) return str @@ -1460,7 +1460,7 @@ def to_debug(tree, indent=4, seclevel=0): str = re.sub('\0', 'E{lb}', childstr) str = re.sub('\1', 'E{rb}', str) uline = len(childstr)*_HEADING_CHARS[seclevel-1] - return ('SEC'+`seclevel`+'>|'+(indent-8)*' ' + str + '\n' + + return ('SEC'+repr(seclevel)+'>|'+(indent-8)*' ' + str + '\n' + ' |'+(indent-8)*' ' + uline + '\n') elif tree.tag == 'doctestblock': str = re.sub('\0', '{', childstr) @@ -1493,7 +1493,7 @@ def to_debug(tree, indent=4, seclevel=0): elif tree.tag == 'graph': return 'G{%s}' % ' '.join(variables) else: - for (tag, name) in _COLORIZING_TAGS.items(): + for (tag, name) in list(_COLORIZING_TAGS.items()): if name == tree.tag: return '%s{%s}' % (tag, childstr) raise ValueError('Unknown DOM element %r' % tree.tag) @@ -1535,19 +1535,19 @@ def pparse(str, show_warnings=1, show_errors=1, stream=sys.stderr): warnings.sort() errors.sort() if warnings: - print >>stream, '='*SCRWIDTH - print >>stream, "WARNINGS" - print >>stream, '-'*SCRWIDTH + print('='*SCRWIDTH, file=stream) + print("WARNINGS", file=stream) + print('-'*SCRWIDTH, file=stream) for warning in warnings: - print >>stream, warning.as_warning() - print >>stream, '='*SCRWIDTH + print(warning.as_warning(), file=stream) + print('='*SCRWIDTH, file=stream) if errors and show_errors: - if not warnings: print >>stream, '='*SCRWIDTH - print >>stream, "ERRORS" - print >>stream, '-'*SCRWIDTH + if not warnings: print('='*SCRWIDTH, file=stream) + print("ERRORS", file=stream) + print('-'*SCRWIDTH, file=stream) for error in errors: - print >>stream, error - print >>stream, '='*SCRWIDTH + print(error, file=stream) + print('='*SCRWIDTH, file=stream) if confused: raise elif errors: raise SyntaxError('Encountered Errors') @@ -1808,7 +1808,7 @@ def _index_term_key(self, tree): def _to_html(self, tree, linker, directory, docindex, context, indent=0, seclevel=0): - if isinstance(tree, basestring): + if isinstance(tree, str): return plaintext_to_html(tree) if tree.tag == 'epytext': indent -= 2 @@ -1933,7 +1933,7 @@ def _build_graph(self, graph_type, graph_args, linker, def _to_latex(self, tree, linker, indent=0, seclevel=0, breakany=0): - if isinstance(tree, basestring): + if isinstance(tree, str): return plaintext_to_latex(tree, breakany=breakany) if tree.tag == 'section': seclevel += 1 @@ -2048,7 +2048,7 @@ def summary(self): para = Element('para', inline=True) doc.children.append(para) for parachild in parachildren: - if isinstance(parachild, basestring): + if isinstance(parachild, str): m = self._SUMMARY_RE.match(parachild) if m: para.children.append(m.group(1)) @@ -2104,7 +2104,7 @@ def index_terms(self): return self._terms def _index_terms(self, tree, terms): - if tree is None or isinstance(tree, basestring): + if tree is None or isinstance(tree, str): return if tree.tag == 'indexed': diff --git a/Legacy/epydoc-3.0.1/epydoc/markup/plaintext.py b/Legacy/epydoc-3.0.1/epydoc/markup/plaintext.py index 9825b3467..07957172e 100644 --- a/Legacy/epydoc-3.0.1/epydoc/markup/plaintext.py +++ b/Legacy/epydoc-3.0.1/epydoc/markup/plaintext.py @@ -28,7 +28,7 @@ def parse_docstring(docstring, errors, **options): class ParsedPlaintextDocstring(ParsedDocstring): def __init__(self, text, **options): self._verbatim = options.get('verbatim', 1) - if text is None: raise ValueError, 'Bad text value (expected a str)' + if text is None: raise ValueError('Bad text value (expected a 
str)')
         self._text = text
 
     def to_html(self, docstring_linker, **options):
diff --git a/Legacy/epydoc-3.0.1/epydoc/markup/pyval_repr.py b/Legacy/epydoc-3.0.1/epydoc/markup/pyval_repr.py
index c6d41e03d..c1b95120f 100644
--- a/Legacy/epydoc-3.0.1/epydoc/markup/pyval_repr.py
+++ b/Legacy/epydoc-3.0.1/epydoc/markup/pyval_repr.py
@@ -127,9 +127,9 @@ def __init__(self, linelen=75, maxlines=5, linebreakok=True, sort=True):
     RE_OP_TAG = 're-op'
     RE_FLAGS_TAG = 're-flags'
 
-    ELLIPSIS = Element('code', u'...', style='variable-ellipsis')
-    LINEWRAP = Element('symbol', u'crarr')
-    UNKNOWN_REPR = Element('code', u'??', style='variable-unknown')
+    ELLIPSIS = Element('code', '...', style='variable-ellipsis')
+    LINEWRAP = Element('symbol', 'crarr')
+    UNKNOWN_REPR = Element('code', '??', style='variable-unknown')
 
     GENERIC_OBJECT_RE = re.compile(r'^<.* at 0x[0-9a-f]+>$', re.IGNORECASE)
 
@@ -180,12 +180,12 @@ def _colorize(self, pyval, state):
         state.score += 1
 
         if pyval is None or pyval is True or pyval is False:
-            self._output(unicode(pyval), self.CONST_TAG, state)
-        elif pyval_type in (int, float, long, types.ComplexType):
-            self._output(unicode(pyval), self.NUMBER_TAG, state)
-        elif pyval_type is str:
-            self._colorize_str(pyval, state, '', 'string-escape')
-        elif pyval_type is unicode:
+            self._output(str(pyval), self.CONST_TAG, state)
+        elif pyval_type in (int, float, complex):
+            self._output(str(pyval), self.NUMBER_TAG, state)
+        elif pyval_type is bytes:
+            self._colorize_str(pyval, state, 'b', 'unicode_escape')
+        elif pyval_type is str:
             if self.ESCAPE_UNICODE:
                 self._colorize_str(pyval, state, 'u', 'unicode-escape')
             else:
@@ -201,15 +201,15 @@ def _colorize(self, pyval, state):
             self._multiline(self._colorize_iter, self._sort(pyval),
                             state, 'frozenset([', '])')
         elif pyval_type is dict:
-            self._multiline(self._colorize_dict, self._sort(pyval.items()),
+            self._multiline(self._colorize_dict, self._sort(list(pyval.items())),
                             state, '{', '}')
         elif is_re_pattern(pyval):
             self._colorize_re(pyval, state)
         else:
             try:
                 pyval_repr = repr(pyval)
-                if not isinstance(pyval_repr, (str, unicode)):
-                    pyval_repr = unicode(pyval_repr)
+                if not isinstance(pyval_repr, str):
+                    pyval_repr = str(pyval_repr)
                 pyval_repr_ok = True
             except KeyboardInterrupt:
                 raise
@@ -325,7 +325,7 @@ def _colorize_re(self, pyval, state):
         # Parse the regexp pattern.
         tree = sre_parse.parse(pat, flags)
         groups = dict([(num,name) for (name,num) in
-                       tree.pattern.groupdict.items()])
+                       list(tree.pattern.groupdict.items())])
         # Colorize it!
         self._output("re.compile(r'", None, state)
         self._colorize_re_flags(tree.pattern.flags, state)
@@ -348,7 +348,7 @@ def _colorize_re_tree(self, tree, state, noparen, groups):
             args = elt[1]
 
             if op == sre_constants.LITERAL:
-                c = unichr(args)
+                c = chr(args)
                 # Add any appropriate escaping.
if c in '.^$\\*+?{}[]|()\'': c = '\\'+c elif c == '\t': c = '\\t' @@ -428,7 +428,7 @@ def _colorize_re_tree(self, tree, state, noparen, groups): self._output('(?P<', self.RE_GROUP_TAG, state) self._output(groups[args[0]], self.RE_REF_TAG, state) self._output('>', self.RE_GROUP_TAG, state) - elif isinstance(args[0], (int, long)): + elif isinstance(args[0], int): # This is cheating: self._output('(', self.RE_GROUP_TAG, state) else: @@ -505,7 +505,7 @@ def _output(self, s, tag, state): raise _Maxlines() if not state.linebreakok: raise _Linebreak() - state.result.append(u'\n') + state.result.append('\n') state.lineno += 1 state.charpos = 0 diff --git a/Legacy/epydoc-3.0.1/epydoc/markup/restructuredtext.py b/Legacy/epydoc-3.0.1/epydoc/markup/restructuredtext.py index b11b15442..e30a2dd14 100644 --- a/Legacy/epydoc-3.0.1/epydoc/markup/restructuredtext.py +++ b/Legacy/epydoc-3.0.1/epydoc/markup/restructuredtext.py @@ -402,12 +402,12 @@ def visit_field(self, node): # Handle special fields: fbody = node[1] if arg is None: - for (list_tag, entry_tag) in CONSOLIDATED_FIELDS.items(): + for (list_tag, entry_tag) in list(CONSOLIDATED_FIELDS.items()): if tagname.lower() == list_tag: try: self.handle_consolidated_field(fbody, entry_tag) return - except ValueError, e: + except ValueError as e: estr = 'Unable to split consolidated field ' estr += '"%s" - %s' % (tagname, e) self._errors.append(ParseError(estr, node.line, @@ -632,7 +632,7 @@ def starttag(self, node, tagname, suffix='\n', **attributes): # iterate through attributes one at a time because some # versions of docutils don't case-normalize attributes. for attr_dict in attr_dicts: - for (key, val) in attr_dict.items(): + for (key, val) in list(attr_dict.items()): # Prefix all CSS classes with "rst-"; and prefix all # names with "rst-" to avoid conflicts. if key.lower() in ('class', 'id', 'name'): diff --git a/Legacy/epydoc-3.0.1/epydoc/test/__init__.py b/Legacy/epydoc-3.0.1/epydoc/test/__init__.py index 5095bc52e..b5099e569 100644 --- a/Legacy/epydoc-3.0.1/epydoc/test/__init__.py +++ b/Legacy/epydoc-3.0.1/epydoc/test/__init__.py @@ -87,8 +87,8 @@ def check_requirements(filename): try: __import__(module) except ImportError: - print ('Skipping %r (required module %r not found)' % - (os.path.split(filename)[-1], module)) + print(('Skipping %r (required module %r not found)' % + (os.path.split(filename)[-1], module))) return False return True diff --git a/Legacy/epydoc-3.0.1/epydoc/test/util.py b/Legacy/epydoc-3.0.1/epydoc/test/util.py index a451103cf..59c79f3fd 100644 --- a/Legacy/epydoc-3.0.1/epydoc/test/util.py +++ b/Legacy/epydoc-3.0.1/epydoc/test/util.py @@ -53,7 +53,7 @@ def runbuilder(s, attribs='', build=None, exclude=''): if build: val_doc = val_doc.variables[build].value # Display it. if isinstance(val_doc, ClassDoc): - for val in val_doc.variables.values(): + for val in list(val_doc.variables.values()): if isinstance(val.value, RoutineDoc): fun_to_plain(val.value) s = val_doc.pp(include=attribs.split(),exclude=exclude.split()) @@ -61,7 +61,7 @@ def runbuilder(s, attribs='', build=None, exclude=''): s = re.sub(r"(", r"\1...>", s) s = re.sub(r"(<\w+ object at )0x\w+>", r"\1...>", s) - print s + print(s) # Clean up. cleanup_tmp_dir(tmp_dir) @@ -89,7 +89,7 @@ def runparser(s, attribs='', show=None, exclude=''): # Display it. s = val_doc.pp(include=attribs.split(), exclude=exclude.split()) s = re.sub(r"filename = .*", "filename = ...", s) - print s + print(s) # Clean up. 
cleanup_tmp_dir(tmp_dir) @@ -121,7 +121,7 @@ def runintrospecter(s, attribs='', introspect=None, exclude=''): s = re.sub(r"(", r"\1...>", s) s = re.sub(r"(<\w+ object at )0x\w+>", r"\1...>", s) - print s + print(s) # Clean up. cleanup_tmp_dir(tmp_dir) @@ -192,7 +192,7 @@ def to_plain(docstring): def fun_to_plain(val_doc): """Convert parsed docstrings in text from a RoutineDoc""" - for k, v in val_doc.arg_types.items(): + for k, v in list(val_doc.arg_types.items()): val_doc.arg_types[k] = to_plain(v) for i, (k, v) in enumerate(val_doc.arg_descrs): val_doc.arg_descrs[i] = (k, to_plain(v)) @@ -206,7 +206,7 @@ def print_docstring_as_html(self, parsed_docstring, *varargs, **kwargs): s = parsed_docstring.to_html(None).strip() s = s.encode('ascii', 'xmlcharrefreplace') s = remove_surrogates(s) - print s + print(s) return '' def remove_surrogates(s): diff --git a/Legacy/epydoc-3.0.1/epydoc/util.py b/Legacy/epydoc-3.0.1/epydoc/util.py index 85f3102de..f5ead963b 100644 --- a/Legacy/epydoc-3.0.1/epydoc/util.py +++ b/Legacy/epydoc-3.0.1/epydoc/util.py @@ -27,7 +27,7 @@ def is_module_file(path): # Make sure it's a file name. - if not isinstance(path, basestring): + if not isinstance(path, str): return False (dir, filename) = os.path.split(path) (basename, extension) = os.path.splitext(filename) @@ -36,7 +36,7 @@ def is_module_file(path): extension in PY_SRC_EXTENSIONS+PY_BIN_EXTENSIONS) def is_src_filename(filename): - if not isinstance(filename, basestring): return False + if not isinstance(filename, str): return False if not os.path.exists(filename): return False return os.path.splitext(filename)[1] in PY_SRC_EXTENSIONS @@ -47,7 +47,7 @@ def is_package_dir(dirname): and its name is a valid identifier). """ # Make sure it's a directory name. - if not isinstance(dirname, basestring): + if not isinstance(dirname, str): return False if not os.path.isdir(dirname): return False @@ -224,7 +224,7 @@ def run_subprocess(cmd, data=None): @raise OSError: If there is any problem executing the command, or if its exitval is not 0. """ - if isinstance(cmd, basestring): + if isinstance(cmd, str): cmd = cmd.split() # Under Python 2.4+, use subprocess @@ -276,7 +276,7 @@ def run_subprocess(cmd, data=None): try: to_child.write(data) # Guard for a broken pipe error - except IOError, e: + except IOError as e: raise OSError(e) to_child.close() out = from_child.read() diff --git a/Legacy/quicksim/quickSim.py b/Legacy/quicksim/quickSim.py index b289d8387..a552488b6 100644 --- a/Legacy/quicksim/quickSim.py +++ b/Legacy/quicksim/quickSim.py @@ -15,7 +15,7 @@ # Python imports import os -import cPickle +import pickle import sys import datetime as dt @@ -54,8 +54,8 @@ def quickSim( alloc, historic, start_cash ): #check each row in alloc for row in range( 0, len(alloc.values[:, 0]) ): if( abs(alloc.values[row, :].sum() - 1) > .0001 ): - print "warning, alloc row " + str(row) + \ - "does not sum to one, rebalancing" + print("warning, alloc row " + str(row) + \ + "does not sum to one, rebalancing") #if no allocation, all in cash if(alloc.values[row, :].sum()==0): alloc.values[row, -1] = 1 @@ -142,8 +142,8 @@ def shortingQuickSim(alloc, historic, start_cash, leverage): #TODO Find out what to use for fundvall below... 
        this_leverage = _compute_leverage( alloc.values[0, :], start_cash )
        if this_leverage > leverage:
-            print 'Warning, leverage of ', this_leverage, \
-                  ' reached, exceeds leverage limit of ', leverage, '\n'
+            print('Warning, leverage of ', this_leverage, \
+                  ' reached, exceeds leverage limit of ', leverage, '\n')
        #get closest date(previous date)
        closest = historic[ historic.index <= alloc.index[i] ]
        #for loop to calculate fund daily (without rebalancing)
@@ -185,7 +185,7 @@ def alloc_backtest(alloc, start):
 
     #read in alloc table from command line arguements
-    alloc_input_file = open(alloc, "r")
-    alloc = cPickle.load(alloc_input_file)
+    alloc_input_file = open(alloc, "rb")
+    alloc = pickle.load(alloc_input_file)
 
     # Get the data from the data store
     dataobj = da.DataAccess('Norgate')
@@ -284,33 +284,33 @@ def run_main():
     if(sys.argv[1] == '-a'):
         funds = alloc_backtest(sys.argv[2], sys.argv[3])
-        output = open(sys.argv[4], "w")
-        cPickle.dump(funds, output)
+        output = open(sys.argv[4], "wb")
+        pickle.dump(funds, output)
     elif(sys.argv[1] == '-s'):
-        t = map(int, sys.argv[3].split('-'))
+        t = list(map(int, sys.argv[3].split('-')))
         startday = dt.datetime(t[2], t[0], t[1])
-        t = map(int, sys.argv[4].split('-'))
+        t = list(map(int, sys.argv[4].split('-')))
         endday = dt.datetime(t[2], t[0], t[1])
         fundsmatrix = strat_backtest1(sys.argv[2], startday, endday,
                                       1, 0, int(sys.argv[5]))
-        output = open(sys.argv[6], "w")
-        cPickle.dump(fundsmatrix, output)
+        output = open(sys.argv[6], "wb")
+        pickle.dump(fundsmatrix, output)
     elif(sys.argv[1] == '-r'):
-        t = map(int, sys.argv[3].split('-'))
+        t = list(map(int, sys.argv[3].split('-')))
         startday = dt.datetime(t[2], t[0], t[1])
-        t = map(int, sys.argv[4].split('-'))
+        t = list(map(int, sys.argv[4].split('-')))
         endday = dt.datetime(t[2], t[0], t[1])
         fundsmatrix = strat_backtest2(sys.argv[2], startday, endday,
                                       int(sys.argv[5]), int(sys.argv[6]),
                                       int(sys.argv[7]))
-        output = open(sys.argv[8], "w")
-        cPickle.dump(fundsmatrix, output)
+        output = open(sys.argv[8], "wb")
+        pickle.dump(fundsmatrix, output)
     else:
-        print 'invalid command line call'
-        print 'use python quickSim.py -a alloc_pkl start_value output_pkl'
-        print 'or python quickSim.py -s strategy start_date end_date' + \
-              'start_value output_pkl'
-        print 'or python quickSim.py -r strategy start_date end_date' + \
-              ' test_offset_in_days duration start_value output_pkl'
+        print('invalid command line call')
+        print('use python quickSim.py -a alloc_pkl start_value output_pkl')
+        print('or python quickSim.py -s strategy start_date end_date' + \
+              ' start_value output_pkl')
+        print('or python quickSim.py -r strategy start_date end_date' + \
+              ' test_offset_in_days duration start_value output_pkl')
 
 if __name__ == "__main__":
     run_main()
diff --git a/Legacy/quicksim/strategies/MonthlyRebalancing.py b/Legacy/quicksim/strategies/MonthlyRebalancing.py
index 8fed4256e..09ebd03b3 100644
--- a/Legacy/quicksim/strategies/MonthlyRebalancing.py
+++ b/Legacy/quicksim/strategies/MonthlyRebalancing.py
@@ -1,71 +1,71 @@
-'''
-(c) 2011, 2012 Georgia Tech Research Corporation
-This source code is released under the New BSD license. Please see
-http://wiki.quantsoftware.org/index.php?title=QSTK_License
-for license details.
-
-Created on Jan 1, 2011
-
-@author:Drew Bratcher
-@contact: dbratcher@gatech.edu
-@summary: Contains tutorial for backtester and report.
-
-'''
-
-
-#
-# MonthlyRebalancingExample.py
-#
-# Usage: python MonthlyRebalancingExample.py 1-1-2004' '1-1-2009' 'alloc.pkl'
-#
-# A strategy script which creates a monthly allocation table using
-# start date and end date along with the first 20 symbols of S&P500.
-# It then dumps the allocation table to a pickle file.
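# A note on the cPickle -> pickle hunks above: renaming the module is not
# enough, because Python 3's pickle reads and writes bytes, so every file
# handle passed to pickle.dump()/pickle.load() must be opened in binary mode
# ("wb"/"rb"). A minimal round trip (hypothetical file name):

import pickle

funds = {'example': 1.0}
with open('funds.pkl', 'wb') as output:          # mode "w" raises TypeError here
    pickle.dump(funds, output)
with open('funds.pkl', 'rb') as alloc_input_file:
    assert pickle.load(alloc_input_file) == funds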
-# -# - -#python imports -import cPickle -from pylab import * -from pandas import * -import matplotlib.pyplot as plt -import datetime as dt -import os - -#qstk imports -import qstkutil.DataAccess as da -import qstkutil.qsdateutil as du - -if __name__ == "__main__": - print "Running Monthly Rebalancing strategy starting "+sys.argv[1]+" and ending "+sys.argv[2]+"." - - #Get first 20 S&P Symbols - symbols = list(np.loadtxt(os.environ['QS']+'/quicksim/strategies/S&P500.csv',dtype='str',delimiter=',',comments='#',skiprows=0)) - symbols = symbols[0:19] - - #Set start and end boundary times - t = map(int,sys.argv[1].split('-')) - startday = dt.datetime(t[2],t[0],t[1]) - t = map(int,sys.argv[2].split('-')) - endday = dt.datetime(t[2],t[0],t[1]) - - #Get desired timestamps - timeofday=dt.timedelta(hours=16) - timestamps = du.getNYSEdays(startday,endday,timeofday) - - # Get the data from the data store - dataobj = da.DataAccess('Norgate') - historic = dataobj.get_data(timestamps, symbols, "close") - - # Setup the allocation table - alloc_vals=.8/(len(historic.values[0,:])-1)*ones((1,len(historic.values[0,:]))) - alloc=DataMatrix(index=[historic.index[0]], data=alloc_vals, columns=symbols) - for date in range(1, len(historic.index)): - if(historic.index[date].day==1): - alloc=alloc.append(DataMatrix(index=[historic.index[date]], data=alloc_vals, columns=symbols)) - alloc[symbols[0]] = .1 - alloc['_CASH'] = .1 - - #Dump to a pkl file - output=open(sys.argv[3],"wb") - cPickle.dump(alloc, output) +''' +(c) 2011, 2012 Georgia Tech Research Corporation +This source code is released under the New BSD license. Please see +http://wiki.quantsoftware.org/index.php?title=QSTK_License +for license details. + +Created on Jan 1, 2011 + +@author:Drew Bratcher +@contact: dbratcher@gatech.edu +@summary: Contains tutorial for backtester and report. + +''' + + +# +# MonthlyRebalancingExample.py +# +# Usage: python MonthlyRebalancingExample.py 1-1-2004' '1-1-2009' 'alloc.pkl' +# +# A strategy script which creates a monthly allocation table using +# start date and end date along with the first 20 symbols of S&P500. +# It then dumps the allocation table to a pickle file. 
+# +# + +#python imports +import pickle +from pylab import * +from pandas import * +import matplotlib.pyplot as plt +import datetime as dt +import os + +#qstk imports +import qstkutil.DataAccess as da +import qstkutil.qsdateutil as du + +if __name__ == "__main__": + print("Running Monthly Rebalancing strategy starting "+sys.argv[1]+" and ending "+sys.argv[2]+".") + + #Get first 20 S&P Symbols + symbols = list(np.loadtxt(os.environ['QS']+'/quicksim/strategies/S&P500.csv',dtype='str',delimiter=',',comments='#',skiprows=0)) + symbols = symbols[0:19] + + #Set start and end boundary times + t = list(map(int,sys.argv[1].split('-'))) + startday = dt.datetime(t[2],t[0],t[1]) + t = list(map(int,sys.argv[2].split('-'))) + endday = dt.datetime(t[2],t[0],t[1]) + + #Get desired timestamps + timeofday=dt.timedelta(hours=16) + timestamps = du.getNYSEdays(startday,endday,timeofday) + + # Get the data from the data store + dataobj = da.DataAccess('Norgate') + historic = dataobj.get_data(timestamps, symbols, "close") + + # Setup the allocation table + alloc_vals=.8/(len(historic.values[0,:])-1)*ones((1,len(historic.values[0,:]))) + alloc=DataMatrix(index=[historic.index[0]], data=alloc_vals, columns=symbols) + for date in range(1, len(historic.index)): + if(historic.index[date].day==1): + alloc=alloc.append(DataMatrix(index=[historic.index[date]], data=alloc_vals, columns=symbols)) + alloc[symbols[0]] = .1 + alloc['_CASH'] = .1 + + #Dump to a pkl file + output=open(sys.argv[3],"wb") + pickle.dump(alloc, output) diff --git a/Legacy/quicksim/strategies/OneStock.py b/Legacy/quicksim/strategies/OneStock.py index 022fb0dde..302cde1bb 100644 --- a/Legacy/quicksim/strategies/OneStock.py +++ b/Legacy/quicksim/strategies/OneStock.py @@ -1,67 +1,67 @@ -''' -(c) 2011, 2012 Georgia Tech Research Corporation -This source code is released under the New BSD license. Please see -http://wiki.quantsoftware.org/index.php?title=QSTK_License -for license details. - -Created on Jan 1, 2011 - -@author:Drew Bratcher -@contact: dbratcher@gatech.edu -@summary: Contains tutorial for backtester and report. - -''' - - -# OneStock.py -# -# Usage: python OneStock.py '1-1-2004' '1-1-2009' 'alloc.pkl' -# -# A strategy script which creates a daily allocation table using one stock (GOOG) -# and the start and end dates provided by the user. -# It then dumps the allocation table to a pickle file. 
-# -# - -# python imports -import cPickle -import sys -from pandas import DataMatrix -import datetime as dt -import random - -# qstk imports -import qstkutil.DataAccess as da -import qstkutil.qsdateutil as du - -if __name__ == "__main__": - print "Running One Stock strategy from "+sys.argv[1] +" to "+sys.argv[2] - - # Use google symbol - symbols = list(['SPY']) - - # Set start and end dates - t = map(int,sys.argv[1].split('-')) - startday = dt.datetime(t[2],t[0],t[1]) - t = map(int,sys.argv[2].split('-')) - endday = dt.datetime(t[2],t[0],t[1]) - - # Get desired timestamps - timeofday=dt.timedelta(hours=16) - timestamps = du.getNYSEdays(startday,endday,timeofday) - - # Get the data from the data store - dataobj = da.DataAccess('Norgate') - historic = dataobj.get_data(timestamps, symbols, "close") - - # Setup the allocation table - alloc_val= random.random() - alloc=DataMatrix(index=[historic.index[0]], data=[alloc_val], columns=symbols) - for date in range(1, len(historic.index)): - alloc_val=1 #random.random() - alloc=alloc.append(DataMatrix(index=[historic.index[date]], data=[alloc_val], columns=[symbols[0]])) - alloc['_CASH']=1-alloc[symbols[0]] - - # Dump to pkl file - output=open(sys.argv[3],"wb") - cPickle.dump(alloc, output) +''' +(c) 2011, 2012 Georgia Tech Research Corporation +This source code is released under the New BSD license. Please see +http://wiki.quantsoftware.org/index.php?title=QSTK_License +for license details. + +Created on Jan 1, 2011 + +@author:Drew Bratcher +@contact: dbratcher@gatech.edu +@summary: Contains tutorial for backtester and report. + +''' + + +# OneStock.py +# +# Usage: python OneStock.py '1-1-2004' '1-1-2009' 'alloc.pkl' +# +# A strategy script which creates a daily allocation table using one stock (GOOG) +# and the start and end dates provided by the user. +# It then dumps the allocation table to a pickle file. 
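# The t = list(map(int, ...)) rewrites in these strategy scripts are needed
# because Python 3's map() returns a lazy iterator, and the result is
# immediately indexed as t[0]..t[2] to build a datetime. A minimal sketch:

import datetime as dt

t = list(map(int, '1-1-2004'.split('-')))        # [1, 1, 2004]
startday = dt.datetime(t[2], t[0], t[1])         # month-day-year input order
assert startday == dt.datetime(2004, 1, 1)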
+# +# + +# python imports +import pickle +import sys +from pandas import DataMatrix +import datetime as dt +import random + +# qstk imports +import qstkutil.DataAccess as da +import qstkutil.qsdateutil as du + +if __name__ == "__main__": + print("Running One Stock strategy from "+sys.argv[1] +" to "+sys.argv[2]) + + # Use google symbol + symbols = list(['SPY']) + + # Set start and end dates + t = list(map(int,sys.argv[1].split('-'))) + startday = dt.datetime(t[2],t[0],t[1]) + t = list(map(int,sys.argv[2].split('-'))) + endday = dt.datetime(t[2],t[0],t[1]) + + # Get desired timestamps + timeofday=dt.timedelta(hours=16) + timestamps = du.getNYSEdays(startday,endday,timeofday) + + # Get the data from the data store + dataobj = da.DataAccess('Norgate') + historic = dataobj.get_data(timestamps, symbols, "close") + + # Setup the allocation table + alloc_val= random.random() + alloc=DataMatrix(index=[historic.index[0]], data=[alloc_val], columns=symbols) + for date in range(1, len(historic.index)): + alloc_val=1 #random.random() + alloc=alloc.append(DataMatrix(index=[historic.index[date]], data=[alloc_val], columns=[symbols[0]])) + alloc['_CASH']=1-alloc[symbols[0]] + + # Dump to pkl file + output=open(sys.argv[3],"wb") + pickle.dump(alloc, output) diff --git a/Legacy/quicksim/strategies/bollinger.py b/Legacy/quicksim/strategies/bollinger.py index ea6a2d713..03693bc89 100644 --- a/Legacy/quicksim/strategies/bollinger.py +++ b/Legacy/quicksim/strategies/bollinger.py @@ -21,7 +21,7 @@ # #python imports -import cPickle +import pickle from pylab import * from pandas import * import matplotlib.pyplot as plt @@ -85,14 +85,14 @@ def create(adjclose, timestamps, lookback, spread, high, low, bet, duration): if __name__ == "__main__": #Usage: python bollinger.py '1-1-2004' '1-1-2009' 'alloc.pkl' - print "Running Bollinger strategy starting "+sys.argv[1]+" and ending "+sys.argv[2]+"." + print("Running Bollinger strategy starting "+sys.argv[1]+" and ending "+sys.argv[2]+".") #Run S&P500 for thresholds 1 and -1 in simple version for lookback of 10 days symbols = list(np.loadtxt(os.environ['QS']+'/quicksim/strategies/S&P500.csv',dtype='str',delimiter=',',comments='#',skiprows=0)) - t=map(int,sys.argv[1].split('-')) + t=list(map(int,sys.argv[1].split('-'))) startday = dt.datetime(t[2],t[0],t[1]) - t=map(int,sys.argv[2].split('-')) + t=list(map(int,sys.argv[2].split('-'))) endday = dt.datetime(t[2],t[0],t[1]) timeofday=dt.timedelta(hours=16) @@ -104,8 +104,8 @@ def create(adjclose, timestamps, lookback, spread, high, low, bet, duration): badsyms=[] if size(intersectsyms)= 1.6.1, - scipy >= 0.9.0, - matplotlib >= 1.1.0, - pandas >= 0.7.3, - - python-dateutil==1.5, + - python-dateutil>=2.0, - cvxopt >= 1.1.3, - scikit-learn >= 0.11 + ### Vagrant Image + Vagrant image with QSTK installed can be found at [https://github.com/Wesseldr/vagrant-QSTK](https://github.com/Wesseldr/vagrant-QSTK) + ##Author - [Prof. 
Tucker Balch](http://www.cc.gatech.edu/~tucker/) - - [Sourabh Bajaj](http://sb2nov.github.io) + - [Sourabh Bajaj](http://www.sourabhbajaj.com/) - [John Cornwell](https://github.com/JWCornV) ##License diff --git a/QSTK.egg-info/SOURCES.txt b/QSTK.egg-info/SOURCES.txt index de5756650..77aae5c84 100644 --- a/QSTK.egg-info/SOURCES.txt +++ b/QSTK.egg-info/SOURCES.txt @@ -6,13 +6,6 @@ README.md UbuntuInstallation.sh ez_setup.py setup.py -Bin/DataGenerate_SineWave.py -Bin/Data_CSV.py -Bin/__init__.py -Bin/converter.py -Bin/csvformatter.py -Bin/gen_nyse_dates.py -Bin/investors_report.py Examples/Validation.py Examples/Basic/allsyms.csv Examples/Basic/example-data.csv @@ -41,7 +34,6 @@ Examples/KNN/data-ripple-prob.csv Examples/KNN/data1.csv Examples/KNN/data2.csv QSTK/__init__.py -QSTK/__init__.pyc QSTK.egg-info/PKG-INFO QSTK.egg-info/SOURCES.txt QSTK.egg-info/dependency_links.txt diff --git a/QSTK.egg-info/requires.txt b/QSTK.egg-info/requires.txt index b28bb3b5a..a5446fafc 100644 --- a/QSTK.egg-info/requires.txt +++ b/QSTK.egg-info/requires.txt @@ -1,6 +1,7 @@ -numpy >= 1.6.1 -scipy >= 0.9.0 -matplotlib >= 1.1.0 -pandas >= 0.7.3 -python-dateutil == 1.5 -scikit-learn >= 0.11 \ No newline at end of file +numpy>=1.6.1 +scipy>=0.9.0 +matplotlib>=1.1.0 +pandas>=0.7.3 +python-dateutil>=2.0 +scikit-learn>=0.11 +cvxopt>=1.1.3 diff --git a/QSTK.egg-info/top_level.txt b/QSTK.egg-info/top_level.txt index 63a931ca2..adbbd4e15 100644 --- a/QSTK.egg-info/top_level.txt +++ b/QSTK.egg-info/top_level.txt @@ -1,2 +1,2 @@ -bin QSTK +bin diff --git a/QSTK/qstkfeat/__init__.py b/QSTK/qstkfeat/__init__.py index aca4b1e71..b5872d01c 100644 --- a/QSTK/qstkfeat/__init__.py +++ b/QSTK/qstkfeat/__init__.py @@ -1,3 +1,3 @@ -from classes import * -from featutil import * -from features import * +from .classes import * +from .featutil import * +from .features import * diff --git a/QSTK/qstkfeat/featutil.py b/QSTK/qstkfeat/featutil.py index b358246c0..f9c988e7b 100644 --- a/QSTK/qstkfeat/featutil.py +++ b/QSTK/qstkfeat/featutil.py @@ -60,7 +60,7 @@ def getMarketRel( dData, sRel='$SPX' ): #print dfCloseMark #Make all data market relative, except for volume - for sKey in dData.keys(): + for sKey in list(dData.keys()): # Don't calculate market relative volume, but still copy it over if sKey == 'volume': @@ -114,8 +114,8 @@ def applyFeatures( dData, lfcFeatures, ldArgs, sMarketRel=None, sLog=None, bMin= if 'MR' in ldArgs[i]: if ldArgs[i]['MR'] == False: - print 'Warning, setting MR to false will still be Market Relative',\ - 'simply do not include MR key in args' + print('Warning, setting MR to false will still be Market Relative',\ + 'simply do not include MR key in args') if sMarketRel == None: raise AssertionError('Functions require market relative stock but sMarketRel=None') @@ -244,7 +244,7 @@ def stackSyms( ldfFeatures, dtStart=None, dtEnd=None, lsSym=None, sDelNan='ALL', 'FEAT' == sDelNan and not math.isnan( np.sum(naStkData[i,:-1]) ): llValidRows.append(i) elif bShowRemoved: - print 'Removed', sStock, naStkData[i,:] + print('Removed', sStock, naStkData[i,:]) naStkData = naStkData[llValidRows,:] @@ -295,7 +295,7 @@ def normFeatures( naFeatures, fMin, fMax, bAbsolute, bIgnoreLast=True ): fRange = fFeatMax - fFeatMin if fRange == 0: - print 'Warning, bad query data range' + print('Warning, bad query data range') fMult = 1. fShigt = 0. 
else: @@ -430,7 +430,7 @@ def testFeature( fcFeature, dArgs ): lsKeys = ['open', 'high', 'low', 'close', 'volume', 'actual_close'] ldfData = norObj.get_data( ldtTimestamps, lsSym, lsKeys ) - dData = dict(zip(lsKeys, ldfData)) + dData = dict(list(zip(lsKeys, ldfData))) dfPrice = dData['close'] @@ -439,13 +439,13 @@ def testFeature( fcFeature, dArgs ): ''' Generate a list of DataFrames, one for each feature, with the same index/column structure as price data ''' dtStart = dt.datetime.now() ldfFeatures = applyFeatures( dData, [fcFeature], [dArgs], sMarketRel='$SPX' ) - print 'Runtime:', dt.datetime.now() - dtStart + print('Runtime:', dt.datetime.now() - dtStart) ''' Use last 3 months of index, to avoid lookback nans ''' dfPrint = ldfFeatures[0]['GOOG'] - print 'GOOG values:', dfPrint.values - print 'GOOG Sum:', dfPrint.ix[dfPrint.notnull()].sum() + print('GOOG values:', dfPrint.values) + print('GOOG Sum:', dfPrint.ix[dfPrint.notnull()].sum()) for sSym in lsSym: plt.subplot( 211 ) @@ -483,7 +483,7 @@ def speedTest(lfcFeature,ldArgs): lsKeys = ['open', 'high', 'low', 'close', 'volume', 'actual_close'] ldtTimestamps = du.getNYSEdays( dtStart, dtEnd, dtTimeofday) ldfData = daData.get_data( ldtTimestamps, lsSym, lsKeys) - dData = dict(zip(lsKeys, ldfData)) + dData = dict(list(zip(lsKeys, ldfData))) '''loop through features''' ltResults = [] @@ -497,7 +497,7 @@ def speedTest(lfcFeature,ldArgs): '''print out result''' for tResult in ltResults: - print tResult[1], ':', tResult[0] + print(tResult[1], ':', tResult[0]) return ltResults diff --git a/QSTK/qstklearn/1knn.py b/QSTK/qstklearn/1knn.py index 8630b3c80..7bf287aa8 100644 --- a/QSTK/qstklearn/1knn.py +++ b/QSTK/qstklearn/1knn.py @@ -1,285 +1,285 @@ -''' -(c) 2011, 2012 Georgia Tech Research Corporation -This source code is released under the New BSD license. Please see -http://wiki.quantsoftware.org/index.php?title=QSTK_License -for license details. - -Created on Feb 20, 2011 -@author: John Cornwell -@organization: Georgia Institute of Technology -@contact: JohnWCornwellV@gmail.com -@summary: This is an implementation of the 1-KNN algorithm for ranking features quickly. - It uses the knn implementation. -@status: oneKNN functions correctly, optimized to use n^2/2 algorithm. -''' - -import matplotlib.pyplot as plt -from pylab import gca - -import itertools -import string -import numpy as np -import math -import knn - -from time import clock - - -''' -@summary: Query function for 1KNN, return value is a double between 0 and 1. - -@param naData: A 2D numpy array. Each row is a data point with the final column containing the classification. 
-''' -def oneKnn( naData ): - - - if naData.ndim != 2: - raise Exception( "Data should have two dimensions" ) - - lLen = naData.shape[0] - ''' # of dimensions, subtract one for classification ''' - lDim = naData.shape[1] - 1 - - ''' Start best distances as very large ''' - ldDistances = [1E300] * lLen - llIndexes = [-1] * lLen - - dDistance = 0.0; - - ''' Loop through finding closest neighbors ''' - for i in range( lLen ): - for j in range( i+1, lLen ): - - dDistance = 0.0 - for k in range( 0, lDim ): - dDistance += (naData[i][k] - naData[j][k])**2 - dDistance = math.sqrt( dDistance ) - - ''' Two distances to check, for i's best, and j's best ''' - if dDistance < ldDistances[i]: - ldDistances[i] = dDistance - llIndexes[i] = j - - if dDistance < ldDistances[j]: - ldDistances[j] = dDistance - llIndexes[j] = i - - lCount = 0 - ''' Now count # of matching pairs ''' - for i in range( lLen ): - if naData[i][-1] == naData[ llIndexes[i] ][-1]: - lCount = lCount + 1 - - return float(lCount) / lLen - - -''' Test function to plot results ''' -def _plotResults( naDist1, naDist2, lfOneKnn, lf5Knn ): - plt.clf() - - plt.subplot(311) - plt.scatter( naDist1[:,0], naDist1[:,1] ) - - plt.scatter( naDist2[:,0], naDist2[:,1], color='r' ) - - - #plt.ylabel( 'Feature 2' ) - #plt.xlabel( 'Feature 1' ) - #gca().annotate( '', xy=( .8, 0 ), xytext=( -.3 , 0 ), arrowprops=dict(facecolor='red', shrink=0.05) ) - gca().annotate( '', xy=( .7, 0 ), xytext=( 1.5 , 0 ), arrowprops=dict(facecolor='black', shrink=0.05) ) - plt.title( 'Data Distribution' ) - - plt.subplot(312) - - plt.plot( range( len(lfOneKnn) ), lfOneKnn ) - - plt.ylabel( '1-KNN Value' ) - #plt.xlabel( 'Distribution Merge' ) - - plt.title( '1-KNN Performance' ) - - plt.subplot(313) - - plt.plot( range( len(lf5Knn) ), lf5Knn ) - - plt.ylabel( '% Correct Classification' ) - #plt.xlabel( 'Distribution Merge' ) - - plt.title( '5-KNN Performance' ) - - plt.subplots_adjust() - - plt.show() - -''' Function to plot 2 distributions ''' -def _plotDist( naDist1, naDist2, i ): - plt.clf() - - plt.scatter( naDist1[:,0], naDist1[:,1] ) - - plt.scatter( naDist2[:,0], naDist2[:,1], color='r' ) - - - plt.ylabel( 'Feature 2' ) - plt.xlabel( 'Feature 1' ) - - plt.title( 'Iteration ' + str(i) ) - - plt.show() - -''' Function to test KNN performance ''' -def _knnResult( naData ): - - - ''' Split up data into training/testing ''' - lSplit = naData.shape[0] * .7 - naTrain = naData[:lSplit, :] - naTest = naData[lSplit:, :] - - knn.addEvidence( naTrain.astype(float), 1 ); - - ''' Query with last column omitted and 5 nearest neighbors ''' - naResults = knn.query( naTest[:,:-1], 5, 'mode') - - ''' Count returns which are correct ''' - lCount = 0 - for i, dVal in enumerate(naResults): - if dVal == naTest[i,-1]: - lCount = lCount + 1 - - dResult = float(lCount) / naResults.size - - return dResult - -''' Tests performance of 1-KNN ''' -def _test1(): - - ''' Generate three random samples to show the value of 1-KNN compared to 5KNN learner performance ''' - - for i in range(3): - - ''' Select one of three distributions ''' - if i == 0: - naTest1 = np.random.normal( loc=[0,0],scale=.25,size=[500,2] ) - naTest1 = np.hstack( (naTest1, np.zeros(500).reshape(-1,1) ) ) - - naTest2 = np.random.normal( loc=[1.5,0],scale=.25,size=[500,2] ) - naTest2 = np.hstack( (naTest2, np.ones(500).reshape(-1,1) ) ) - elif i == 1: - naTest1 = np.random.normal( loc=[0,0],scale=.25,size=[500,2] ) - naTest1 = np.hstack( (naTest1, np.zeros(500).reshape(-1,1) ) ) - - naTest2 = np.random.normal( 
loc=[1.5,0],scale=.1,size=[500,2] ) - naTest2 = np.hstack( (naTest2, np.ones(500).reshape(-1,1) ) ) - else: - naTest1 = np.random.normal( loc=[0,0],scale=.25,size=[500,2] ) - naTest1 = np.hstack( (naTest1, np.zeros(500).reshape(-1,1) ) ) - - naTest2 = np.random.normal( loc=[1.5,0],scale=.25,size=[250,2] ) - naTest2 = np.hstack( (naTest2, np.ones(250).reshape(-1,1) ) ) - - naOrig = np.vstack( (naTest1, naTest2) ) - naBoth = np.vstack( (naTest1, naTest2) ) - - ''' Keep track of runtimes ''' - t = clock() - cOneRuntime = t-t; - cKnnRuntime = t-t; - - lfResults = [] - lfKnnResults = [] - for i in range( 15 ): - #_plotDist( naTest1, naBoth[100:,:], i ) - - t = clock() - lfResults.append( oneKnn( naBoth ) ) - cOneRuntime = cOneRuntime + (clock() - t) - - t = clock() - lfKnnResults.append( _knnResult( np.random.permutation(naBoth) ) ) - cKnnRuntime = cKnnRuntime + (clock() - t) - - naBoth[500:,0] = naBoth[500:,0] - .1 - - print 'Runtime OneKnn:', cOneRuntime - print 'Runtime 5-KNN:', cKnnRuntime - _plotResults( naTest1, naTest2, lfResults, lfKnnResults ) - -''' Tests performance of 1-KNN ''' -def _test2(): - ''' Generate three random samples to show the value of 1-KNN compared to 5KNN learner performance ''' - - np.random.seed( 12345 ) - - ''' Create 5 distributions for each of the 5 attributes ''' - dist1 = np.random.uniform( -1, 1, 1000 ).reshape( -1, 1 ) - dist2 = np.random.uniform( -1, 1, 1000 ).reshape( -1, 1 ) - dist3 = np.random.uniform( -1, 1, 1000 ).reshape( -1, 1 ) - dist4 = np.random.uniform( -1, 1, 1000 ).reshape( -1, 1 ) - dist5 = np.random.uniform( -1, 1, 1000 ).reshape( -1, 1 ) - - lDists = [ dist1, dist2, dist3, dist4, dist5 ] - - ''' All features used except for distribution 4 ''' - distY = np.sin( dist1 ) + np.sin( dist2 ) + np.sin( dist3 ) + np.sin( dist5 ) - distY = distY.reshape( -1, 1 ) - - for i, fVal in enumerate( distY ): - if fVal >= 0: - distY[i] = 1 - else: - distY[i] = 0 - - for i in range( 1, 6 ): - - lsNames = [] - lf1Vals = [] - lfVals = [] - - for perm in itertools.combinations( '12345', i ): - - ''' set test distribution to first element ''' - naTest = lDists[ int(perm[0]) - 1 ] - sPerm = perm[0] - - ''' stack other distributions on ''' - for j in range( 1, len(perm) ): - sPerm = sPerm + str(perm[j]) - naTest = np.hstack( (naTest, lDists[ int(perm[j]) - 1 ] ) ) - - ''' finally stack y values ''' - naTest = np.hstack( (naTest, distY) ) - - lf1Vals.append( oneKnn( naTest ) ) - lfVals.append( _knnResult( np.random.permutation(naTest) ) ) - lsNames.append( sPerm ) - - ''' Plot results ''' - plt1 = plt.bar( np.arange(len(lf1Vals)), lf1Vals, .2, color='r' ) - plt2 = plt.bar( np.arange(len(lfVals)) + 0.2, lfVals, .2, color='b' ) - - plt.legend( (plt1[0], plt2[0]), ('1-KNN', 'KNN, K=5') ) - - plt.ylabel('1-KNN Value/KNN Classification') - plt.xlabel('Feature Set') - plt.title('Combinations of ' + str(i) + ' Features') - - plt.ylim( (0,1) ) - if len(lf1Vals) < 2: - plt.xlim( (-1,1) ) - - gca().xaxis.set_ticks( np.arange(len(lf1Vals)) + .2 ) - gca().xaxis.set_ticklabels( lsNames ) - - plt.show() - - - -if __name__ == '__main__': - - _test1() - #_test2() - - - - +''' +(c) 2011, 2012 Georgia Tech Research Corporation +This source code is released under the New BSD license. Please see +http://wiki.quantsoftware.org/index.php?title=QSTK_License +for license details. 
+ +Created on Feb 20, 2011 +@author: John Cornwell +@organization: Georgia Institute of Technology +@contact: JohnWCornwellV@gmail.com +@summary: This is an implementation of the 1-KNN algorithm for ranking features quickly. + It uses the knn implementation. +@status: oneKNN functions correctly, optimized to use n^2/2 algorithm. +''' + +import matplotlib.pyplot as plt +from pylab import gca + +import itertools +import string +import numpy as np +import math +import knn + +from time import clock + + +''' +@summary: Query function for 1KNN, return value is a double between 0 and 1. + +@param naData: A 2D numpy array. Each row is a data point with the final column containing the classification. +''' +def oneKnn( naData ): + + + if naData.ndim != 2: + raise Exception( "Data should have two dimensions" ) + + lLen = naData.shape[0] + ''' # of dimensions, subtract one for classification ''' + lDim = naData.shape[1] - 1 + + ''' Start best distances as very large ''' + ldDistances = [1E300] * lLen + llIndexes = [-1] * lLen + + dDistance = 0.0; + + ''' Loop through finding closest neighbors ''' + for i in range( lLen ): + for j in range( i+1, lLen ): + + dDistance = 0.0 + for k in range( 0, lDim ): + dDistance += (naData[i][k] - naData[j][k])**2 + dDistance = math.sqrt( dDistance ) + + ''' Two distances to check, for i's best, and j's best ''' + if dDistance < ldDistances[i]: + ldDistances[i] = dDistance + llIndexes[i] = j + + if dDistance < ldDistances[j]: + ldDistances[j] = dDistance + llIndexes[j] = i + + lCount = 0 + ''' Now count # of matching pairs ''' + for i in range( lLen ): + if naData[i][-1] == naData[ llIndexes[i] ][-1]: + lCount = lCount + 1 + + return float(lCount) / lLen + + +''' Test function to plot results ''' +def _plotResults( naDist1, naDist2, lfOneKnn, lf5Knn ): + plt.clf() + + plt.subplot(311) + plt.scatter( naDist1[:,0], naDist1[:,1] ) + + plt.scatter( naDist2[:,0], naDist2[:,1], color='r' ) + + + #plt.ylabel( 'Feature 2' ) + #plt.xlabel( 'Feature 1' ) + #gca().annotate( '', xy=( .8, 0 ), xytext=( -.3 , 0 ), arrowprops=dict(facecolor='red', shrink=0.05) ) + gca().annotate( '', xy=( .7, 0 ), xytext=( 1.5 , 0 ), arrowprops=dict(facecolor='black', shrink=0.05) ) + plt.title( 'Data Distribution' ) + + plt.subplot(312) + + plt.plot( list(range( len(lfOneKnn))), lfOneKnn ) + + plt.ylabel( '1-KNN Value' ) + #plt.xlabel( 'Distribution Merge' ) + + plt.title( '1-KNN Performance' ) + + plt.subplot(313) + + plt.plot( list(range( len(lf5Knn))), lf5Knn ) + + plt.ylabel( '% Correct Classification' ) + #plt.xlabel( 'Distribution Merge' ) + + plt.title( '5-KNN Performance' ) + + plt.subplots_adjust() + + plt.show() + +''' Function to plot 2 distributions ''' +def _plotDist( naDist1, naDist2, i ): + plt.clf() + + plt.scatter( naDist1[:,0], naDist1[:,1] ) + + plt.scatter( naDist2[:,0], naDist2[:,1], color='r' ) + + + plt.ylabel( 'Feature 2' ) + plt.xlabel( 'Feature 1' ) + + plt.title( 'Iteration ' + str(i) ) + + plt.show() + +''' Function to test KNN performance ''' +def _knnResult( naData ): + + + ''' Split up data into training/testing ''' + lSplit = naData.shape[0] * .7 + naTrain = naData[:lSplit, :] + naTest = naData[lSplit:, :] + + knn.addEvidence( naTrain.astype(float), 1 ); + + ''' Query with last column omitted and 5 nearest neighbors ''' + naResults = knn.query( naTest[:,:-1], 5, 'mode') + + ''' Count returns which are correct ''' + lCount = 0 + for i, dVal in enumerate(naResults): + if dVal == naTest[i,-1]: + lCount = lCount + 1 + + dResult = float(lCount) / naResults.size + + return 
dResult + +''' Tests performance of 1-KNN ''' +def _test1(): + + ''' Generate three random samples to show the value of 1-KNN compared to 5KNN learner performance ''' + + for i in range(3): + + ''' Select one of three distributions ''' + if i == 0: + naTest1 = np.random.normal( loc=[0,0],scale=.25,size=[500,2] ) + naTest1 = np.hstack( (naTest1, np.zeros(500).reshape(-1,1) ) ) + + naTest2 = np.random.normal( loc=[1.5,0],scale=.25,size=[500,2] ) + naTest2 = np.hstack( (naTest2, np.ones(500).reshape(-1,1) ) ) + elif i == 1: + naTest1 = np.random.normal( loc=[0,0],scale=.25,size=[500,2] ) + naTest1 = np.hstack( (naTest1, np.zeros(500).reshape(-1,1) ) ) + + naTest2 = np.random.normal( loc=[1.5,0],scale=.1,size=[500,2] ) + naTest2 = np.hstack( (naTest2, np.ones(500).reshape(-1,1) ) ) + else: + naTest1 = np.random.normal( loc=[0,0],scale=.25,size=[500,2] ) + naTest1 = np.hstack( (naTest1, np.zeros(500).reshape(-1,1) ) ) + + naTest2 = np.random.normal( loc=[1.5,0],scale=.25,size=[250,2] ) + naTest2 = np.hstack( (naTest2, np.ones(250).reshape(-1,1) ) ) + + naOrig = np.vstack( (naTest1, naTest2) ) + naBoth = np.vstack( (naTest1, naTest2) ) + + ''' Keep track of runtimes ''' + t = clock() + cOneRuntime = t-t; + cKnnRuntime = t-t; + + lfResults = [] + lfKnnResults = [] + for i in range( 15 ): + #_plotDist( naTest1, naBoth[100:,:], i ) + + t = clock() + lfResults.append( oneKnn( naBoth ) ) + cOneRuntime = cOneRuntime + (clock() - t) + + t = clock() + lfKnnResults.append( _knnResult( np.random.permutation(naBoth) ) ) + cKnnRuntime = cKnnRuntime + (clock() - t) + + naBoth[500:,0] = naBoth[500:,0] - .1 + + print('Runtime OneKnn:', cOneRuntime) + print('Runtime 5-KNN:', cKnnRuntime) + _plotResults( naTest1, naTest2, lfResults, lfKnnResults ) + +''' Tests performance of 1-KNN ''' +def _test2(): + ''' Generate three random samples to show the value of 1-KNN compared to 5KNN learner performance ''' + + np.random.seed( 12345 ) + + ''' Create 5 distributions for each of the 5 attributes ''' + dist1 = np.random.uniform( -1, 1, 1000 ).reshape( -1, 1 ) + dist2 = np.random.uniform( -1, 1, 1000 ).reshape( -1, 1 ) + dist3 = np.random.uniform( -1, 1, 1000 ).reshape( -1, 1 ) + dist4 = np.random.uniform( -1, 1, 1000 ).reshape( -1, 1 ) + dist5 = np.random.uniform( -1, 1, 1000 ).reshape( -1, 1 ) + + lDists = [ dist1, dist2, dist3, dist4, dist5 ] + + ''' All features used except for distribution 4 ''' + distY = np.sin( dist1 ) + np.sin( dist2 ) + np.sin( dist3 ) + np.sin( dist5 ) + distY = distY.reshape( -1, 1 ) + + for i, fVal in enumerate( distY ): + if fVal >= 0: + distY[i] = 1 + else: + distY[i] = 0 + + for i in range( 1, 6 ): + + lsNames = [] + lf1Vals = [] + lfVals = [] + + for perm in itertools.combinations( '12345', i ): + + ''' set test distribution to first element ''' + naTest = lDists[ int(perm[0]) - 1 ] + sPerm = perm[0] + + ''' stack other distributions on ''' + for j in range( 1, len(perm) ): + sPerm = sPerm + str(perm[j]) + naTest = np.hstack( (naTest, lDists[ int(perm[j]) - 1 ] ) ) + + ''' finally stack y values ''' + naTest = np.hstack( (naTest, distY) ) + + lf1Vals.append( oneKnn( naTest ) ) + lfVals.append( _knnResult( np.random.permutation(naTest) ) ) + lsNames.append( sPerm ) + + ''' Plot results ''' + plt1 = plt.bar( np.arange(len(lf1Vals)), lf1Vals, .2, color='r' ) + plt2 = plt.bar( np.arange(len(lfVals)) + 0.2, lfVals, .2, color='b' ) + + plt.legend( (plt1[0], plt2[0]), ('1-KNN', 'KNN, K=5') ) + + plt.ylabel('1-KNN Value/KNN Classification') + plt.xlabel('Feature Set') + plt.title('Combinations of 
' + str(i) + ' Features') + + plt.ylim( (0,1) ) + if len(lf1Vals) < 2: + plt.xlim( (-1,1) ) + + gca().xaxis.set_ticks( np.arange(len(lf1Vals)) + .2 ) + gca().xaxis.set_ticklabels( lsNames ) + + plt.show() + + + +if __name__ == '__main__': + + _test1() + #_test2() + + + + diff --git a/QSTK/qstklearn/fastknn.py b/QSTK/qstklearn/fastknn.py index ca03465eb..747b43def 100644 --- a/QSTK/qstklearn/fastknn.py +++ b/QSTK/qstklearn/fastknn.py @@ -64,7 +64,7 @@ def resetAnchors(self,selection_type='random'): picks the first 'num_anchors' as new anchors. """ if selection_type == 'random': - self.anchors = range(len(self.training_data)) + self.anchors = list(range(len(self.training_data))) random.shuffle(self.anchors) self.anchors = self.anchors[0:self.num_anchors] self.kdt = scipy.spatial.kdtree.KDTree(numpy.array(self.training_data)[self.anchors,:]) @@ -92,7 +92,7 @@ def addEvidence(self,data,label): dist = self.distfun(data,self.training_data[a]) bisect.insort(self.data_by_anchors[a],(dist,new_idx)) elif len(data.shape)>1: - for i in xrange(len(data)): + for i in range(len(data)): thing = data[i] new_idx = len(self.training_data) self.training_data.append(thing) @@ -138,7 +138,7 @@ def query(self,point,k=None,method='mode',slow=False,dumdumcheck=False): neighbors = list() maxd = None maxd_idx = 0 - for i in xrange(0,len(anchor_list)): + for i in range(0,len(anchor_list)): nextpnt_dist = self.distfun(point,self.training_data[anchor_list[i][1]]) self.num_checks += 1 nextthing = (nextpnt_dist,anchor_list[i][1]) @@ -177,10 +177,10 @@ def query(self,point,k=None,method='mode',slow=False,dumdumcheck=False): else: tmp = 0 class_count[clss] = tmp+1 - bleh = max(class_count.iteritems(),key=lambda item:item[1]) + bleh = max(iter(class_count.items()),key=lambda item:item[1]) if dumdumcheck and bleh[1] == 1: - print "aHAH!" - print point + print("aHAH!") + print(point) rv = bleh[0] elif method == 'mean': return sum([self.data_classes[n[1]] for n in neighbors])/float(k) @@ -195,7 +195,7 @@ def dataifywine(fname): #first line is the name of the attributes, strip it off bar = bar[1:] #trim, split, and cast the data. 
seperator is ';' - return [map(float,thing.strip().split(';')) for thing in bar] + return [list(map(float,thing.strip().split(';'))) for thing in bar] def testwine(): wqred = dataifywine('wine/winequality-red.csv') + dataifywine('wine/winequality-white.csv') @@ -203,14 +203,14 @@ def testwine(): leftout = int(len(wqred)*leftoutperc) testing = wqred[:leftout] training = wqred[leftout:] - print "Training:",len(training) - print "Testing:",len(testing) + print("Training:",len(training)) + print("Testing:",len(testing)) foo = FastKNN(10) foo.addEvidence(numpy.array([thing[:-1] for thing in training]), [thing[-1] for thing in training]) knn.addEvidence(numpy.array(training)) total = 0 correct = 0 - for x in xrange(len(testing)): + for x in range(len(testing)): thing = testing[x] guess = foo.query(numpy.array(thing[:-1]),3) #realknn = knn.query(numpy.array([thing[:-1],]),3,method='mean') @@ -221,15 +221,15 @@ def testwine(): correct += 1 total += 1 if total % 50 == 0: - print total,'/',len(testing) - print correct,"/",total,":",float(correct)/float(total) - print "Average checks per query:", float(foo.num_checks)/float(total) + print(total,'/',len(testing)) + print(correct,"/",total,":",float(correct)/float(total)) + print("Average checks per query:", float(foo.num_checks)/float(total)) def testspiral(): - for leftout in xrange(1,11): - print "Fold",leftout + for leftout in range(1,11): + print("Fold",leftout) foo = FastKNN(10) - for x in xrange(1,11): + for x in range(1,11): if x != leftout: somedata = open("spiral/spiralfold%d.txt" % x) pnts = list() @@ -256,13 +256,13 @@ def testspiral(): if guess == pbbbt: correct += 1 total += 1 - print correct,"/",total,":",float(correct)/float(total) - print "Average number of checks per query:", - print float(foo.num_checks)/float(total) + print(correct,"/",total,":",float(correct)/float(total)) + print("Average number of checks per query:", end=' ') + print(float(foo.num_checks)/float(total)) def getflatcsv(fname): inf = open(fname) - return numpy.array([map(float,s.strip().split(',')) for s in inf.readlines()]) + return numpy.array([list(map(float,s.strip().split(','))) for s in inf.readlines()]) def testgendata(): anchors = 200 @@ -276,13 +276,13 @@ def testgendata(): data = getflatcsv(fname) foo.addEvidence(data[:,:-1],data[:,-1]) foo.num_checks = 0 - for x in xrange(querys): + for x in range(querys): pnt = numpy.array(gendata.gensingle(d,bnds,clsses)) foo.query(pnt[:-1]) if x % 50 == 0: - print float(foo.num_checks)/float(x+1), - print x,"/",querys - print "Average # queries:", float(foo.num_checks)/float(querys) + print(float(foo.num_checks)/float(x+1), end=' ') + print(x,"/",querys) + print("Average # queries:", float(foo.num_checks)/float(querys)) diff --git a/QSTK/qstklearn/gendata.py b/QSTK/qstklearn/gendata.py index bc70bd9f1..4ead8d22a 100644 --- a/QSTK/qstklearn/gendata.py +++ b/QSTK/qstklearn/gendata.py @@ -1,16 +1,16 @@ import random def gendata(N,d,bounds,clsses,fname): outf = open(fname,'w') - for i in xrange(N): + for i in range(N): pnt = [None,]*(d+1) - for x in xrange(d): + for x in range(d): pnt[x] = random.uniform(bounds[x][0],bounds[x][1]) pnt[d] = random.choice(clsses) outf.write(", ".join(map(str,pnt))+"\n") outf.close() def gensingle(d,bounds,clsses): pnt = [None,]*(d+1) - for x in xrange(d): + for x in range(d): pnt[x] = random.uniform(bounds[x][0],bounds[x][1]) pnt[d] = random.choice(clsses) return pnt diff --git a/QSTK/qstklearn/hmm.py b/QSTK/qstklearn/hmm.py index 99ac8dd10..dee0d58c1 100644 --- a/QSTK/qstklearn/hmm.py +++ 
b/QSTK/qstklearn/hmm.py @@ -26,11 +26,11 @@ def calcalpha(stateprior,transition,emission,observations,numstates,elem_size=nu first t symbols. """ alpha = numpy.zeros((len(observations),numstates),dtype=elem_size) - for x in xrange(numstates): + for x in range(numstates): alpha[0][x] = stateprior[x]*emission[x][observations[0]] - for t in xrange(1,len(observations)): - for j in xrange(numstates): - for i in xrange(numstates): + for t in range(1,len(observations)): + for j in range(numstates): + for i in range(numstates): alpha[t][j] += alpha[t-1][i]*transition[i][j] alpha[t][j] *= emission[j][observations[t]] return alpha @@ -51,11 +51,11 @@ def calcbeta(transition,emission,observations,numstates,elem_size=numpy.longdoub symbols from t+1 to the end (T). """ beta = numpy.zeros((len(observations),numstates),dtype=elem_size) - for s in xrange(numstates): + for s in range(numstates): beta[len(observations)-1][s] = 1. - for t in xrange(len(observations)-2,-1,-1): - for i in xrange(numstates): - for j in xrange(numstates): + for t in range(len(observations)-2,-1,-1): + for i in range(numstates): + for j in range(numstates): beta[t][i] += transition[i][j]*emission[j][observations[t+1]]*beta[t+1][j] return beta @@ -72,18 +72,18 @@ def calcxi(stateprior,transition,emission,observations,numstates,alpha=None,beta if beta is None: beta = calcbeta(transition,emission,observations,numstates,elem_size) xi = numpy.zeros((len(observations),numstates,numstates),dtype=elem_size) - for t in xrange(len(observations)-1): + for t in range(len(observations)-1): denom = 0.0 - for i in xrange(numstates): - for j in xrange(numstates): + for i in range(numstates): + for j in range(numstates): thing = 1.0 thing *= alpha[t][i] thing *= transition[i][j] thing *= emission[j][observations[t+1]] thing *= beta[t+1][j] denom += thing - for i in xrange(numstates): - for j in xrange(numstates): + for i in range(numstates): + for j in range(numstates): numer = 1.0 numer *= alpha[t][i] numer *= transition[i][j] @@ -100,8 +100,8 @@ def calcgamma(xi,seqlen,numstates, elem_size=numpy.longdouble): in state 'i' at time 't' given the full observation sequence. """ gamma = numpy.zeros((seqlen,numstates),dtype=elem_size) - for t in xrange(seqlen): - for i in xrange(numstates): + for t in range(seqlen): + for i in range(numstates): gamma[t][i] = sum(xi[t][i]) return gamma @@ -114,20 +114,20 @@ def baumwelchstep(stateprior,transition,emission,observations,numstates,numsym,e gamma = calcgamma(xi,len(observations),numstates,elem_size) newprior = gamma[0] newtrans = numpy.zeros((numstates,numstates),dtype=elem_size) - for i in xrange(numstates): - for j in xrange(numstates): + for i in range(numstates): + for j in range(numstates): numer = 0.0 denom = 0.0 - for t in xrange(len(observations)-1): + for t in range(len(observations)-1): numer += xi[t][i][j] denom += gamma[t][i] newtrans[i][j] = numer/denom newemiss = numpy.zeros( (numstates,numsym) ,dtype=elem_size) - for j in xrange(numstates): - for k in xrange(numsym): + for j in range(numstates): + for k in range(numsym): numer = 0.0 denom = 0.0 - for t in xrange(len(observations)): + for t in range(len(observations)): if observations[t] == k: numer += gamma[t][j] denom += gamma[t][j] @@ -219,7 +219,7 @@ def addEvidence(self, newData, iterations=1,epsilon=0.0): processed. 
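# A hedged sketch of how the Baum-Welch pieces above fit together, mirroring
# what addEvidence does. The signature and return values of baumwelchstep are
# taken from the hunks above, and the module is assumed importable as
# QSTK.qstklearn.hmm; the convergence tolerance and iteration cap are
# illustrative assumptions, not part of the original patch.
import numpy
from QSTK.qstklearn.hmm import baumwelchstep

def fit_hmm_sketch(prior, trans, emiss, observations, numstates, numsym,
                   max_iter=100, tol=1e-6):
    for _ in range(max_iter):
        newp, newt, newe = baumwelchstep(prior, trans, emiss,
                                         observations, numstates, numsym)
        # largest parameter movement this iteration
        delta = max(numpy.abs(newp - prior).max(),
                    numpy.abs(newt - trans).max(),
                    numpy.abs(newe - emiss).max())
        prior, trans, emiss = newp, newt, newe
        if delta < tol:   # model has stopped changing; stop early
            break
    return prior, trans, emiss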
""" if len(newData.shape) == 1: - for i in xrange(iterations): + for i in range(iterations): newp,newt,newe = baumwelchstep( self.prior, \ self.transition_matrix, \ self.emission_matrix, \ @@ -236,7 +236,7 @@ def addEvidence(self, newData, iterations=1,epsilon=0.0): self.transition_matrix = newt self.emission_matrix = newe else: - for i in xrange(iterations): + for i in range(iterations): for sequence in newData: newp,newt,newe = baumwelchstep( self.prior, \ self.transition_matrix, \ diff --git a/QSTK/qstklearn/kdtknn.py b/QSTK/qstklearn/kdtknn.py index 7bc130015..083f6cf80 100644 --- a/QSTK/qstklearn/kdtknn.py +++ b/QSTK/qstklearn/kdtknn.py @@ -96,15 +96,15 @@ def query(self,points,k=None,method=None): na_dist, na_neighbors = self.kdt.query(points,k) - n_clsses = map(lambda rslt: map(lambda p: p[-1], self.data[rslt]), na_neighbors) + n_clsses = [[p[-1] for p in self.data[rslt]] for rslt in na_neighbors] #print n_clsses if method=='mode': return map(lambda x: scipy.stats.stats.mode(x)[0],n_clsses)[0] elif method=='mean': - return numpy.array(map(lambda x: numpy.mean(x),n_clsses)) + return numpy.array([numpy.mean(x) for x in n_clsses]) elif method=='median': - return numpy.array(map(lambda x: numpy.median(x),n_clsses)) + return numpy.array([numpy.median(x) for x in n_clsses]) elif method=='raw': return numpy.array(n_clsses) elif method=='all': @@ -112,7 +112,7 @@ def query(self,points,k=None,method=None): def getflatcsv(fname): inf = open(fname) - return numpy.array([map(float,s.strip().split(',')) for s in inf.readlines()]) + return numpy.array([list(map(float,s.strip().split(','))) for s in inf.readlines()]) def testgendata(): fname = 'test2.dat' @@ -126,12 +126,12 @@ def testgendata(): kdt.addEvidence(data) kdt.rebuildKDT() stime = time.time() - for x in xrange(querys): + for x in range(querys): pnt = numpy.array(gendata.gensingle(d,bnds,clsses)) reslt = kdt.query(numpy.array([pnt[:-1]])) - print pnt,"->",reslt + print(pnt,"->",reslt) etime = time.time() - print etime-stime,'/',querys,'=',(etime-stime)/float(querys),'avg wallclock time per query' + print(etime-stime,'/',querys,'=',(etime-stime)/float(querys),'avg wallclock time per query') #foo.addEvidence(data[:,:-1],data[:,-1]) #foo.num_checks = 0 #for x in xrange(querys): diff --git a/QSTK/qstklearn/mldiagnostics.py b/QSTK/qstklearn/mldiagnostics.py index 287665e27..532a20215 100644 --- a/QSTK/qstklearn/mldiagnostics.py +++ b/QSTK/qstklearn/mldiagnostics.py @@ -50,7 +50,7 @@ def plotCurves(self,filename): savefig(filename,format='pdf') def runDiagnostics(self,filename): - for i,lambdaVal in zip(range(len(self.lambdaArray)),self.lambdaArray): + for i,lambdaVal in zip(list(range(len(self.lambdaArray))),self.lambdaArray): learner = copy.copy(self.learner())# is deep copy required # setLambda needs to be a supported function for all ML strategies. learner.setLambda(lambdaVal) diff --git a/QSTK/qstklearn/parallelknn.py b/QSTK/qstklearn/parallelknn.py index 6bf8795e0..670d8947a 100644 --- a/QSTK/qstklearn/parallelknn.py +++ b/QSTK/qstklearn/parallelknn.py @@ -1,181 +1,181 @@ -''' -(c) 2011, 2012 Georgia Tech Research Corporation -This source code is released under the New BSD license. Please see -http://wiki.quantsoftware.org/index.php?title=QSTK_License -for license details. - -Created on Feb 1, 2011 -@author: Shreyas Joshi -@organization: Georgia Institute of Technology -@contact: shreyasj@gatech.edu -@summary: This is an implementation of the K nearest neighbor learning algorithm. 
The implementation is trivial in that the near neighbors are - calculated naively- without any smart tricks. Euclidean distance is used to calculate the distance between two points. The implementation - also provides some coarse parallelism. If the par_query function is used then the query points are split up equally amongst threads and their - near neighbors are calculated in parallel. If the number of threads to use is not specified then no of threads = no of cores as returned by - the cpu_count function. This may not be ideal. -@status: complete. "mode" untested -''' - -import numpy as np -import scipy as sc -import math -import sys -import time -from multiprocessing import Pool -from multiprocessing import cpu_count -data = np.zeros (0)#this is the global data -import scipy.stats - -def par_query (allQueries, k, method='mean', noOfThreads=None): - ''' - @summary: Finds the k- nearest nrighbors in parallel. Based on function "query" - @param allQueries: is another 2D numpy array. Each row here is one query point. It has no 'y' values. These have to be calculated. - @param k: no. of neighbors to consider - @param method: method of combining the 'y' values of the nearest neighbors. Default is mean. - @param noOfThreads: optional parameter that specifies how many threads to create. Default value: no. of threads = value returned by cpu_count - @return: A numpy array with the predicted 'y' values for the query points. The ith element in the array is the 'y' value for the ith query point. - ''' - - #Here we basically start 'noOfThreads' threads. Each thread calculates the neighbors for (noOfQueryPoints / noOfThreads) query points. - - if (noOfThreads == None): - noOfThreads = cpu_count() - #if ends - - #print "No of threads: " + str (noOfThreads) - pool = Pool (processes=noOfThreads) - - resultList = [] - query_per_thread = allQueries.shape[0] / noOfThreads - - #time_start = time.time(); - for thread_ctr in range (0, noOfThreads - 1): - resultList.append(pool.apply_async(query, (allQueries[math.floor(query_per_thread * thread_ctr): (math.floor(query_per_thread * (thread_ctr + 1 ))),:], k,))) - #NOTE: we may need a -1 in above. Possible bug - #for ends - #the "remaining" query points go to the last thread - resultList.append (pool.apply_async(query , (allQueries[(math.floor(query_per_thread* (noOfThreads - 1))):, :] ,k ,))) - - pool.close() - pool.join() - - #time_finish = time.time() - #print "Time taken (secs): " + str (time_finish - time_start) - - answer = resultList[0].get() - - for thread_ctr in range (1, noOfThreads): - answer = np.hstack((answer, resultList[thread_ctr].get())) - #for ends - - #print "par_query done" - return answer - #par_query ends - - -def query(allQueries, k, method='mean'): - ''' - @summary: A serial implementation of k-nearest neighbors. - @param allQueries: is another 2D numpy array. Each row here is one query point. It has no 'y' values. These have to be calculated. - @param k: no. of neighbors to consider - @param method: method of combining the 'y' values of the nearest neighbors. Default is mean. - @return: A numpy array with the predicted 'y' values for the query points. The ith element in the array is the 'y' value for the ith query point. If there is more than one mode then only the first mode is returned. - ''' - - limit = allQueries.shape [0] - data_limit = data.shape[0] - omitLastCol = data.shape [1] - 1; #It must have two columns at least. Possibly add a check for this? 
- answer = np.zeros (limit) #initialize the answer array to all zeros - temp1 = np.zeros ((data.shape[0], (data.shape[1] -1))) - temp2= np.zeros (data.shape[0]) - - if (allQueries.shape[1] != (data.shape[1] -1) ): - print "ERROR: Data and query points are not of the same dimension" - raise ValueError - #if ends - if (k < 1): - print "ERROR: K should be >= 1" - raise ValueError - #if ends - if (k > data.shape[0]): - print "ERROR: K is greater than the total number of data points." - raise ValueError - #if ends - - for ctr in range (0, limit): #for every query point... - #if (ctr % 10 == 0): - # print ctr - #if ends - - #for i in range (0 , data_limit): #for every data point - temp1[0:data_limit, :] = data [0:data_limit ,0:omitLastCol] - allQueries[ctr, :] - #for loop done - - temp1 = temp1*temp1; #square each element in temp1 - for ctr2 in range (0, data_limit): - temp2[ctr2] = math.sqrt(sum (temp1[ctr2,:])) - #for ends - - index = temp2.argsort () #This is actually overkill because we need to sort only the top 'k' terms- but this sorts all of them - #following loop for debugging only - #for j in range (0, k): - # print str(data [index[j], :]) - #for ends - - if (method == 'mean'): - #Now we need to find the average of the k top most 'y' values - answer[ctr] = sum (data[ index [0:k], -1]) / k #0 to (k-1)th index will be k values. But for this we have to give [0:k] because it stops one short of the last index - #if method == mean ends - if (method == 'median'): - answer [ctr] = np.median (data[index [0:k], -1]) - # if median ends - if (method == 'mode'): - answer [ctr] = sc.stats.mode (data[index[0:k],-1])[0][0]; #The first mode. If there is more than one mode then the only the first mode is returned - #endif mode - #for ctr in range (0, limit) ends - return answer - #getAnswer ends - - -def addEvidence (newData): - ''' - @summary: This is the funtion to be called to add data. This function can be called multiple times- to add data whenever you like. - @note: Any dimensional data can be added the first time. After that- the data must have the same number of columns as the data that was added the first time. - @param newData: A 2D numpy array. Each row is a data point and each column is a dimension. The last dimension corresponds to 'y' values. - ''' - - global data - if (data.shape[0] == 0): - data = newData - else: - try: - data= np.vstack ((data, newData)) - except Exception as ex: - print "Type of exception: "+ str(type (ex)) - print "args: " + str(ex.args) - #except ends - #addEvidence ends - - -def main(args): - ''' - @summary: This function is just for testing. Will not be used as such... - ''' - - #Below code just for testing - a = np.loadtxt ("/nethome/sjoshi42/knn_naive/data/3_D_1000_diskQueryPoints.txt") - b= np.loadtxt ("/nethome/sjoshi42/knn_naive/data/3_D_128000_diskDataPoints.txt") - addEvidence(b) - - answer = par_query(a, 5 ,'mode') - #answer = query(a, 5, 'mean') - - for i in range (0, answer.shape[0]): - print answer[i] - #end for - - - print "The answer is: " - -if __name__ == '__main__': - main (sys.argv) +''' +(c) 2011, 2012 Georgia Tech Research Corporation +This source code is released under the New BSD license. Please see +http://wiki.quantsoftware.org/index.php?title=QSTK_License +for license details. + +Created on Feb 1, 2011 +@author: Shreyas Joshi +@organization: Georgia Institute of Technology +@contact: shreyasj@gatech.edu +@summary: This is an implementation of the K nearest neighbor learning algorithm. 
The implementation is trivial in that the near neighbors are + calculated naively- without any smart tricks. Euclidean distance is used to calculate the distance between two points. The implementation + also provides some coarse parallelism. If the par_query function is used then the query points are split up equally amongst threads and their + near neighbors are calculated in parallel. If the number of threads to use is not specified then no of threads = no of cores as returned by + the cpu_count function. This may not be ideal. +@status: complete. "mode" untested +''' + +import numpy as np +import scipy as sc +import math +import sys +import time +from multiprocessing import Pool +from multiprocessing import cpu_count +data = np.zeros (0)#this is the global data +import scipy.stats + +def par_query (allQueries, k, method='mean', noOfThreads=None): + ''' + @summary: Finds the k- nearest nrighbors in parallel. Based on function "query" + @param allQueries: is another 2D numpy array. Each row here is one query point. It has no 'y' values. These have to be calculated. + @param k: no. of neighbors to consider + @param method: method of combining the 'y' values of the nearest neighbors. Default is mean. + @param noOfThreads: optional parameter that specifies how many threads to create. Default value: no. of threads = value returned by cpu_count + @return: A numpy array with the predicted 'y' values for the query points. The ith element in the array is the 'y' value for the ith query point. + ''' + + #Here we basically start 'noOfThreads' threads. Each thread calculates the neighbors for (noOfQueryPoints / noOfThreads) query points. + + if (noOfThreads == None): + noOfThreads = cpu_count() + #if ends + + #print "No of threads: " + str (noOfThreads) + pool = Pool (processes=noOfThreads) + + resultList = [] + query_per_thread = allQueries.shape[0] / noOfThreads + + #time_start = time.time(); + for thread_ctr in range (0, noOfThreads - 1): + resultList.append(pool.apply_async(query, (allQueries[math.floor(query_per_thread * thread_ctr): (math.floor(query_per_thread * (thread_ctr + 1 ))),:], k,))) + #NOTE: we may need a -1 in above. Possible bug + #for ends + #the "remaining" query points go to the last thread + resultList.append (pool.apply_async(query , (allQueries[(math.floor(query_per_thread* (noOfThreads - 1))):, :] ,k ,))) + + pool.close() + pool.join() + + #time_finish = time.time() + #print "Time taken (secs): " + str (time_finish - time_start) + + answer = resultList[0].get() + + for thread_ctr in range (1, noOfThreads): + answer = np.hstack((answer, resultList[thread_ctr].get())) + #for ends + + #print "par_query done" + return answer + #par_query ends + + +def query(allQueries, k, method='mean'): + ''' + @summary: A serial implementation of k-nearest neighbors. + @param allQueries: is another 2D numpy array. Each row here is one query point. It has no 'y' values. These have to be calculated. + @param k: no. of neighbors to consider + @param method: method of combining the 'y' values of the nearest neighbors. Default is mean. + @return: A numpy array with the predicted 'y' values for the query points. The ith element in the array is the 'y' value for the ith query point. If there is more than one mode then only the first mode is returned. + ''' + + limit = allQueries.shape [0] + data_limit = data.shape[0] + omitLastCol = data.shape [1] - 1; #It must have two columns at least. Possibly add a check for this? 
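# A self-contained sketch of the distance step that the per-query loop below
# performs, vectorised with NumPy broadcasting instead of iterating one query
# point at a time. All names here are illustrative; this is not part of the
# original patch.
import numpy as np

def knn_mean_sketch(na_data, na_queries, k):
    # na_data rows are points with the final column holding 'y';
    # na_queries rows are query points without a 'y' column.
    diffs = na_data[np.newaxis, :, :-1] - na_queries[:, np.newaxis, :]
    dists = np.sqrt((diffs ** 2).sum(axis=2))    # shape (n_queries, n_data)
    nearest = np.argsort(dists, axis=1)[:, :k]   # k closest data rows per query
    return na_data[nearest, -1].mean(axis=1)     # the 'mean' combination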
+ answer = np.zeros (limit) #initialize the answer array to all zeros + temp1 = np.zeros ((data.shape[0], (data.shape[1] -1))) + temp2= np.zeros (data.shape[0]) + + if (allQueries.shape[1] != (data.shape[1] -1) ): + print("ERROR: Data and query points are not of the same dimension") + raise ValueError + #if ends + if (k < 1): + print("ERROR: K should be >= 1") + raise ValueError + #if ends + if (k > data.shape[0]): + print("ERROR: K is greater than the total number of data points.") + raise ValueError + #if ends + + for ctr in range (0, limit): #for every query point... + #if (ctr % 10 == 0): + # print ctr + #if ends + + #for i in range (0 , data_limit): #for every data point + temp1[0:data_limit, :] = data [0:data_limit ,0:omitLastCol] - allQueries[ctr, :] + #for loop done + + temp1 = temp1*temp1; #square each element in temp1 + for ctr2 in range (0, data_limit): + temp2[ctr2] = math.sqrt(sum (temp1[ctr2,:])) + #for ends + + index = temp2.argsort () #This is actually overkill because we need to sort only the top 'k' terms- but this sorts all of them + #following loop for debugging only + #for j in range (0, k): + # print str(data [index[j], :]) + #for ends + + if (method == 'mean'): + #Now we need to find the average of the k top most 'y' values + answer[ctr] = sum (data[ index [0:k], -1]) / k #0 to (k-1)th index will be k values. But for this we have to give [0:k] because it stops one short of the last index + #if method == mean ends + if (method == 'median'): + answer [ctr] = np.median (data[index [0:k], -1]) + # if median ends + if (method == 'mode'): + answer [ctr] = sc.stats.mode (data[index[0:k],-1])[0][0]; #The first mode. If there is more than one mode then the only the first mode is returned + #endif mode + #for ctr in range (0, limit) ends + return answer + #getAnswer ends + + +def addEvidence (newData): + ''' + @summary: This is the funtion to be called to add data. This function can be called multiple times- to add data whenever you like. + @note: Any dimensional data can be added the first time. After that- the data must have the same number of columns as the data that was added the first time. + @param newData: A 2D numpy array. Each row is a data point and each column is a dimension. The last dimension corresponds to 'y' values. + ''' + + global data + if (data.shape[0] == 0): + data = newData + else: + try: + data= np.vstack ((data, newData)) + except Exception as ex: + print("Type of exception: "+ str(type (ex))) + print("args: " + str(ex.args)) + #except ends + #addEvidence ends + + +def main(args): + ''' + @summary: This function is just for testing. Will not be used as such... 
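# A minimal usage sketch on synthetic data, since main() below only tests
# against site-specific file paths. Assumptions throughout: the module is
# importable as QSTK.qstklearn.parallelknn, and the platform forks worker
# processes so they inherit the module-level `data` global that par_query
# relies on. Not part of the original patch.
import numpy as np
from QSTK.qstklearn import parallelknn

if __name__ == '__main__':
    na_train = np.random.random((1000, 4))   # 3 features plus a 'y' column
    na_queries = np.random.random((50, 3))   # query points, no 'y' column
    parallelknn.addEvidence(na_train)        # populate the global data store
    print(parallelknn.par_query(na_queries, 5, method='mean'))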
+ ''' + + #Below code just for testing + a = np.loadtxt ("/nethome/sjoshi42/knn_naive/data/3_D_1000_diskQueryPoints.txt") + b= np.loadtxt ("/nethome/sjoshi42/knn_naive/data/3_D_128000_diskDataPoints.txt") + addEvidence(b) + + answer = par_query(a, 5 ,'mode') + #answer = query(a, 5, 'mean') + + for i in range (0, answer.shape[0]): + print(answer[i]) + #end for + + + print("The answer is: ") + +if __name__ == '__main__': + main (sys.argv) diff --git a/QSTK/qstksim/__init__.py b/QSTK/qstksim/__init__.py index a24938453..4f14a4239 100644 --- a/QSTK/qstksim/__init__.py +++ b/QSTK/qstksim/__init__.py @@ -11,4 +11,4 @@ @summary: Init for simulator code ''' -from tradesim import _calculate_leverage, tradesim, tradesim_comb +from .tradesim import _calculate_leverage, tradesim, tradesim_comb diff --git a/QSTK/qstksim/tests/test_tradesim.py b/QSTK/qstksim/tests/test_tradesim.py index 8f95eec0f..c3a6da010 100644 --- a/QSTK/qstksim/tests/test_tradesim.py +++ b/QSTK/qstksim/tests/test_tradesim.py @@ -81,11 +81,11 @@ def test_buy_close(self): qstksim.tradesim( self.df_alloc, self.df_close, 10000, 1, True, 0.02, 5, 0.02 ) - print 'Commision Costs : ' + str(f_commision) - print 'Slippage : ' + str(f_slippage) - print 'Short Borrowing Cost : ' + str(f_borrow) - print 'Leverage : ' - print ts_leverage + print('Commision Costs : ' + str(f_commision)) + print('Slippage : ' + str(f_slippage)) + print('Short Borrowing Cost : ' + str(f_borrow)) + print('Leverage : ') + print(ts_leverage) np.testing.assert_approx_equal(df_funds[-1], \ 10000 * self.i_open_result, significant = 3) self.assertTrue(True) diff --git a/QSTK/qstksim/tests/test_tradesim_SPY.py b/QSTK/qstksim/tests/test_tradesim_SPY.py index cf54ef2a8..92e1ad7a7 100644 --- a/QSTK/qstksim/tests/test_tradesim_SPY.py +++ b/QSTK/qstksim/tests/test_tradesim_SPY.py @@ -71,11 +71,11 @@ def test_buy_close(self): qstksim.tradesim( self.df_alloc, self.df_close, 10000, 1, True, 0.02, 5, 0.02) - print 'Commision Costs : ' + str(f_commision) - print 'Slippage : ' + str(f_slippage) - print 'Short Borrowing Cost : ' + str(f_borrow) - print 'Leverage : ' - print ts_leverage + print('Commision Costs : ' + str(f_commision)) + print('Slippage : ' + str(f_slippage)) + print('Short Borrowing Cost : ' + str(f_borrow)) + print('Leverage : ') + print(ts_leverage) np.testing.assert_approx_equal(df_funds[-1], \ 10000 * self.i_open_result, significant = 3) self.assertTrue(True) diff --git a/QSTK/qstksim/tests/test_tradesim_SPY_Short.py b/QSTK/qstksim/tests/test_tradesim_SPY_Short.py index 87f7fccde..12b52465b 100644 --- a/QSTK/qstksim/tests/test_tradesim_SPY_Short.py +++ b/QSTK/qstksim/tests/test_tradesim_SPY_Short.py @@ -71,11 +71,11 @@ def test_buy_close(self): qstksim.tradesim( self.df_alloc, self.df_close, 10000, 1, True, 0.02, 5, 0.02) - print 'Commision Costs : ' + str(f_commision) - print 'Slippage : ' + str(f_slippage) - print 'Short Borrowing Cost : ' + str(f_borrow) - print 'Leverage : ' - print ts_leverage + print('Commision Costs : ' + str(f_commision)) + print('Slippage : ' + str(f_slippage)) + print('Short Borrowing Cost : ' + str(f_borrow)) + print('Leverage : ') + print(ts_leverage) np.testing.assert_approx_equal(df_funds[-1], \ 10000 * self.i_open_result, significant = 3) self.assertTrue(True) diff --git a/QSTK/qstksim/tradesim.py b/QSTK/qstksim/tradesim.py index 9a2ad8989..1556a5c4f 100644 --- a/QSTK/qstksim/tradesim.py +++ b/QSTK/qstksim/tradesim.py @@ -153,20 +153,20 @@ def tradesim( alloc, df_historic, f_start_cash, i_leastcount=1, @param f_commision_share: 
Commision per share @param b_followleastcount: False will allow fractional shares @param log: CSV file to log transactions to - @return funds: TimeSeries with fund values for each day in the back test - @return leverage: TimeSeries with Leverage values for each day in the back test + @return funds: Series with fund values for each day in the back test + @return leverage: Series with Leverage values for each day in the back test @return Commision costs : Total commision costs in the whole backtester @return Slippage costs : Total slippage costs in the whole backtester - @rtype TimeSeries + @rtype Series """ if alloc.index[-1] > df_historic.index[-1]: - print "Historical Data not sufficient" + print("Historical Data not sufficient") indices, = np.where(alloc.index <= df_historic.index[-1]) alloc = alloc.reindex(index = alloc.index[indices]) if alloc.index[0] < df_historic.index[0]: - print "Historical Data not sufficient" + print("Historical Data not sufficient") indices, = np.where(alloc.index >= df_historic.index[0]) alloc = alloc.reindex(index = alloc.index[indices]) @@ -176,7 +176,7 @@ def tradesim( alloc, df_historic, f_start_cash, i_leastcount=1, #write column headings if log!="false": - print "writing transaction log to "+log + print("writing transaction log to "+log) log_file.write("Symbol,Company Name,Txn Type,Txn Date/Time, Gross Leverage, Net Leverage,# Shares,Price,Txn Value,Portfolio # Shares,Portfolio Value,Commission,Slippage(10BPS),Comments\n") #a dollar is always worth a dollar @@ -451,11 +451,11 @@ def tradesim_comb( df_alloc, d_data, f_start_cash, i_leastcount=1, @param f_minimumcommision: Minimum commision cost per transaction @param f_commision_share: Commision per share @param b_followleastcount: False will allow fractional shares - @return funds: TimeSeries with fund values for each day in the back test - @return leverage: TimeSeries with Leverage values for each day in the back test + @return funds: Series with fund values for each day in the back test + @return leverage: Series with Leverage values for each day in the back test @return Commision costs : Total commision costs in the whole backtester @return Slippage costs : Total slippage costs in the whole backtester - @rtype TimeSeries + @rtype Series """ df_close = d_data['close'] @@ -474,4 +474,4 @@ def tradesim_comb( df_alloc, d_data, f_start_cash, i_leastcount=1, f_commision_share, i_target_leverage, f_rate_borrow, log, b_exposure) if __name__ == '__main__': - print "Done" + print("Done") diff --git a/QSTK/qstkstrat/strategies.py b/QSTK/qstkstrat/strategies.py index 12f7896f4..c7aaaca57 100644 --- a/QSTK/qstkstrat/strategies.py +++ b/QSTK/qstkstrat/strategies.py @@ -32,12 +32,12 @@ def stratGiven( dtStart, dtEnd, dFuncArgs ): @param dFuncArgs: Dict of function args passed to the function @return DataFrame corresponding to the portfolio allocations """ - if not dFuncArgs.has_key('dmPrice'): - print 'Error: Strategy requires dmPrice information' + if 'dmPrice' not in dFuncArgs: + print('Error: Strategy requires dmPrice information') return - if not dFuncArgs.has_key('lfWeights'): - print 'Error: Strategy requires weight information' + if 'lfWeights' not in dFuncArgs: + print('Error: Strategy requires weight information') return dmPrice = dFuncArgs['dmPrice'] @@ -60,8 +60,8 @@ def strat1OverN( dtStart, dtEnd, dFuncArgs ): @param dFuncArgs: Dict of function args passed to the function @return DataFrame corresponding to the portfolio allocations """ - if not dFuncArgs.has_key('dmPrice'): - print 'Error: Strategy 
requires dmPrice information' + if 'dmPrice' not in dFuncArgs: + print('Error: Strategy requires dmPrice information') return dmPrice = dFuncArgs['dmPrice'] @@ -85,24 +85,24 @@ def stratMark( dtStart, dtEnd, dFuncArgs ): @param dFuncArgs: Dict of function args passed to the function @return DataFrame corresponding to the portfolio allocations """ - if not dFuncArgs.has_key('dmPrice'): - print 'Error:', stratMark.__name__, 'requires dmPrice information' + if 'dmPrice' not in dFuncArgs: + print('Error:', stratMark.__name__, 'requires dmPrice information') return - if not dFuncArgs.has_key('sPeriod'): - print 'Error:', stratMark.__name__, 'requires rebalancing period' + if 'sPeriod' not in dFuncArgs: + print('Error:', stratMark.__name__, 'requires rebalancing period') return - if not dFuncArgs.has_key('lLookback'): - print 'Error:', stratMark.__name__, 'requires lookback' + if 'lLookback' not in dFuncArgs: + print('Error:', stratMark.__name__, 'requires lookback') return - if not dFuncArgs.has_key('sMarkPoint'): - print 'Error:', stratMark.__name__, 'requires markowitz point to choose' + if 'sMarkPoint' not in dFuncArgs: + print('Error:', stratMark.__name__, 'requires markowitz point to choose') return ''' Optional variables ''' - if not dFuncArgs.has_key('bAddAlpha'): + if 'bAddAlpha' not in dFuncArgs: bAddAlpha = False else: bAddAlpha = dFuncArgs['bAddAlpha'] @@ -121,7 +121,7 @@ def stratMark( dtStart, dtEnd, dFuncArgs ): dtStart = dtDate - pand.DateOffset(days=lLookback) if( dtStart < dmPrice.index[0] ): - print 'Error, not enough data to rebalance' + print('Error, not enough data to rebalance') continue naRets = dmPrice.ix[ dtStart:dtDate ].values.copy() @@ -184,7 +184,7 @@ def stratMark( dtStart, dtEnd, dFuncArgs ): lInd = 0 else: - print 'Warning: invalid sMarkPoint''' + print('Warning: invalid sMarkPoint''') return diff --git a/QSTK/qstkstudy/EventProfiler.py b/QSTK/qstkstudy/EventProfiler.py index 0b103e0f5..d9126766e 100644 --- a/QSTK/qstkstudy/EventProfiler.py +++ b/QSTK/qstkstudy/EventProfiler.py @@ -32,7 +32,7 @@ def eventprofiler(df_events_arg, d_data, i_lookback=20, i_lookforward=20, tsu.returnize0(df_rets.values) if b_market_neutral == True: - df_rets = df_rets - df_rets[s_market_sym] + df_rets = df_rets.subtract(df_rets[s_market_sym], axis='index') del df_rets[s_market_sym] del df_events[s_market_sym] @@ -67,7 +67,7 @@ def eventprofiler(df_events_arg, d_data, i_lookback=20, i_lookforward=20, # Study Params na_mean = np.mean(na_event_rets, axis=0) na_std = np.std(na_event_rets, axis=0) - li_time = range(-i_lookback, i_lookforward + 1) + li_time = list(range(-i_lookback, i_lookforward + 1)) # Plotting the chart plt.clf() diff --git a/QSTK/qstkstudy/Events.py b/QSTK/qstkstudy/Events.py index 6058f95f3..693fdd119 100644 --- a/QSTK/qstkstudy/Events.py +++ b/QSTK/qstkstudy/Events.py @@ -43,10 +43,10 @@ def find_events(symbols, d_data, verbose=False): window = 10 if verbose: - print __name__ + " reading data" + print(__name__ + " reading data") close = d_data[closefield] if verbose: - print __name__ + " finding events" + print(__name__ + " finding events") for symbol in symbols: close[symbol][close[symbol]>= 1.0] = np.NAN for i in range(1,len(close[symbol])): diff --git a/QSTK/qstkstudy/study.py b/QSTK/qstkstudy/study.py index 5b252604f..dbca32362 100644 --- a/QSTK/qstkstudy/study.py +++ b/QSTK/qstkstudy/study.py @@ -16,7 +16,7 @@ dataobj = da.DataAccess('Yahoo') ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close'] ldf_data = dataobj.get_data(ldt_timestamps, 
ls_symbols, ls_keys) - d_data = dict(zip(ls_keys, ldf_data)) + d_data = dict(list(zip(ls_keys, ldf_data))) eventMatrix = ev.find_events(ls_symbols,d_data,verbose=True) ep.eventprofiler(eventMatrix, d_data, diff --git a/QSTK/qstktest/testDataAccess.py b/QSTK/qstktest/testDataAccess.py index b7daa0660..dffa3b5df 100644 --- a/QSTK/qstktest/testDataAccess.py +++ b/QSTK/qstktest/testDataAccess.py @@ -1,106 +1,106 @@ -''' -Created on Jun 1, 2010 - -@author: Shreyas Joshi -@summary: Just a quick way to test the DataAccess class... nothing more "I dare do all that may become a DataAccessTester. Who dares do more is none" -''' - -#Due to the momentary lack of a HDF viewer that installs/works without hassle- I decided to write a little something to check if the alpha -#values were being written properly - -#Main begins -#from DataAccess import * -#import DataAccessNew as da -import QSTK.qstkutil.DataAccess as da -import tables as pt -import numpy as np -from itertools import izip -import time -import dircache - -def getStocks(listOfPaths): - - listOfStocks=list() - #Path does not exist - print "Reading in all stock names..." - fileExtensionToRemove=".h5" - - for path in listOfPaths: - stocksAtThisPath=list () - - stocksAtThisPath= dircache.listdir(str(path)) - #Next, throw away everything that is not a .h5 And these are our stocks! - stocksAtThisPath = filter (lambda x:(str(x).find(str(fileExtensionToRemove)) > -1), stocksAtThisPath) - #Now, we remove the .h5 to get the name of the stock - stocksAtThisPath = map(lambda x:(x.partition(str(fileExtensionToRemove))[0]),stocksAtThisPath) - - for stock in stocksAtThisPath: - listOfStocks.append(stock) - return listOfStocks - #readStocksFromFile done - - - - -if __name__ == '__main__': - - print "Starting..." - dataItemsList=[] - - dataItemsList.append('alphaValue') - - - - - - #for gekko - #listOfPaths.append("/hzr71/research/QSData/Processed/Norgate/Equities/US_NASDAQ/") - #listOfPaths.append("/hzr71/research/QSData/Processed/Norgate/Equities/Delisted_US_Recent/") - #listOfPaths.append("/hzr71/research/QSData/Processed/Norgate/Equities/OTC/") - #listOfPaths.append("/hzr71/research/QSData/Processed/Norgate/Equities/US_AMEX/") - #listOfPaths.append("/hzr71/research/QSData/Processed/Norgate/Equities/US_Delisted/") - #listOfPaths.append("/hzr71/research/QSData/Processed/Norgate/Equities/US_NYSE/") - #listOfPaths.append("/hzr71/research/QSData/Processed/Norgate/Equities/US_NYSE Arca/") - #gekko paths end - - - - listOfStocks= list() - #listOfStocks.append("AAPL") - #listOfStocks.append("YHOO") - #listOfStocks.append("AMZN") - - listOfPaths=list() - listOfPaths.append("C:\\test\\temp\\") - #listOfPaths.append("C:\\test\\hdf\\") - - listOfStocks= getStocks(listOfPaths) - - - - - alpha= da.DataAccess (True, listOfPaths, "/StrategyData", "StrategyData", True, listOfStocks) # , 946702800 , 1262322000 - - #alpha= da.DataAccess (False, "C:\\test\\temp\\AAPL.h5", "/StrategyData", "StrategyData", True, None) # reading a single hdf5 file - - tslist= list(alpha.getTimestampArray()) - - #for ts in tslist: - # for stock in listOfStocks: - # print str(stock)+" "+ str(ts)+" "+str(alpha.getStockDataItem(str(stock), 'volume', ts)) - - - - - #alpha= da.DataAccess (False, "curveFittingAlphaVals.h5", "/alphaData", "alphaData", True, listOfStocks, None, None, None, dataItemsList) - - - listOfTS= alpha.getTimestampArray() - for stock in ["AAPL"]: - alphaList= alpha.getStockDataList(stock, 'volume') - ctr=0 - for val in alphaList: - print "stock: " + str(stock) + ", val: "+str(val) 
+ ", ts: " + str(listOfTS[ctr]) - ctr+=1 - - print "DONE!" +''' +Created on Jun 1, 2010 + +@author: Shreyas Joshi +@summary: Just a quick way to test the DataAccess class... nothing more "I dare do all that may become a DataAccessTester. Who dares do more is none" +''' + +#Due to the momentary lack of a HDF viewer that installs/works without hassle- I decided to write a little something to check if the alpha +#values were being written properly + +#Main begins +#from DataAccess import * +#import DataAccessNew as da +import QSTK.qstkutil.DataAccess as da +import tables as pt +import numpy as np + +import time +from QSTK.qstkutil.utils import cached_listdir + +def getStocks(listOfPaths): + + listOfStocks=list() + #Path does not exist + print("Reading in all stock names...") + fileExtensionToRemove=".h5" + + for path in listOfPaths: + stocksAtThisPath=list () + + stocksAtThisPath= cached_listdir(str(path)) + #Next, throw away everything that is not a .h5 And these are our stocks! + stocksAtThisPath = [x for x in stocksAtThisPath if (str(x).find(str(fileExtensionToRemove)) > -1)] + #Now, we remove the .h5 to get the name of the stock + stocksAtThisPath = [(x.partition(str(fileExtensionToRemove))[0]) for x in stocksAtThisPath] + + for stock in stocksAtThisPath: + listOfStocks.append(stock) + return listOfStocks + #readStocksFromFile done + + + + +if __name__ == '__main__': + + print("Starting...") + dataItemsList=[] + + dataItemsList.append('alphaValue') + + + + + + #for gekko + #listOfPaths.append("/hzr71/research/QSData/Processed/Norgate/Equities/US_NASDAQ/") + #listOfPaths.append("/hzr71/research/QSData/Processed/Norgate/Equities/Delisted_US_Recent/") + #listOfPaths.append("/hzr71/research/QSData/Processed/Norgate/Equities/OTC/") + #listOfPaths.append("/hzr71/research/QSData/Processed/Norgate/Equities/US_AMEX/") + #listOfPaths.append("/hzr71/research/QSData/Processed/Norgate/Equities/US_Delisted/") + #listOfPaths.append("/hzr71/research/QSData/Processed/Norgate/Equities/US_NYSE/") + #listOfPaths.append("/hzr71/research/QSData/Processed/Norgate/Equities/US_NYSE Arca/") + #gekko paths end + + + + listOfStocks= list() + #listOfStocks.append("AAPL") + #listOfStocks.append("YHOO") + #listOfStocks.append("AMZN") + + listOfPaths=list() + listOfPaths.append("C:\\test\\temp\\") + #listOfPaths.append("C:\\test\\hdf\\") + + listOfStocks= getStocks(listOfPaths) + + + + + alpha= da.DataAccess (True, listOfPaths, "/StrategyData", "StrategyData", True, listOfStocks) # , 946702800 , 1262322000 + + #alpha= da.DataAccess (False, "C:\\test\\temp\\AAPL.h5", "/StrategyData", "StrategyData", True, None) # reading a single hdf5 file + + tslist= list(alpha.getTimestampArray()) + + #for ts in tslist: + # for stock in listOfStocks: + # print str(stock)+" "+ str(ts)+" "+str(alpha.getStockDataItem(str(stock), 'volume', ts)) + + + + + #alpha= da.DataAccess (False, "curveFittingAlphaVals.h5", "/alphaData", "alphaData", True, listOfStocks, None, None, None, dataItemsList) + + + listOfTS= alpha.getTimestampArray() + for stock in ["AAPL"]: + alphaList= alpha.getStockDataList(stock, 'volume') + ctr=0 + for val in alphaList: + print("stock: " + str(stock) + ", val: "+str(val) + ", ts: " + str(listOfTS[ctr])) + ctr+=1 + + print("DONE!") diff --git a/QSTK/qstktest/testLearner.py b/QSTK/qstktest/testLearner.py index 5511ffa01..ce6258846 100755 --- a/QSTK/qstktest/testLearner.py +++ b/QSTK/qstktest/testLearner.py @@ -132,13 +132,13 @@ def main(): # # Compare to ground truth # - print 'trainsize ' + str(trainsize) + 
print('trainsize ' + str(trainsize)) Ytruth = Y[-trainsize:] - print 'Ytruth.shape ' + str(Ytruth.shape) + print('Ytruth.shape ' + str(Ytruth.shape)) Xtest = dataX[-trainsize:,:] - print 'Xtest.shape ' + str(Xtest.shape) + print('Xtest.shape ' + str(Xtest.shape)) Ytest = learner.query(Xtest) # to check every point - print 'Ytest.shape ' + str(Ytest.shape) + print('Ytest.shape ' + str(Ytest.shape)) plt.clf() plt.scatter(Ytruth,Ytest,edgecolors='none') @@ -148,7 +148,7 @@ def main(): plt.ylabel('Estimated') savefig("scatterdata.png", format='png') - print corrcoef(Ytruth,Ytest) + print(corrcoef(Ytruth,Ytest)) if __name__ == '__main__': main() diff --git a/QSTK/qstktools/YahooDataPull.py b/QSTK/qstktools/YahooDataPull.py index bcc903c43..e5c5c7794 100644 --- a/QSTK/qstktools/YahooDataPull.py +++ b/QSTK/qstktools/YahooDataPull.py @@ -2,8 +2,8 @@ Pulling Yahoo CSV Data ''' -import urllib2 -import urllib +import urllib.request, urllib.error, urllib.parse +import urllib.request, urllib.parse, urllib.error import datetime import os import QSTK.qstkutil.DataAccess as da @@ -35,9 +35,9 @@ def get_yahoo_data(data_path, ls_symbols): # print "Getting {0}".format(symbol) try: - params = urllib.urlencode ({'a':0, 'b':1, 'c':2000, 'd':_now.month-1, 'e':_now.day, 'f':_now.year, 's': symbol}) + params = urllib.parse.urlencode ({'a':0, 'b':1, 'c':2000, 'd':_now.month-1, 'e':_now.day, 'f':_now.year, 's': symbol}) url = "http://ichart.finance.yahoo.com/table.csv?%s" % params - url_get = urllib2.urlopen(url) + url_get = urllib.request.urlopen(url) header = url_get.readline() symbol_data.append (url_get.readline()) @@ -58,16 +58,16 @@ def get_yahoo_data(data_path, ls_symbols): f.close() - except urllib2.HTTPError: + except urllib.error.HTTPError: miss_ctr += 1 ls_missed_syms.append(symbol_name) - print "Unable to fetch data for stock: {0} at {1}".format(symbol_name, url) - except urllib2.URLError: + print("Unable to fetch data for stock: {0} at {1}".format(symbol_name, url)) + except urllib.error.URLError: miss_ctr += 1 ls_missed_syms.append(symbol_name) - print "URL Error for stock: {0} at {1}".format(symbol_name, url) + print("URL Error for stock: {0} at {1}".format(symbol_name, url)) - print "All done. Got {0} stocks. Could not get {1}".format(len(ls_symbols) - miss_ctr, miss_ctr) + print("All done. Got {0} stocks. 
Could not get {1}".format(len(ls_symbols) - miss_ctr, miss_ctr)) return ls_missed_syms diff --git a/QSTK/qstktools/csv2fund.py b/QSTK/qstktools/csv2fund.py index c87ccb191..3089533e2 100644 --- a/QSTK/qstktools/csv2fund.py +++ b/QSTK/qstktools/csv2fund.py @@ -78,7 +78,7 @@ def analyze_transactions(filename, plot_name, share_table, show_transactions=Fal #first pass reader=csv.reader(open(filename,'rU'), delimiter=',') - reader.next() + next(reader) prev=0 first=1 diffs=[] @@ -256,7 +256,7 @@ def csv2fund(filename): @return commissions : value of slippage over the csv time """ reader=csv.reader(open(filename,'rU'), delimiter=',') - reader.next() + next(reader) symbols=[] dates=[] for row in reader: @@ -265,9 +265,9 @@ def csv2fund(filename): symbols.append(row[0]) if not(dp.parse(row[3]) in dates): dates.append(dp.parse(row[3])) - print symbols + print(symbols) reader=csv.reader(open(filename,'rU'), delimiter=',') - reader.next() + next(reader) if not("_CASH" in symbols): symbols.append("_CASH") vals=numpy.zeros([len(dates),len(symbols)]) @@ -309,7 +309,7 @@ def csv2fund(filename): column_index = sorted(share_table.columns) share_table = share_table.reindex(index=time_index, columns=column_index) i_start_cash = share_table["_CASH"].ix[0] - print i_start_cash + print(i_start_cash) return [share_table, commissions, i_start_cash] # def ofx2fund(filename, start_val): @@ -408,14 +408,14 @@ def share_table2fund(share_table): if __name__ == "__main__": filename = "Strat.csv" plot_name = "Strategy" - print "load csv" + print("load csv") [share_table, commissions, i_start_cash] = csv2fund(filename) - print share_table + print(share_table) [fund_ts, ts_leverage] = share_table2fund(share_table) - print "print report" - print fund_ts + print("print report") + print(fund_ts) report.print_stats(fund_ts, ["SPY"], plot_name, directory="./" + plot_name, commissions=commissions, i_start_cash=i_start_cash) - print "analyze transactions" + print("analyze transactions") #Generate new plot based off transactions alone analyze_transactions(filename, plot_name, share_table, True) - print "done" + print("done") diff --git a/QSTK/qstktools/report.py b/QSTK/qstktools/report.py index b3cac4186..5c7f43d7f 100644 --- a/QSTK/qstktools/report.py +++ b/QSTK/qstktools/report.py @@ -1,1182 +1,1182 @@ -''' -(c) 2011, 2012 Georgia Tech Research Corporation -This source code is released under the New BSD license. Please see -http://wiki.quantsoftware.org/index.php?title=QSTK_License -for license details. - -Created on Jan 1, 2011 - -@author:Drew Bratcher -@contact: dbratcher@gatech.edu -@summary: Contains tutorial for backtester and report. 
- -''' - -from os import path, makedirs -from os import sys -from QSTK.qstkutil import DataAccess as de -from QSTK.qstkutil import qsdateutil as du -from QSTK.qstkutil import tsutil as tsu -from QSTK.qstkutil import fundutil as fu -from dateutil.relativedelta import relativedelta -import numpy as np -from math import log10 -import locale -from pylab import savefig -from matplotlib import pyplot -from matplotlib import gridspec -import matplotlib.dates as mdates -import cPickle -import datetime as dt -import pandas -import numpy as np -from copy import deepcopy -import scipy.stats as scst - -def _dividend_rets_funds(df_funds, f_dividend_rets): - - df_funds_copy = deepcopy(df_funds) - f_price = deepcopy(df_funds_copy[0]) - - df_funds_copy.values[1:] = (df_funds_copy.values[1:]/df_funds_copy.values[0:-1]) - df_funds_copy.values[0] = 1 - - df_funds_copy = df_funds_copy + f_dividend_rets - - na_funds_copy = np.cumprod(df_funds_copy.values) - na_funds_copy = na_funds_copy*f_price - - df_funds = pandas.Series(na_funds_copy, index = df_funds_copy.index) - - return df_funds - -def print_header(html_file, name): - """ - @summary prints header of report html file - """ - html_file.write("\n") - html_file.write("\n") - html_file.write("QSTK Generated Report:" + name + "\n") - html_file.write("\n\n") - html_file.write("\n\n") - -def print_footer(html_file): - """ - @summary prints footer of report html file - """ - html_file.write("\n\n") - html_file.write("") - -def get_annual_return(fund_ts, years): - """ - @summary prints annual return for given fund and years to the given stream - @param fund_ts: pandas fund time series - @param years: list of years to print out - @param ostream: stream to print to - """ - lf_ret=[] - for year in years: - year_vals = [] - for date in fund_ts.index: - if(date.year ==year): - year_vals.append([fund_ts.ix[date]]) - day_rets = tsu.daily1(year_vals) - ret = tsu.get_ror_annual(day_rets) - ret=float(ret) - lf_ret.append(ret*100) #" %+8.2f%%" % (ret*100) - return lf_ret - -def get_winning_days(fund_ts, years): - """ - @summary prints winning days for given fund and years to the given stream - @param fund_ts: pandas fund time series - @param years: list of years to print out - @param ostream: stream to print to - """ - s_ret="" - for year in years: - year_vals = [] - for date in fund_ts.index: - if(date.year==year): - year_vals.append([fund_ts.ix[date]]) - ret = fu.get_winning_days(year_vals) - s_ret+=" % + 8.2f%%" % ret - return s_ret - -def get_max_draw_down(fund_ts, years): - """ - @summary prints max draw down for given fund and years to the given stream - @param fund_ts: pandas fund time series - @param years: list of years to print out - @param ostream: stream to print to - """ - s_ret="" - for year in years: - year_vals = [] - for date in fund_ts.index: - if(date.year==year): - year_vals.append(fund_ts.ix[date]) - ret = fu.get_max_draw_down(year_vals) - s_ret+=" % + 8.2f%%" % (ret*100) - return s_ret - -def get_daily_sharpe(fund_ts, years): - """ - @summary prints sharpe ratio for given fund and years to the given stream - @param fund_ts: pandas fund time series - @param years: list of years to print out - @param ostream: stream to print to - """ - s_ret="" - for year in years: - year_vals = [] - for date in fund_ts.index: - if(date.year==year): - year_vals.append([fund_ts.ix[date]]) - ret = fu.get_sharpe_ratio(year_vals) - s_ret+=" % + 8.2f " % ret - return s_ret - -def get_daily_sortino(fund_ts, years): - """ - @summary prints sortino ratio for given fund and years 
to the given stream - @param fund_ts: pandas fund time series - @param years: list of years to print out - @param ostream: stream to print to - """ - s_ret="" - for year in years: - year_vals = [] - for date in fund_ts.index: - if(date.year==year): - year_vals.append([fund_ts.ix[date]]) - ret = fu.get_sortino_ratio(year_vals) - s_ret+=" % + 8.2f " % ret - return s_ret - -def get_std_dev(fund_ts): - """ - @summary gets standard deviation of returns for a fund as a string - @param fund_ts: pandas fund time series - @param years: list of years to print out - @param ostream: stream to print to - """ - fund_ts=fund_ts.fillna(method='pad') - fund_ts=fund_ts.fillna(method='bfill') - ret=np.std(tsu.daily(fund_ts.values))*10000 - return ("%+7.2f bps " % ret) - - -def ks_statistic(fund_ts): - fund_ts = deepcopy(fund_ts) - if len(fund_ts.values) > 60: - seq1 = fund_ts.values[0:-60] - seq2 = fund_ts.values[-60:] - tsu.returnize0(seq1) - tsu.returnize0(seq2) - (ks, p) = scst.ks_2samp(seq1, seq2) - return ks, p - # elif len(fund_ts.values) > 5: - # seq1 = fund_ts.values[0:-5] - # seq2 = fund_ts.values[-5:] - # (ks, p) = scst.ks_2samp(seq1, seq2) - # return ks, p - - ks = -1 - p = -1 - return ks, p - -def ks_statistic_calc(fund_ts_past, fund_ts_month): - try: - seq1 = deepcopy(fund_ts_past.values) - seq2 = deepcopy(fund_ts_month.values) - tsu.returnize0(seq1) - tsu.returnize0(seq2) - (ks, p) = scst.ks_2samp(seq1, seq2) - return ks, p - except: - return -1,-1 - -def print_industry_coer(fund_ts, ostream): - """ - @summary prints standard deviation of returns for a fund - @param fund_ts: pandas fund time series - @param years: list of years to print out - @param ostream: stream to print to - """ - industries = [['$DJUSBM', 'Materials'], - ['$DJUSNC', 'Goods'], - ['$DJUSCY', 'Services'], - ['$DJUSFN', 'Financials'], - ['$DJUSHC', 'Health'], - ['$DJUSIN', 'Industrial'], - ['$DJUSEN', 'Oil & Gas'], - ['$DJUSTC', 'Technology'], - ['$DJUSTL', 'TeleComm'], - ['$DJUSUT', 'Utilities']] - for i in range(0, len(industries) ): - if(i%2==0): - ostream.write("\n") - #load data - norObj = de.DataAccess('Yahoo') - ldtTimestamps = du.getNYSEdays( fund_ts.index[0], fund_ts.index[-1], dt.timedelta(hours=16) ) - ldfData = norObj.get_data( ldtTimestamps, [industries[i][0]], ['close'] ) - #get corelation - ldfData[0]=ldfData[0].fillna(method='pad') - ldfData[0]=ldfData[0].fillna(method='bfill') - a=np.corrcoef(np.ravel(tsu.daily(ldfData[0][industries[i][0]])),np.ravel(tsu.daily(fund_ts.values))) - b=np.ravel(tsu.daily(ldfData[0][industries[i][0]])) - f=np.ravel(tsu.daily(fund_ts)) - fBeta, unused = np.polyfit(b,f,1) - ostream.write("%10s(%s):%+6.2f, %+6.2f " % (industries[i][1], industries[i][0], a[0,1], fBeta)) - -def print_other_coer(fund_ts, ostream): - """ - @summary prints standard deviation of returns for a fund - @param fund_ts: pandas fund time series - @param years: list of years to print out - @param ostream: stream to print to - """ - industries = [['$SPX', ' S&P Index'], - ['$DJI', ' Dow Jones'], - ['$DJUSEN', 'Oil & Gas'], - ['$DJGSP', ' Metals']] - for i in range(0, len(industries) ): - if(i%2==0): - ostream.write("\n") - #load data - norObj =de.DataAccess('Yahoo') - ldtTimestamps = du.getNYSEdays( fund_ts.index[0], fund_ts.index[-1], dt.timedelta(hours=16) ) - ldfData = norObj.get_data( ldtTimestamps, [industries[i][0]], ['close'] ) - #get corelation - ldfData[0]=ldfData[0].fillna(method='pad') - ldfData[0]=ldfData[0].fillna(method='bfill') - 
a=np.corrcoef(np.ravel(tsu.daily(ldfData[0][industries[i][0]])),np.ravel(tsu.daily(fund_ts.values))) - b=np.ravel(tsu.daily(ldfData[0][industries[i][0]])) - f=np.ravel(tsu.daily(fund_ts)) - fBeta, unused = np.polyfit(b,f,1) - ostream.write("%10s(%s):%+6.2f, %+6.2f " % (industries[i][1], industries[i][0], a[0,1], fBeta)) - - -def print_benchmark_coer(fund_ts, benchmark_close, sym, ostream): - """ - @summary prints standard deviation of returns for a fund - @param fund_ts: pandas fund time series - @param years: list of years to print out - @param ostream: stream to print to - """ - fund_ts=fund_ts.fillna(method='pad') - fund_ts=fund_ts.fillna(method='bfill') - benchmark_close=benchmark_close.fillna(method='pad') - benchmark_close=benchmark_close.fillna(method='bfill') - faCorr=np.corrcoef(np.ravel(tsu.daily(fund_ts.values)),np.ravel(tsu.daily(benchmark_close))); - b=np.ravel(tsu.daily(benchmark_close)) - f=np.ravel(tsu.daily(fund_ts)) - fBeta, unused = np.polyfit(b,f, 1); - print_line(sym+"Correlattion","%+6.2f" % faCorr[0,1],i_spacing=3,ostream=ostream) - print_line(sym+"Beta","%+6.2f" % fBeta,i_spacing=3,ostream=ostream) - -def print_monthly_returns(fund_ts, years, ostream): - """ - @summary prints monthly returns for given fund and years to the given stream - @param fund_ts: pandas fund time series - @param years: list of years to print out - @param ostream: stream to print to - """ - ostream.write(" ") - month_names = du.getMonthNames() - for name in month_names: - ostream.write(" " + str(name)) - ostream.write("\n") - i = 0 - mrets = tsu.monthly(fund_ts) - for year in years: - ostream.write(str(year)) - months = du.getMonths(fund_ts, year) - for k in range(1, months[0]): - ostream.write(" ") - for month in months: - ostream.write(" % + 6.2f" % (mrets[i]*100)) - i += 1 - ostream.write("\n") - - - -def print_monthly_turnover(fund_ts, years, ts_turnover, ostream): - """ - @summary prints monthly returns for given fund and years to the given stream - @param fund_ts: pandas fund time series - @param years: list of years to print out - @param ostream: stream to print to - """ - ostream.write(" ") - month_names = du.getMonthNames() - for name in month_names: - ostream.write(" " + str(name)) - ostream.write("\n") - i = 0 - # mrets = tsu.monthly(fund_ts) - for year in years: - ostream.write(str(year)) - months = du.getMonths(ts_turnover, year) - if months != []: - for k in range(1, months[0]): - ostream.write(" ") - for month in months: - ostream.write(" % + 6.2f" % (ts_turnover[i]*100)) - i += 1 - ostream.write("\n") - -def print_monthly_ks(fund_ts, years, ostream): - """ - @summary prints monthly returns for given fund and years to the given stream - @param fund_ts: pandas fund time series - @param years: list of years to print out - @param ostream: stream to print to - """ - ostream.write(" ") - month_names = du.getMonthNames() - for name in month_names: - ostream.write(" " + str(name)) - ostream.write("\n") - - # mrets = tsu.monthly(fund_ts) - m_str = [] - - for i, year in enumerate(years): - months = du.getMonths(fund_ts, year) - for j, month in enumerate(months): - if i == 0 and j < 3: - m_str.append(' ') - else: - # dt_st = max(fund_ts.index[0], dt.datetime(year, month, 1)-relativedelta(months=6)) - dt_st = fund_ts.index[0] - dt_today = dt.datetime(year, month, 1) - relativedelta(months=2) - dt_end = min(dt.datetime(year, month, 1) + relativedelta(months=1) + dt.timedelta(hours=-5), fund_ts.index[-1]) - fund_ts_past = fund_ts.ix[dt_st: dt_today] - fund_ts_month = fund_ts.ix[dt_today: 
dt_end] - ks, p = ks_statistic_calc(fund_ts_past, fund_ts_month) - if not(ks == -1 or p == -1): - if ks < p: - m_str.append('PASS') - else: - m_str.append('FAIL') - else: - m_str.append(' ') - - i = 0 - for year in years: - ostream.write(str(year)) - months = du.getMonths(fund_ts, year) - for k in range(1, months[0]): - ostream.write(" ") - for month in months: - ostream.write("%7s" % (m_str[i])) - i = i + 1 - ostream.write("\n") - - -def print_years(years, ostream): - ostream.write("\n") - s_line="" - s_line2="" - for f_token in years: - s_line+="%9d " % f_token - s_line2+="%10s" % '------' - - ostream.write("%35s %s%30s\n" % (" ", " "*4, s_line)) - ostream.write("%35s %s%30s\n" % (" ", " "*4, s_line2)) - - -def print_line(s_left_side, s_right_side, i_spacing=0, ostream="stdout"): - ostream.write("%35s:%s%30s\n" % (s_left_side, " "*i_spacing, s_right_side)) - -def print_stats(fund_ts, benchmark, name, lf_dividend_rets=0.0, original="",s_fund_name="Fund", - s_original_name="Original", d_trading_params="", d_hedge_params="", s_comments="", directory = False, - leverage = False, s_leverage_name="Leverage", commissions = 0, slippage = 0, borrowcost = 0, ostream = sys.stdout, - i_start_cash=1000000, ts_turnover="False"): - """ - @summary prints stats of a provided fund and benchmark - @param fund_ts: fund value in pandas timeseries - @param benchmark: benchmark symbol to compare fund to - @param name: name to associate with the fund in the report - @param directory: parameter to specify printing to a directory - @param leverage: time series to plot with report - @param commissions: value to print with report - @param slippage: value to print with report - @param ostream: stream to print stats to, defaults to stdout - """ - - #Set locale for currency conversions - locale.setlocale(locale.LC_ALL, '') - - if original != "" and type(original) != type([]): - original = [original] - if type(s_original_name) != type([]): - s_original_name = [s_original_name] - - #make names length independent for alignment - s_formatted_original_name = [] - for name_temp in s_original_name: - s_formatted_original_name.append("%15s" % name_temp) - s_formatted_fund_name = "%15s" % s_fund_name - - fund_ts=fund_ts.fillna(method='pad') - fund_ts=fund_ts.fillna(method='bfill') - fund_ts=fund_ts.fillna(1.0) - if directory != False : - if not path.exists(directory): - makedirs(directory) - - sfile = path.join(directory, "report-%s.html" % name ) - splot = "plot-%s.png" % name - splot_dir = path.join(directory, splot) - ostream = open(sfile, "wb") - ostream.write("
")
-        print "writing to ", sfile
-
-        if type(original)==type("str"):
-            if type(leverage)!=type(False):
-                print_plot(fund_ts, benchmark, name, splot_dir, lf_dividend_rets, leverage=leverage, i_start_cash = i_start_cash, s_leverage_name=s_leverage_name)
-            else:
-                print_plot(fund_ts, benchmark, name, splot_dir, lf_dividend_rets, i_start_cash = i_start_cash)
-        else:
-            if type(leverage)!=type(False):
-                print_plot([fund_ts, original], benchmark, name, splot_dir, s_original_name, lf_dividend_rets,
-                             leverage=leverage, i_start_cash = i_start_cash, s_leverage_name=s_leverage_name)
-            else:
-                print_plot([fund_ts, original], benchmark, name, splot_dir, s_original_name, lf_dividend_rets, i_start_cash = i_start_cash)
-
-    start_date = fund_ts.index[0].strftime("%m/%d/%Y")
-    end_date = fund_ts.index[-1].strftime("%m/%d/%Y")
-    ostream.write("Performance Summary for "\
-	 + str(path.basename(name)) + " Backtest\n")
-    ostream.write("For the dates " + str(start_date) + " to "\
-                                       + str(end_date) + "")
-
-    #paramater section
-    if d_trading_params!="":
-        ostream.write("\n\nTrading Paramaters\n\n")
-        for var in d_trading_params:
-            print_line(var, d_trading_params[var],ostream=ostream)
-    if d_hedge_params!="":
-        ostream.write("\nHedging Paramaters\n\n")
-        if type(d_hedge_params['Weight of Hedge']) == type(float):
-            d_hedge_params['Weight of Hedge'] = str(int(d_hedge_params['Weight of Hedge']*100)) + '%'
-        for var in d_hedge_params:
-            print_line(var, d_hedge_params[var],ostream=ostream)
-
-    #comment section
-    if s_comments!="":
-        ostream.write("\nComments\n\n%s" % s_comments)
-
-
-    if directory != False :
-        ostream.write("\n\n\n\n")
-
-    mult = i_start_cash/fund_ts.values[0]
-
-
-    timeofday = dt.timedelta(hours = 16)
-    timestamps = du.getNYSEdays(fund_ts.index[0], fund_ts.index[-1], timeofday)
-    dataobj =de.DataAccess('Yahoo')
-    years = du.getYears(fund_ts)
-    benchmark_close = dataobj.get_data(timestamps, benchmark, ["close"], \
-                                                     verbose = False)[0]
-    for bench_sym in benchmark:
-        benchmark_close[bench_sym]=benchmark_close[bench_sym].fillna(method='pad')
-        benchmark_close[bench_sym]=benchmark_close[bench_sym].fillna(method='bfill')
-        benchmark_close[bench_sym]=benchmark_close[bench_sym].fillna(1.0)
-
-    if type(lf_dividend_rets) != type(0.0):
-        for i,sym in enumerate(benchmark):
-            benchmark_close[sym] = _dividend_rets_funds(benchmark_close[sym], lf_dividend_rets[i])
-
-    ostream.write("Resulting Values in $ with an initial investment of "+ locale.currency(int(round(i_start_cash)), grouping=True) + "\n")
-
-    print_line(s_formatted_fund_name+" Resulting Value"," %15s, %10.2f%%" % (locale.currency(int(round(fund_ts.values[-1]*mult)), grouping=True), \
-                                                     float(100*((fund_ts.values[-1]/fund_ts.values[0])-1))), i_spacing=4, ostream=ostream)
-
-    # if type(original)!=type("str"):
-    #     mult3 = i_start_cash / original.values[0]
-    #     # print_line(s_formatted_original_name +" Resulting Value",(locale.currency(int(round(original.values[-1]*mult3)), grouping=True)),i_spacing=3, ostream=ostream)
-    #     print_line(s_formatted_original_name+" Resulting Value"," %15s, %10.2f%%" % (locale.currency(int(round(original.values[-1]*mult3)), grouping=True), \
-    #                                                  float(100*((original.values[-1]/original.values[0])-1))), i_spacing=4, ostream=ostream)
-
-    if type(original)!=type("str"):
-        for i in range(len(original)):
-            mult3 = i_start_cash / original[i].values[0]
-            # print_line(s_formatted_original_name +" Resulting Value",(locale.currency(int(round(original[i].values[-1]*mult3)), grouping=True)),i_spacing=3, ostream=ostream)
-            print_line(s_formatted_original_name[i]+" Resulting Value"," %15s, %10.2f%%" % (locale.currency(int(round(original[i].values[-1]*mult3)), grouping=True), \
-                                                     float(100*((original[i].values[-1]/original[i].values[0])-1))), i_spacing=4, ostream=ostream)
-
-    for bench_sym in benchmark:
-        mult2= i_start_cash / benchmark_close[bench_sym].values[0]
-        # print_line(bench_sym+" Resulting Value",locale.currency(int(round(benchmark_close[bench_sym].values[-1]*mult2)), grouping=True),i_spacing=3, ostream=ostream)
-        print_line(bench_sym+" Resulting Value"," %15s, %10.2f%%" % (locale.currency(int(round(benchmark_close[bench_sym].values[-1]*mult2)), grouping=True), \
-                                                     float(100*((benchmark_close[bench_sym].values[-1]/benchmark_close[bench_sym].values[0])-1))), i_spacing=4, ostream=ostream)
-
-    ostream.write("\n")
-
-    # if len(years) > 1:
-    print_line(s_formatted_fund_name+" Sharpe Ratio","%10.3f" % fu.get_sharpe_ratio(fund_ts.values)[0],i_spacing=4, ostream=ostream)
-    if type(original)!=type("str"):
-        for i in range(len(original)):
-            print_line(s_formatted_original_name[i]+" Sharpe Ratio","%10.3f" % fu.get_sharpe_ratio(original[i].values)[0],i_spacing=4, ostream=ostream)
-
-    for bench_sym in benchmark:
-        print_line(bench_sym+" Sharpe Ratio","%10.3f" % fu.get_sharpe_ratio(benchmark_close[bench_sym].values)[0],i_spacing=4,ostream=ostream)
-    ostream.write("\n")
-
-
-    # KS - Similarity
-    # ks, p = ks_statistic(fund_ts);
-    # if ks!= -1 and p!= -1:
-    #     if ks < p:
-    #         ostream.write("\nThe last three month's returns are consistent with previous performance (KS = %2.5f, p = %2.5f) \n\n"% (ks, p))
-    #     else:
-    #         ostream.write("\nThe last three month's returns are NOT CONSISTENT with previous performance (KS = %2.5f, p = %2.5f) \n\n"% (ks, p))
-
-
-    ostream.write("Transaction Costs\n")
-    print_line("Total Commissions"," %15s, %10.2f%%" % (locale.currency(int(round(commissions)), grouping=True), \
-                                                  float((round(commissions)*100)/(fund_ts.values[-1]*mult))), i_spacing=4, ostream=ostream)
-
-    print_line("Total Slippage"," %15s, %10.2f%%" % (locale.currency(int(round(slippage)), grouping=True), \
-                                                     float((round(slippage)*100)/(fund_ts.values[-1]*mult))), i_spacing=4, ostream=ostream)
-
-    print_line("Total Short Borrowing Cost"," %15s, %10.2f%%" % (locale.currency(int(round(borrowcost)), grouping=True), \
-                                                     float((round(borrowcost)*100)/(fund_ts.values[-1]*mult))), i_spacing=4, ostream=ostream)
-
-    print_line("Total Costs"," %15s, %10.2f%%" % (locale.currency(int(round(borrowcost+slippage+commissions)), grouping=True), \
-                                  float((round(borrowcost+slippage+commissions)*100)/(fund_ts.values[-1]*mult))), i_spacing=4, ostream=ostream)
-
-    ostream.write("\n")
-
-    print_line(s_formatted_fund_name+" Std Dev of Returns",get_std_dev(fund_ts),i_spacing=8, ostream=ostream)
-
-    if type(original)!=type("str"):
-        for i in range(len(original)):
-            print_line(s_formatted_original_name[i]+" Std Dev of Returns", get_std_dev(original[i]), i_spacing=8, ostream=ostream)
-
-    for bench_sym in benchmark:
-        print_line(bench_sym+" Std Dev of Returns", get_std_dev(benchmark_close[bench_sym]), i_spacing=8, ostream=ostream)
-
-    ostream.write("\n")
-
-
-    for bench_sym in benchmark:
-        print_benchmark_coer(fund_ts, benchmark_close[bench_sym], str(bench_sym), ostream)
-    ostream.write("\n")
-
-    ostream.write("\nYearly Performance Metrics")
-    print_years(years, ostream)
-
-
-    s_line=""
-    for f_token in get_annual_return(fund_ts, years):
-        s_line+=" %+8.2f%%" % f_token
-    print_line(s_formatted_fund_name+" Annualized Return",s_line, i_spacing=4, ostream=ostream)
-
-
-    if type(original)!=type("str"):
-        for i in range(len(original)):
-            s_line=""
-            for f_token in get_annual_return(original[i], years):
-                s_line+=" %+8.2f%%" % f_token
-            print_line(s_formatted_original_name[i]+" Annualized Return", s_line, i_spacing=4, ostream=ostream)
-
-    for bench_sym in benchmark:
-        s_line=""
-        for f_token in get_annual_return(benchmark_close[bench_sym], years):
-            s_line+=" %+8.2f%%" % f_token
-        print_line(bench_sym+" Annualized Return", s_line, i_spacing=4, ostream=ostream)
-
-    print_years(years, ostream)
-
-    print_line(s_formatted_fund_name+" Winning Days",get_winning_days(fund_ts, years), i_spacing=4, ostream=ostream)
-
-
-    if type(original)!=type("str"):
-        for i in range(len(original)):
-            print_line(s_formatted_original_name[i]+" Winning Days",get_winning_days(original[i], years), i_spacing=4, ostream=ostream)
-
-
-    for bench_sym in benchmark:
-        print_line(bench_sym+" Winning Days",get_winning_days(benchmark_close[bench_sym], years), i_spacing=4, ostream=ostream)
-
-
-    print_years(years, ostream)
-
-    print_line(s_formatted_fund_name+" Max Draw Down",get_max_draw_down(fund_ts, years), i_spacing=4, ostream=ostream)
-
-    if type(original)!=type("str"):
-        for i in range(len(original)):
-            print_line(s_formatted_original_name[i]+" Max Draw Down",get_max_draw_down(original[i], years), i_spacing=4, ostream=ostream)
-
-
-    for bench_sym in benchmark:
-        print_line(bench_sym+" Max Draw Down",get_max_draw_down(benchmark_close[bench_sym], years), i_spacing=4, ostream=ostream)
-
-
-    print_years(years, ostream)
-
-
-    print_line(s_formatted_fund_name+" Daily Sharpe Ratio",get_daily_sharpe(fund_ts, years), i_spacing=4, ostream=ostream)
-
-
-    if type(original)!=type("str"):
-        for i in range(len(original)):
-            print_line(s_formatted_original_name[i]+" Daily Sharpe Ratio",get_daily_sharpe(original[i], years), i_spacing=4, ostream=ostream)
-
-    for bench_sym in benchmark:
-        print_line(bench_sym+" Daily Sharpe Ratio",get_daily_sharpe(benchmark_close[bench_sym], years), i_spacing=4, ostream=ostream)
-
-
-    print_years(years, ostream)
-
-    print_line(s_formatted_fund_name+" Daily Sortino Ratio",get_daily_sortino(fund_ts, years), i_spacing=4, ostream=ostream)
-
-    if type(original)!=type("str"):
-        for i in range(len(original)):
-            print_line(s_formatted_original_name[i]+" Daily Sortino Ratio",get_daily_sortino(original[i], years), i_spacing=4, ostream=ostream)
-
-
-    for bench_sym in benchmark:
-        print_line(bench_sym+" Daily Sortino Ratio",get_daily_sortino(benchmark_close[bench_sym], years), i_spacing=4, ostream=ostream)
-
-
-    ostream.write("\n\n\nCorrelation and Beta with DJ Industries for the Fund ")
-
-    print_industry_coer(fund_ts,ostream)
-
-    ostream.write("\n\nCorrelation and Beta with Other Indices for the Fund ")
-
-    print_other_coer(fund_ts,ostream)
-
-    ostream.write("\n\n\nMonthly Returns for the Fund %\n")
-
-    print_monthly_returns(fund_ts, years, ostream)
-
-    if type(ts_turnover) != type("False"):
-        ostream.write("\n\nMonthly Turnover for the fund\n")
-        print_monthly_turnover(fund_ts, years, ts_turnover, ostream)
-
-    ostream.write("\n\n3 Month Kolmogorov-Smirnov 2-Sample Similarity Test\n")
-
-    print_monthly_ks(fund_ts, years, ostream)
-
-    ks, p = ks_statistic(fund_ts);
-    if ks!= -1 and p!= -1:
-        ostream.write("\nResults for the Similarity Test over last 3 months : (KS = %2.5f, p = %2.5f) \n\n"% (ks, p))
-
-    if directory != False:
-        ostream.write("
") - - -def print_html(fund_ts, benchmark, name, lf_dividend_rets=0.0, original="", - s_fund_name="Fund", s_original_name="Original", d_trading_params="", d_hedge_params="", - s_comments="", directory=False, leverage=False, s_leverage_name="Leverage",commissions=0, slippage=0, - borrowcost=0, ostream=sys.stdout, i_start_cash=1000000): - """ - @summary prints stats of a provided fund and benchmark - @param fund_ts: fund value in pandas timeseries - @param benchmark: benchmark symbol to compare fund to - @param name: name to associate with the fund in the report - @param directory: parameter to specify printing to a directory - @param leverage: time series to plot with report - @param commissions: value to print with report - @param slippage: value to print with report - @param ostream: stream to print stats to, defaults to stdout - """ - - #Set locale for currency conversions - locale.setlocale(locale.LC_ALL, '') - - #make names length independent for alignment - s_formatted_original_name="%15s" % s_original_name - s_formatted_fund_name = "%15s" % s_fund_name - - fund_ts=fund_ts.fillna(method='pad') - if directory != False : - if not path.exists(directory): - makedirs(directory) - - sfile = path.join(directory, "report-%s.html" % name ) - splot = "plot-%s.png" % name - splot_dir = path.join(directory, splot) - ostream = open(sfile, "wb") - print "writing to ", sfile - - if type(original)==type("str"): - if type(leverage)!=type(False): - print_plot(fund_ts, benchmark, name, splot_dir, lf_dividend_rets, leverage=leverage, i_start_cash = i_start_cash, s_leverage_name=s_leverage_name) - else: - print_plot(fund_ts, benchmark, name, splot_dir, lf_dividend_rets, i_start_cash = i_start_cash) - else: - if type(leverage)!=type(False): - print_plot([fund_ts, original], benchmark, name, splot_dir, s_original_name, lf_dividend_rets, leverage=leverage, i_start_cash = i_start_cash, s_leverage_name=s_leverage_name) - else: - print_plot([fund_ts, original], benchmark, name, splot_dir, s_original_name, lf_dividend_rets, i_start_cash = i_start_cash) - - print_header(ostream,name) - start_date = fund_ts.index[0].strftime("%m/%d/%Y") - end_date = fund_ts.index[-1].strftime("%m/%d/%Y") - ostream.write("Performance Summary for "\ - + str(path.basename(name)) + " Backtest\n") - ostream.write("For the dates " + str(start_date) + " to "\ - + str(end_date) + "") - - #paramater section - if d_trading_params!="": - ostream.write("\n\nTrading Paramaters\n\n") - for var in d_trading_params: - print_line(var, d_trading_params[var],ostream=ostream) - if d_hedge_params!="": - ostream.write("\nHedging Paramaters\n\n") - if type(d_hedge_params['Weight of Hedge']) == type(float): - d_hedge_params['Weight of Hedge'] = str(int(d_hedge_params['Weight of Hedge']*100)) + '%' - for var in d_hedge_params: - print_line(var, d_hedge_params[var],ostream=ostream) - - #comment section - if s_comments!="": - ostream.write("\nComments\n\n%s" % s_comments) - - - if directory != False : - ostream.write("\n\n\n\n") - - mult = i_start_cash/fund_ts.values[0] - - - timeofday = dt.timedelta(hours = 16) - timestamps = du.getNYSEdays(fund_ts.index[0], fund_ts.index[-1], timeofday) - dataobj =de.DataAccess('Yahoo') - years = du.getYears(fund_ts) - benchmark_close = dataobj.get_data(timestamps, benchmark, ["close"]) - benchmark_close=benchmark_close[0] - for bench_sym in benchmark: - benchmark_close[bench_sym]=benchmark_close[bench_sym].fillna(method='pad') - - if type(lf_dividend_rets) != type(0.0): - for i,sym in enumerate(benchmark): - 
benchmark_close[sym] = _dividend_rets_funds(benchmark_close[sym], lf_dividend_rets[i]) - - ostream.write("Resulting Values in $ with an initial investment of "+ locale.currency(int(round(i_start_cash)), grouping=True) + "\n") - - print_line(s_formatted_fund_name+" Resulting Value",(locale.currency(int(round(fund_ts.values[-1]*mult)), grouping=True)),i_spacing=3, ostream=ostream) - - if type(original)!=type("str"): - mult3 = i_start_cash / original.values[0] - print_line(s_formatted_original_name +" Resulting Value",(locale.currency(int(round(original.values[-1]*mult3)), grouping=True)),i_spacing=3, ostream=ostream) - - for bench_sym in benchmark: - mult2=i_start_cash/benchmark_close[bench_sym].values[0] - print_line(bench_sym+" Resulting Value",locale.currency(int(round(benchmark_close[bench_sym].values[-1]*mult2)), grouping=True),i_spacing=3, ostream=ostream) - - ostream.write("\n") - - if len(years) > 1: - print_line(s_formatted_fund_name+" Sharpe Ratio","%10.3f" % fu.get_sharpe_ratio(fund_ts.values)[0],i_spacing=4, ostream=ostream) - if type(original)!=type("str"): - print_line(s_formatted_original_name+" Sharpe Ratio","%10.3f" % fu.get_sharpe_ratio(original.values)[0],i_spacing=4, ostream=ostream) - - for bench_sym in benchmark: - print_line(bench_sym+" Sharpe Ratio","%10.3f" % fu.get_sharpe_ratio(benchmark_close[bench_sym].values)[0],i_spacing=4,ostream=ostream) - ostream.write("\n") - - ostream.write("Transaction Costs\n") - print_line("Total Commissions"," %15s, %10.2f%%" % (locale.currency(int(round(commissions)), grouping=True), \ - float((round(commissions)*100)/(fund_ts.values[-1]*mult))), i_spacing=4, ostream=ostream) - - print_line("Total Slippage"," %15s, %10.2f%%" % (locale.currency(int(round(slippage)), grouping=True), \ - float((round(slippage)*100)/(fund_ts.values[-1]*mult))), i_spacing=4, ostream=ostream) - - print_line("Total Short Borrowing Cost"," %15s, %10.2f%%" % (locale.currency(int(round(borrowcost)), grouping=True), \ - float((round(borrowcost)*100)/(fund_ts.values[-1]*mult))), i_spacing=4, ostream=ostream) - - print_line("Total Costs"," %15s, %10.2f%%" % (locale.currency(int(round(borrowcost+slippage+commissions)), grouping=True), \ - float((round(borrowcost+slippage+commissions)*100)/(fund_ts.values[-1]*mult))), i_spacing=4, ostream=ostream) - - ostream.write("\n") - - print_line(s_formatted_fund_name+" Std Dev of Returns",get_std_dev(fund_ts),i_spacing=8, ostream=ostream) - - if type(original)!=type("str"): - print_line(s_formatted_original_name+" Std Dev of Returns", get_std_dev(original), i_spacing=8, ostream=ostream) - - for bench_sym in benchmark: - print_line(bench_sym+" Std Dev of Returns", get_std_dev(benchmark_close[bench_sym]), i_spacing=8, ostream=ostream) - - ostream.write("\n") - - - for bench_sym in benchmark: - print_benchmark_coer(fund_ts, benchmark_close[bench_sym], str(bench_sym), ostream) - ostream.write("\n") - - ostream.write("\nYearly Performance Metrics") - print_years(years, ostream) - - s_line="" - for f_token in get_annual_return(fund_ts, years): - s_line+=" %+8.2f%%" % f_token - print_line(s_formatted_fund_name+" Annualized Return", s_line, i_spacing=4, ostream=ostream) - lf_vals=[get_annual_return(fund_ts, years)] - ls_labels=[name] - - if type(original)!=type("str"): - s_line="" - for f_token in get_annual_return(original, years): - s_line+=" %+8.2f%%" % f_token - print_line(s_formatted_original_name+" Annualized Return", s_line, i_spacing=4, ostream=ostream) - lf_vals.append(get_annual_return(original, years)) - 
ls_labels.append(s_original_name) - - for bench_sym in benchmark: - s_line="" - for f_token in get_annual_return(benchmark_close[bench_sym], years): - s_line+=" %+8.2f%%" % f_token - print_line(bench_sym+" Annualized Return", s_line, i_spacing=4, ostream=ostream) - lf_vals.append(get_annual_return(benchmark_close[bench_sym], years)) - ls_labels.append(bench_sym) - - print lf_vals - print ls_labels - ls_year_labels=[] - for i in range(0,len(years)): - ls_year_labels.append(str(years[i])) - print_bar_chart(lf_vals, ls_labels, ls_year_labels, directory+"/annual_rets.png") - - print_years(years, ostream) - - print_line(s_formatted_fund_name+" Winning Days",get_winning_days(fund_ts, years), i_spacing=4, ostream=ostream) - - - if type(original)!=type("str"): - print_line(s_formatted_original_name+" Winning Days",get_winning_days(original, years), i_spacing=4, ostream=ostream) - - - for bench_sym in benchmark: - print_line(bench_sym+" Winning Days",get_winning_days(benchmark_close[bench_sym], years), i_spacing=4, ostream=ostream) - - - print_years(years, ostream) - - print_line(s_formatted_fund_name+" Max Draw Down",get_max_draw_down(fund_ts, years), i_spacing=4, ostream=ostream) - - if type(original)!=type("str"): - print_line(s_formatted_original_name+" Max Draw Down",get_max_draw_down(original, years), i_spacing=4, ostream=ostream) - - - for bench_sym in benchmark: - print_line(bench_sym+" Max Draw Down",get_max_draw_down(benchmark_close[bench_sym], years), i_spacing=4, ostream=ostream) - - - print_years(years, ostream) - - - print_line(s_formatted_fund_name+" Daily Sharpe Ratio",get_daily_sharpe(fund_ts, years), i_spacing=4, ostream=ostream) - - - if type(original)!=type("str"): - print_line(s_formatted_original_name+" Daily Sharpe Ratio",get_daily_sharpe(original, years), i_spacing=4, ostream=ostream) - - for bench_sym in benchmark: - print_line(bench_sym+" Daily Sharpe Ratio",get_daily_sharpe(benchmark_close[bench_sym], years), i_spacing=4, ostream=ostream) - - - print_years(years, ostream) - - print_line(s_formatted_fund_name+" Daily Sortino Ratio",get_daily_sortino(fund_ts, years), i_spacing=4, ostream=ostream) - - if type(original)!=type("str"): - print_line(s_formatted_original_name+" Daily Sortino Ratio",get_daily_sortino(original, years), i_spacing=4, ostream=ostream) - - - for bench_sym in benchmark: - print_line(bench_sym+" Daily Sortino Ratio",get_daily_sortino(benchmark_close[bench_sym], years), i_spacing=4, ostream=ostream) - - - ostream.write("\n\n\nCorrelation and Beta with DJ Industries for the Fund ") - - print_industry_coer(fund_ts,ostream) - - ostream.write("\n\nCorrelation and Beta with Other Indices for the Fund ") - - print_other_coer(fund_ts,ostream) - - ostream.write("\n\n\nMonthly Returns for the Fund %\n") - - print_monthly_returns(fund_ts, years, ostream) - print_footer(ostream) - -def print_bar_chart(llf_vals, ls_fund_labels, ls_year_labels, s_filename): - llf_vals=((1,2,3),(3,2,1),(2,2,2)) - amin=min(min(llf_vals)) - print amin - min_lim=0 - if amin<0: - min_lim = amin - ls_fund_labels=("Fund 1","Benchmark","Original") - ls_year_labels=("2000","2001","2002") - pyplot.clf() - ind = np.arange(len(ls_year_labels)) - ind=ind*2 - width = 0.35 - fig = pyplot.figure() - ax = fig.add_subplot(111) - colors=('r','g','b') - rects=[] - for i in range(0,len(llf_vals)): - rects.append( ax.bar(ind+width*i, llf_vals[i], width, color=colors[i])) - ax.set_ylabel('Annual Return') - ax.set_ylim(min_lim, 5) - ax.set_title('Annual Return by Fund and Year') - 
ax.set_xticks(ind+width*len(llf_vals)/2) - ax.set_xticklabels(ls_year_labels) - plots=[] - for i in range(0,len(llf_vals)): - plots.append(rects[i][0]) - ax.legend(plots,ls_fund_labels) - - def autolabel(rects): - # attach some text labels - for rect in rects: - height = rect.get_height() - ax.text(rect.get_x()+rect.get_width()/2., 1.05*height, '%d'%int(height), - ha='center', va='bottom') - for i in range(0,len(llf_vals)): - autolabel(rects[i]) - savefig(s_filename, format = 'png') - -def print_plot(fund, benchmark, graph_name, filename, s_original_name="", lf_dividend_rets=0.0, leverage=False, i_start_cash = 1000000, s_leverage_name="Leverage"): - """ - @summary prints a plot of a provided fund and benchmark - @param fund: fund value in pandas timeseries - @param benchmark: benchmark symbol to compare fund to - @param graph_name: name to associate with the fund in the report - @param filename: file location to store plot1 - """ - pyplot.clf() - fig = pyplot.figure() - from matplotlib.font_manager import FontProperties - fontP = FontProperties() - fontP.set_size('small') - - if type(leverage)==type(False): - ax = pyplot.subplot(111) - else: - gs = gridspec.GridSpec(2, 1, height_ratios=[4, 1]) - ax = pyplot.subplot(gs[0]) - - start_date = 0 - end_date = 0 - if(type(fund)!= type(list())): - if(start_date == 0 or start_date>fund.index[0]): - start_date = fund.index[0] - if(end_date == 0 or end_dateentity[j].index[0]): - start_date = entity[j].index[0] - if(end_date == 0 or end_dateentity.index[0]): - start_date = entity.index[0] - if(end_date == 0 or end_date\n") - html_file.write("
\n\n") - i = 0 - pyplot.clf() - #load spx for time frame - symbol = ["$SPX"] - start_date = 0 - end_date = 0 - for fund in funds_list: - if(type(fund)!= type(list())): - if(start_date == 0 or start_date>fund.index[0]): - start_date = fund.index[0] - if(end_date == 0 or end_datefund[0].index[0]): - start_date = fund[0].index[0] - if(end_date == 0 or end_dateQSTK Generated Report:" + out_file + "\n") - # html_file.write("\n") - html_file.write("\n") - html_file.write("
\n\n") - print_stats(fund_matrix, "robust funds", html_file) - print_footer(html_file) - -if __name__ == '__main__': - # Usage - # - # Normal: - # python report.py 'out.pkl' ['out2.pkl' ...] - # - # Robust: - # python report.py -r 'out.pkl' - # - - ROBUST = 0 - - if(sys.argv[1] == '-r'): - ROBUST = 1 - - FILENAME = "report.html" - - if(ROBUST == 1): - ANINPUT = open(sys.argv[2],"r") - FUNDS = cPickle.load(ANINPUT) - generate_robust_report(FUNDS, FILENAME) - else: - FILES = sys.argv - FILES.remove(FILES[0]) - FUNDS = [] - for AFILE in FILES: - ANINPUT = open(AFILE,"r") - FUND = cPickle.load(ANINPUT) - FUNDS.append(FUND) - generate_report(FUNDS, FILES, FILENAME) - - +''' +(c) 2011, 2012 Georgia Tech Research Corporation +This source code is released under the New BSD license. Please see +http://wiki.quantsoftware.org/index.php?title=QSTK_License +for license details. + +Created on Jan 1, 2011 + +@author:Drew Bratcher +@contact: dbratcher@gatech.edu +@summary: Contains tutorial for backtester and report. + +''' + +from os import path, makedirs +from os import sys +from QSTK.qstkutil import DataAccess as de +from QSTK.qstkutil import qsdateutil as du +from QSTK.qstkutil import tsutil as tsu +from QSTK.qstkutil import fundutil as fu +from dateutil.relativedelta import relativedelta +import numpy as np +from math import log10 +import locale +from pylab import savefig +from matplotlib import pyplot +from matplotlib import gridspec +import matplotlib.dates as mdates +import pickle +import datetime as dt +import pandas +import numpy as np +from copy import deepcopy +import scipy.stats as scst + +def _dividend_rets_funds(df_funds, f_dividend_rets): + + df_funds_copy = deepcopy(df_funds) + f_price = deepcopy(df_funds_copy[0]) + + df_funds_copy.values[1:] = (df_funds_copy.values[1:]/df_funds_copy.values[0:-1]) + df_funds_copy.values[0] = 1 + + df_funds_copy = df_funds_copy + f_dividend_rets + + na_funds_copy = np.cumprod(df_funds_copy.values) + na_funds_copy = na_funds_copy*f_price + + df_funds = pandas.Series(na_funds_copy, index = df_funds_copy.index) + + return df_funds + +def print_header(html_file, name): + """ + @summary prints header of report html file + """ + html_file.write("\n") + html_file.write("\n") + html_file.write("QSTK Generated Report:" + name + "\n") + html_file.write("\n\n") + html_file.write("\n\n") + +def print_footer(html_file): + """ + @summary prints footer of report html file + """ + html_file.write("\n\n") + html_file.write("") + +def get_annual_return(fund_ts, years): + """ + @summary prints annual return for given fund and years to the given stream + @param fund_ts: pandas fund time series + @param years: list of years to print out + @param ostream: stream to print to + """ + lf_ret=[] + for year in years: + year_vals = [] + for date in fund_ts.index: + if(date.year ==year): + year_vals.append([fund_ts.ix[date]]) + day_rets = tsu.daily1(year_vals) + ret = tsu.get_ror_annual(day_rets) + ret=float(ret) + lf_ret.append(ret*100) #" %+8.2f%%" % (ret*100) + return lf_ret + +def get_winning_days(fund_ts, years): + """ + @summary prints winning days for given fund and years to the given stream + @param fund_ts: pandas fund time series + @param years: list of years to print out + @param ostream: stream to print to + """ + s_ret="" + for year in years: + year_vals = [] + for date in fund_ts.index: + if(date.year==year): + year_vals.append([fund_ts.ix[date]]) + ret = fu.get_winning_days(year_vals) + s_ret+=" % + 8.2f%%" % ret + return s_ret + +def get_max_draw_down(fund_ts, years): 
+ """ + @summary prints max draw down for given fund and years to the given stream + @param fund_ts: pandas fund time series + @param years: list of years to print out + @param ostream: stream to print to + """ + s_ret="" + for year in years: + year_vals = [] + for date in fund_ts.index: + if(date.year==year): + year_vals.append(fund_ts.ix[date]) + ret = fu.get_max_draw_down(year_vals) + s_ret+=" % + 8.2f%%" % (ret*100) + return s_ret + +def get_daily_sharpe(fund_ts, years): + """ + @summary prints sharpe ratio for given fund and years to the given stream + @param fund_ts: pandas fund time series + @param years: list of years to print out + @param ostream: stream to print to + """ + s_ret="" + for year in years: + year_vals = [] + for date in fund_ts.index: + if(date.year==year): + year_vals.append([fund_ts.ix[date]]) + ret = fu.get_sharpe_ratio(year_vals) + s_ret+=" % + 8.2f " % ret + return s_ret + +def get_daily_sortino(fund_ts, years): + """ + @summary prints sortino ratio for given fund and years to the given stream + @param fund_ts: pandas fund time series + @param years: list of years to print out + @param ostream: stream to print to + """ + s_ret="" + for year in years: + year_vals = [] + for date in fund_ts.index: + if(date.year==year): + year_vals.append([fund_ts.ix[date]]) + ret = fu.get_sortino_ratio(year_vals) + s_ret+=" % + 8.2f " % ret + return s_ret + +def get_std_dev(fund_ts): + """ + @summary gets standard deviation of returns for a fund as a string + @param fund_ts: pandas fund time series + @param years: list of years to print out + @param ostream: stream to print to + """ + fund_ts=fund_ts.fillna(method='pad') + fund_ts=fund_ts.fillna(method='bfill') + ret=np.std(tsu.daily(fund_ts.values))*10000 + return ("%+7.2f bps " % ret) + + +def ks_statistic(fund_ts): + fund_ts = deepcopy(fund_ts) + if len(fund_ts.values) > 60: + seq1 = fund_ts.values[0:-60] + seq2 = fund_ts.values[-60:] + tsu.returnize0(seq1) + tsu.returnize0(seq2) + (ks, p) = scst.ks_2samp(seq1, seq2) + return ks, p + # elif len(fund_ts.values) > 5: + # seq1 = fund_ts.values[0:-5] + # seq2 = fund_ts.values[-5:] + # (ks, p) = scst.ks_2samp(seq1, seq2) + # return ks, p + + ks = -1 + p = -1 + return ks, p + +def ks_statistic_calc(fund_ts_past, fund_ts_month): + try: + seq1 = deepcopy(fund_ts_past.values) + seq2 = deepcopy(fund_ts_month.values) + tsu.returnize0(seq1) + tsu.returnize0(seq2) + (ks, p) = scst.ks_2samp(seq1, seq2) + return ks, p + except: + return -1,-1 + +def print_industry_coer(fund_ts, ostream): + """ + @summary prints standard deviation of returns for a fund + @param fund_ts: pandas fund time series + @param years: list of years to print out + @param ostream: stream to print to + """ + industries = [['$DJUSBM', 'Materials'], + ['$DJUSNC', 'Goods'], + ['$DJUSCY', 'Services'], + ['$DJUSFN', 'Financials'], + ['$DJUSHC', 'Health'], + ['$DJUSIN', 'Industrial'], + ['$DJUSEN', 'Oil & Gas'], + ['$DJUSTC', 'Technology'], + ['$DJUSTL', 'TeleComm'], + ['$DJUSUT', 'Utilities']] + for i in range(0, len(industries) ): + if(i%2==0): + ostream.write("\n") + #load data + norObj = de.DataAccess('Yahoo') + ldtTimestamps = du.getNYSEdays( fund_ts.index[0], fund_ts.index[-1], dt.timedelta(hours=16) ) + ldfData = norObj.get_data( ldtTimestamps, [industries[i][0]], ['close'] ) + #get corelation + ldfData[0]=ldfData[0].fillna(method='pad') + ldfData[0]=ldfData[0].fillna(method='bfill') + a=np.corrcoef(np.ravel(tsu.daily(ldfData[0][industries[i][0]])),np.ravel(tsu.daily(fund_ts.values))) + 
b=np.ravel(tsu.daily(ldfData[0][industries[i][0]])) + f=np.ravel(tsu.daily(fund_ts)) + fBeta, unused = np.polyfit(b,f,1) + ostream.write("%10s(%s):%+6.2f, %+6.2f " % (industries[i][1], industries[i][0], a[0,1], fBeta)) + +def print_other_coer(fund_ts, ostream): + """ + @summary prints standard deviation of returns for a fund + @param fund_ts: pandas fund time series + @param years: list of years to print out + @param ostream: stream to print to + """ + industries = [['$SPX', ' S&P Index'], + ['$DJI', ' Dow Jones'], + ['$DJUSEN', 'Oil & Gas'], + ['$DJGSP', ' Metals']] + for i in range(0, len(industries) ): + if(i%2==0): + ostream.write("\n") + #load data + norObj =de.DataAccess('Yahoo') + ldtTimestamps = du.getNYSEdays( fund_ts.index[0], fund_ts.index[-1], dt.timedelta(hours=16) ) + ldfData = norObj.get_data( ldtTimestamps, [industries[i][0]], ['close'] ) + #get corelation + ldfData[0]=ldfData[0].fillna(method='pad') + ldfData[0]=ldfData[0].fillna(method='bfill') + a=np.corrcoef(np.ravel(tsu.daily(ldfData[0][industries[i][0]])),np.ravel(tsu.daily(fund_ts.values))) + b=np.ravel(tsu.daily(ldfData[0][industries[i][0]])) + f=np.ravel(tsu.daily(fund_ts)) + fBeta, unused = np.polyfit(b,f,1) + ostream.write("%10s(%s):%+6.2f, %+6.2f " % (industries[i][1], industries[i][0], a[0,1], fBeta)) + + +def print_benchmark_coer(fund_ts, benchmark_close, sym, ostream): + """ + @summary prints standard deviation of returns for a fund + @param fund_ts: pandas fund time series + @param years: list of years to print out + @param ostream: stream to print to + """ + fund_ts=fund_ts.fillna(method='pad') + fund_ts=fund_ts.fillna(method='bfill') + benchmark_close=benchmark_close.fillna(method='pad') + benchmark_close=benchmark_close.fillna(method='bfill') + faCorr=np.corrcoef(np.ravel(tsu.daily(fund_ts.values)),np.ravel(tsu.daily(benchmark_close))); + b=np.ravel(tsu.daily(benchmark_close)) + f=np.ravel(tsu.daily(fund_ts)) + fBeta, unused = np.polyfit(b,f, 1); + print_line(sym+"Correlattion","%+6.2f" % faCorr[0,1],i_spacing=3,ostream=ostream) + print_line(sym+"Beta","%+6.2f" % fBeta,i_spacing=3,ostream=ostream) + +def print_monthly_returns(fund_ts, years, ostream): + """ + @summary prints monthly returns for given fund and years to the given stream + @param fund_ts: pandas fund time series + @param years: list of years to print out + @param ostream: stream to print to + """ + ostream.write(" ") + month_names = du.getMonthNames() + for name in month_names: + ostream.write(" " + str(name)) + ostream.write("\n") + i = 0 + mrets = tsu.monthly(fund_ts) + for year in years: + ostream.write(str(year)) + months = du.getMonths(fund_ts, year) + for k in range(1, months[0]): + ostream.write(" ") + for month in months: + ostream.write(" % + 6.2f" % (mrets[i]*100)) + i += 1 + ostream.write("\n") + + + +def print_monthly_turnover(fund_ts, years, ts_turnover, ostream): + """ + @summary prints monthly returns for given fund and years to the given stream + @param fund_ts: pandas fund time series + @param years: list of years to print out + @param ostream: stream to print to + """ + ostream.write(" ") + month_names = du.getMonthNames() + for name in month_names: + ostream.write(" " + str(name)) + ostream.write("\n") + i = 0 + # mrets = tsu.monthly(fund_ts) + for year in years: + ostream.write(str(year)) + months = du.getMonths(ts_turnover, year) + if months != []: + for k in range(1, months[0]): + ostream.write(" ") + for month in months: + ostream.write(" % + 6.2f" % (ts_turnover[i]*100)) + i += 1 + ostream.write("\n") + +def 
print_monthly_ks(fund_ts, years, ostream): + """ + @summary prints monthly returns for given fund and years to the given stream + @param fund_ts: pandas fund time series + @param years: list of years to print out + @param ostream: stream to print to + """ + ostream.write(" ") + month_names = du.getMonthNames() + for name in month_names: + ostream.write(" " + str(name)) + ostream.write("\n") + + # mrets = tsu.monthly(fund_ts) + m_str = [] + + for i, year in enumerate(years): + months = du.getMonths(fund_ts, year) + for j, month in enumerate(months): + if i == 0 and j < 3: + m_str.append(' ') + else: + # dt_st = max(fund_ts.index[0], dt.datetime(year, month, 1)-relativedelta(months=6)) + dt_st = fund_ts.index[0] + dt_today = dt.datetime(year, month, 1) - relativedelta(months=2) + dt_end = min(dt.datetime(year, month, 1) + relativedelta(months=1) + dt.timedelta(hours=-5), fund_ts.index[-1]) + fund_ts_past = fund_ts.ix[dt_st: dt_today] + fund_ts_month = fund_ts.ix[dt_today: dt_end] + ks, p = ks_statistic_calc(fund_ts_past, fund_ts_month) + if not(ks == -1 or p == -1): + if ks < p: + m_str.append('PASS') + else: + m_str.append('FAIL') + else: + m_str.append(' ') + + i = 0 + for year in years: + ostream.write(str(year)) + months = du.getMonths(fund_ts, year) + for k in range(1, months[0]): + ostream.write(" ") + for month in months: + ostream.write("%7s" % (m_str[i])) + i = i + 1 + ostream.write("\n") + + +def print_years(years, ostream): + ostream.write("\n") + s_line="" + s_line2="" + for f_token in years: + s_line+="%9d " % f_token + s_line2+="%10s" % '------' + + ostream.write("%35s %s%30s\n" % (" ", " "*4, s_line)) + ostream.write("%35s %s%30s\n" % (" ", " "*4, s_line2)) + + +def print_line(s_left_side, s_right_side, i_spacing=0, ostream="stdout"): + ostream.write("%35s:%s%30s\n" % (s_left_side, " "*i_spacing, s_right_side)) + +def print_stats(fund_ts, benchmark, name, lf_dividend_rets=0.0, original="",s_fund_name="Fund", + s_original_name="Original", d_trading_params="", d_hedge_params="", s_comments="", directory = False, + leverage = False, s_leverage_name="Leverage", commissions = 0, slippage = 0, borrowcost = 0, ostream = sys.stdout, + i_start_cash=1000000, ts_turnover="False"): + """ + @summary prints stats of a provided fund and benchmark + @param fund_ts: fund value in pandas timeseries + @param benchmark: benchmark symbol to compare fund to + @param name: name to associate with the fund in the report + @param directory: parameter to specify printing to a directory + @param leverage: time series to plot with report + @param commissions: value to print with report + @param slippage: value to print with report + @param ostream: stream to print stats to, defaults to stdout + """ + + #Set locale for currency conversions + locale.setlocale(locale.LC_ALL, '') + + if original != "" and type(original) != type([]): + original = [original] + if type(s_original_name) != type([]): + s_original_name = [s_original_name] + + #make names length independent for alignment + s_formatted_original_name = [] + for name_temp in s_original_name: + s_formatted_original_name.append("%15s" % name_temp) + s_formatted_fund_name = "%15s" % s_fund_name + + fund_ts=fund_ts.fillna(method='pad') + fund_ts=fund_ts.fillna(method='bfill') + fund_ts=fund_ts.fillna(1.0) + if directory != False : + if not path.exists(directory): + makedirs(directory) + + sfile = path.join(directory, "report-%s.html" % name ) + splot = "plot-%s.png" % name + splot_dir = path.join(directory, splot) + ostream = open(sfile, "wb") + 
ostream.write("
")
+        print("writing to ", sfile)
+
+        if type(original)==type("str"):
+            if type(leverage)!=type(False):
+                print_plot(fund_ts, benchmark, name, splot_dir, lf_dividend_rets, leverage=leverage, i_start_cash = i_start_cash, s_leverage_name=s_leverage_name)
+            else:
+                print_plot(fund_ts, benchmark, name, splot_dir, lf_dividend_rets, i_start_cash = i_start_cash)
+        else:
+            if type(leverage)!=type(False):
+                print_plot([fund_ts, original], benchmark, name, splot_dir, s_original_name, lf_dividend_rets,
+                             leverage=leverage, i_start_cash = i_start_cash, s_leverage_name=s_leverage_name)
+            else:
+                print_plot([fund_ts, original], benchmark, name, splot_dir, s_original_name, lf_dividend_rets, i_start_cash = i_start_cash)
+
+    start_date = fund_ts.index[0].strftime("%m/%d/%Y")
+    end_date = fund_ts.index[-1].strftime("%m/%d/%Y")
+    ostream.write("Performance Summary for "\
+	 + str(path.basename(name)) + " Backtest\n")
+    ostream.write("For the dates " + str(start_date) + " to "\
+                                       + str(end_date) + "")
+
+    #parameter section
+    if d_trading_params!="":
+        ostream.write("\n\nTrading Paramaters\n\n")
+        for var in d_trading_params:
+            print_line(var, d_trading_params[var],ostream=ostream)
+    if d_hedge_params!="":
+        ostream.write("\nHedging Paramaters\n\n")
+        if isinstance(d_hedge_params['Weight of Hedge'], float):
+            d_hedge_params['Weight of Hedge'] = str(int(d_hedge_params['Weight of Hedge']*100)) + '%'
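+            # e.g. a float hedge weight of 0.25 is rendered as the display
+            # string '25%' (0.25 * 100 -> 25 -> str -> '25%')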
+        for var in d_hedge_params:
+            print_line(var, d_hedge_params[var],ostream=ostream)
+
+    #comment section
+    if s_comments!="":
+        ostream.write("\nComments\n\n%s" % s_comments)
+
+
+    if directory != False :
+        ostream.write("\n\n\n\n")
+
+    mult = i_start_cash/fund_ts.values[0]
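+    # mult rescales the fund series so it starts at i_start_cash: a series
+    # beginning at 1.0 with i_start_cash = 1000000 is reported in dollars,
+    # so a final value of 1.08 prints as roughly $1,080,000.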
+
+
+    timeofday = dt.timedelta(hours = 16)
+    timestamps = du.getNYSEdays(fund_ts.index[0], fund_ts.index[-1], timeofday)
+    dataobj =de.DataAccess('Yahoo')
+    years = du.getYears(fund_ts)
+    benchmark_close = dataobj.get_data(timestamps, benchmark, ["close"], \
+                                                     verbose = False)[0]
+    for bench_sym in benchmark:
+        benchmark_close[bench_sym]=benchmark_close[bench_sym].fillna(method='pad')
+        benchmark_close[bench_sym]=benchmark_close[bench_sym].fillna(method='bfill')
+        benchmark_close[bench_sym]=benchmark_close[bench_sym].fillna(1.0)
+
+    if type(lf_dividend_rets) != type(0.0):
+        for i,sym in enumerate(benchmark):
+            benchmark_close[sym] = _dividend_rets_funds(benchmark_close[sym], lf_dividend_rets[i])
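+            # _dividend_rets_funds (defined above) adds the per-period
+            # dividend return back into the benchmark's daily returns before
+            # re-compounding, so the fund is compared against total return
+            # rather than price return alone.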
+
+    ostream.write("Resulting Values in $ with an initial investment of "+ locale.currency(int(round(i_start_cash)), grouping=True) + "\n")
+
+    print_line(s_formatted_fund_name+" Resulting Value"," %15s, %10.2f%%" % (locale.currency(int(round(fund_ts.values[-1]*mult)), grouping=True), \
+                                                     float(100*((fund_ts.values[-1]/fund_ts.values[0])-1))), i_spacing=4, ostream=ostream)
+
+    # if type(original)!=type("str"):
+    #     mult3 = i_start_cash / original.values[0]
+    #     # print_line(s_formatted_original_name +" Resulting Value",(locale.currency(int(round(original.values[-1]*mult3)), grouping=True)),i_spacing=3, ostream=ostream)
+    #     print_line(s_formatted_original_name+" Resulting Value"," %15s, %10.2f%%" % (locale.currency(int(round(original.values[-1]*mult3)), grouping=True), \
+    #                                                  float(100*((original.values[-1]/original.values[0])-1))), i_spacing=4, ostream=ostream)
+
+    if type(original)!=type("str"):
+        for i in range(len(original)):
+            mult3 = i_start_cash / original[i].values[0]
+            # print_line(s_formatted_original_name +" Resulting Value",(locale.currency(int(round(original[i].values[-1]*mult3)), grouping=True)),i_spacing=3, ostream=ostream)
+            print_line(s_formatted_original_name[i]+" Resulting Value"," %15s, %10.2f%%" % (locale.currency(int(round(original[i].values[-1]*mult3)), grouping=True), \
+                                                     float(100*((original[i].values[-1]/original[i].values[0])-1))), i_spacing=4, ostream=ostream)
+
+    for bench_sym in benchmark:
+        mult2= i_start_cash / benchmark_close[bench_sym].values[0]
+        # print_line(bench_sym+" Resulting Value",locale.currency(int(round(benchmark_close[bench_sym].values[-1]*mult2)), grouping=True),i_spacing=3, ostream=ostream)
+        print_line(bench_sym+" Resulting Value"," %15s, %10.2f%%" % (locale.currency(int(round(benchmark_close[bench_sym].values[-1]*mult2)), grouping=True), \
+                                                     float(100*((benchmark_close[bench_sym].values[-1]/benchmark_close[bench_sym].values[0])-1))), i_spacing=4, ostream=ostream)
+
+    ostream.write("\n")
+
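+    # get_sharpe_ratio presumably returns the annualized Sharpe ratio of the
+    # value series, conventionally sqrt(252) * mean(daily rets) / std(daily rets).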
+    print_line(s_formatted_fund_name+" Sharpe Ratio","%10.3f" % fu.get_sharpe_ratio(fund_ts.values)[0],i_spacing=4, ostream=ostream)
+    if type(original)!=type("str"):
+        for i in range(len(original)):
+            print_line(s_formatted_original_name[i]+" Sharpe Ratio","%10.3f" % fu.get_sharpe_ratio(original[i].values)[0],i_spacing=4, ostream=ostream)
+
+    for bench_sym in benchmark:
+        print_line(bench_sym+" Sharpe Ratio","%10.3f" % fu.get_sharpe_ratio(benchmark_close[bench_sym].values)[0],i_spacing=4,ostream=ostream)
+    ostream.write("\n")
+
+
+    ostream.write("Transaction Costs\n")
+    print_line("Total Commissions"," %15s, %10.2f%%" % (locale.currency(int(round(commissions)), grouping=True), \
+                                                  float((round(commissions)*100)/(fund_ts.values[-1]*mult))), i_spacing=4, ostream=ostream)
+
+    print_line("Total Slippage"," %15s, %10.2f%%" % (locale.currency(int(round(slippage)), grouping=True), \
+                                                     float((round(slippage)*100)/(fund_ts.values[-1]*mult))), i_spacing=4, ostream=ostream)
+
+    print_line("Total Short Borrowing Cost"," %15s, %10.2f%%" % (locale.currency(int(round(borrowcost)), grouping=True), \
+                                                     float((round(borrowcost)*100)/(fund_ts.values[-1]*mult))), i_spacing=4, ostream=ostream)
+
+    print_line("Total Costs"," %15s, %10.2f%%" % (locale.currency(int(round(borrowcost+slippage+commissions)), grouping=True), \
+                                  float((round(borrowcost+slippage+commissions)*100)/(fund_ts.values[-1]*mult))), i_spacing=4, ostream=ostream)
+
+    ostream.write("\n")
+
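+    # get_std_dev presumably returns a formatted standard deviation of the
+    # series' daily returns, a rough proxy for realized volatility.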
+    print_line(s_formatted_fund_name+" Std Dev of Returns",get_std_dev(fund_ts),i_spacing=8, ostream=ostream)
+
+    if type(original)!=type("str"):
+        for i in range(len(original)):
+            print_line(s_formatted_original_name[i]+" Std Dev of Returns", get_std_dev(original[i]), i_spacing=8, ostream=ostream)
+
+    for bench_sym in benchmark:
+        print_line(bench_sym+" Std Dev of Returns", get_std_dev(benchmark_close[bench_sym]), i_spacing=8, ostream=ostream)
+
+    ostream.write("\n")
+
+
+    for bench_sym in benchmark:
+        print_benchmark_coer(fund_ts, benchmark_close[bench_sym], str(bench_sym), ostream)
+    ostream.write("\n")
+
+    ostream.write("\nYearly Performance Metrics")
+    print_years(years, ostream)
+
+
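+    # get_annual_return presumably yields one return figure per year in
+    # years; each is formatted as a fixed-width "+x.xx%" token so the row
+    # lines up under the year headers written by print_years.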
+    s_line=""
+    for f_token in get_annual_return(fund_ts, years):
+        s_line+=" %+8.2f%%" % f_token
+    print_line(s_formatted_fund_name+" Annualized Return",s_line, i_spacing=4, ostream=ostream)
+
+
+    if type(original)!=type("str"):
+        for i in range(len(original)):
+            s_line=""
+            for f_token in get_annual_return(original[i], years):
+                s_line+=" %+8.2f%%" % f_token
+            print_line(s_formatted_original_name[i]+" Annualized Return", s_line, i_spacing=4, ostream=ostream)
+
+    for bench_sym in benchmark:
+        s_line=""
+        for f_token in get_annual_return(benchmark_close[bench_sym], years):
+            s_line+=" %+8.2f%%" % f_token
+        print_line(bench_sym+" Annualized Return", s_line, i_spacing=4, ostream=ostream)
+
+    print_years(years, ostream)
+
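+    # Winning days: presumably the share of trading days in each year that
+    # closed with a positive daily return.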
+    print_line(s_formatted_fund_name+" Winning Days",get_winning_days(fund_ts, years), i_spacing=4, ostream=ostream)
+
+
+    if type(original)!=type("str"):
+        for i in range(len(original)):
+            print_line(s_formatted_original_name[i]+" Winning Days",get_winning_days(original[i], years), i_spacing=4, ostream=ostream)
+
+
+    for bench_sym in benchmark:
+        print_line(bench_sym+" Winning Days",get_winning_days(benchmark_close[bench_sym], years), i_spacing=4, ostream=ostream)
+
+
+    print_years(years, ostream)
+
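+    # Max draw down: the largest peak-to-trough decline within each year.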
+    print_line(s_formatted_fund_name+" Max Draw Down",get_max_draw_down(fund_ts, years), i_spacing=4, ostream=ostream)
+
+    if type(original)!=type("str"):
+        for i in range(len(original)):
+            print_line(s_formatted_original_name[i]+" Max Draw Down",get_max_draw_down(original[i], years), i_spacing=4, ostream=ostream)
+
+
+    for bench_sym in benchmark:
+        print_line(bench_sym+" Max Draw Down",get_max_draw_down(benchmark_close[bench_sym], years), i_spacing=4, ostream=ostream)
+
+
+    print_years(years, ostream)
+
+
+    print_line(s_formatted_fund_name+" Daily Sharpe Ratio",get_daily_sharpe(fund_ts, years), i_spacing=4, ostream=ostream)
+
+
+    if type(original)!=type("str"):
+        for i in range(len(original)):
+            print_line(s_formatted_original_name[i]+" Daily Sharpe Ratio",get_daily_sharpe(original[i], years), i_spacing=4, ostream=ostream)
+
+    for bench_sym in benchmark:
+        print_line(bench_sym+" Daily Sharpe Ratio",get_daily_sharpe(benchmark_close[bench_sym], years), i_spacing=4, ostream=ostream)
+
+
+    print_years(years, ostream)
+
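+    # The Sortino ratio is like the Sharpe ratio but divides by downside
+    # deviation (the std dev of negative returns only), so upside swings
+    # are not counted as risk.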
+    print_line(s_formatted_fund_name+" Daily Sortino Ratio",get_daily_sortino(fund_ts, years), i_spacing=4, ostream=ostream)
+
+    if type(original)!=type("str"):
+        for i in range(len(original)):
+            print_line(s_formatted_original_name[i]+" Daily Sortino Ratio",get_daily_sortino(original[i], years), i_spacing=4, ostream=ostream)
+
+
+    for bench_sym in benchmark:
+        print_line(bench_sym+" Daily Sortino Ratio",get_daily_sortino(benchmark_close[bench_sym], years), i_spacing=4, ostream=ostream)
+
+
+    ostream.write("\n\n\nCorrelation and Beta with DJ Industries for the Fund ")
+
+    print_industry_coer(fund_ts,ostream)
+
+    ostream.write("\n\nCorrelation and Beta with Other Indices for the Fund ")
+
+    print_other_coer(fund_ts,ostream)
+
+    ostream.write("\n\n\nMonthly Returns for the Fund %\n")
+
+    print_monthly_returns(fund_ts, years, ostream)
+
+    if type(ts_turnover) != type("False"):
+        ostream.write("\n\nMonthly Turnover for the fund\n")
+        print_monthly_turnover(fund_ts, years, ts_turnover, ostream)
+
+    ostream.write("\n\n3 Month Kolmogorov-Smirnov 2-Sample Similarity Test\n")
+
+    print_monthly_ks(fund_ts, years, ostream)
+
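+    # ks_statistic presumably runs a two-sample Kolmogorov-Smirnov test of the
+    # last three months of daily returns against the preceding history, with
+    # (-1, -1) signalling insufficient data. An illustrative sketch with
+    # hypothetical variable names (not the actual helper):
+    #
+    #   from scipy.stats import ks_2samp
+    #   ks, p = ks_2samp(last_3mo_daily_rets, earlier_daily_rets)
+    #
+    # A large p-value means the recent returns look statistically consistent
+    # with the fund's earlier behavior.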
+    ks, p = ks_statistic(fund_ts)
+    if ks != -1 and p != -1:
+        ostream.write("\nResults for the Similarity Test over last 3 months : (KS = %2.5f, p = %2.5f) \n\n"% (ks, p))
+
+    if directory != False:
+        ostream.write("
") + + +def print_html(fund_ts, benchmark, name, lf_dividend_rets=0.0, original="", + s_fund_name="Fund", s_original_name="Original", d_trading_params="", d_hedge_params="", + s_comments="", directory=False, leverage=False, s_leverage_name="Leverage",commissions=0, slippage=0, + borrowcost=0, ostream=sys.stdout, i_start_cash=1000000): + """ + @summary prints stats of a provided fund and benchmark + @param fund_ts: fund value in pandas timeseries + @param benchmark: benchmark symbol to compare fund to + @param name: name to associate with the fund in the report + @param directory: parameter to specify printing to a directory + @param leverage: time series to plot with report + @param commissions: value to print with report + @param slippage: value to print with report + @param ostream: stream to print stats to, defaults to stdout + """ + + #Set locale for currency conversions + locale.setlocale(locale.LC_ALL, '') + + #make names length independent for alignment + s_formatted_original_name="%15s" % s_original_name + s_formatted_fund_name = "%15s" % s_fund_name + + fund_ts=fund_ts.fillna(method='pad') + if directory != False : + if not path.exists(directory): + makedirs(directory) + + sfile = path.join(directory, "report-%s.html" % name ) + splot = "plot-%s.png" % name + splot_dir = path.join(directory, splot) + ostream = open(sfile, "wb") + print("writing to ", sfile) + + if type(original)==type("str"): + if type(leverage)!=type(False): + print_plot(fund_ts, benchmark, name, splot_dir, lf_dividend_rets, leverage=leverage, i_start_cash = i_start_cash, s_leverage_name=s_leverage_name) + else: + print_plot(fund_ts, benchmark, name, splot_dir, lf_dividend_rets, i_start_cash = i_start_cash) + else: + if type(leverage)!=type(False): + print_plot([fund_ts, original], benchmark, name, splot_dir, s_original_name, lf_dividend_rets, leverage=leverage, i_start_cash = i_start_cash, s_leverage_name=s_leverage_name) + else: + print_plot([fund_ts, original], benchmark, name, splot_dir, s_original_name, lf_dividend_rets, i_start_cash = i_start_cash) + + print_header(ostream,name) + start_date = fund_ts.index[0].strftime("%m/%d/%Y") + end_date = fund_ts.index[-1].strftime("%m/%d/%Y") + ostream.write("Performance Summary for "\ + + str(path.basename(name)) + " Backtest\n") + ostream.write("For the dates " + str(start_date) + " to "\ + + str(end_date) + "") + + #paramater section + if d_trading_params!="": + ostream.write("\n\nTrading Paramaters\n\n") + for var in d_trading_params: + print_line(var, d_trading_params[var],ostream=ostream) + if d_hedge_params!="": + ostream.write("\nHedging Paramaters\n\n") + if type(d_hedge_params['Weight of Hedge']) == type(float): + d_hedge_params['Weight of Hedge'] = str(int(d_hedge_params['Weight of Hedge']*100)) + '%' + for var in d_hedge_params: + print_line(var, d_hedge_params[var],ostream=ostream) + + #comment section + if s_comments!="": + ostream.write("\nComments\n\n%s" % s_comments) + + + if directory != False : + ostream.write("\n\n\n\n") + + mult = i_start_cash/fund_ts.values[0] + + + timeofday = dt.timedelta(hours = 16) + timestamps = du.getNYSEdays(fund_ts.index[0], fund_ts.index[-1], timeofday) + dataobj =de.DataAccess('Yahoo') + years = du.getYears(fund_ts) + benchmark_close = dataobj.get_data(timestamps, benchmark, ["close"]) + benchmark_close=benchmark_close[0] + for bench_sym in benchmark: + benchmark_close[bench_sym]=benchmark_close[bench_sym].fillna(method='pad') + + if type(lf_dividend_rets) != type(0.0): + for i,sym in enumerate(benchmark): + 
+            benchmark_close[sym] = _dividend_rets_funds(benchmark_close[sym], lf_dividend_rets[i])
+
+    ostream.write("Resulting Values in $ with an initial investment of "+ locale.currency(int(round(i_start_cash)), grouping=True) + "\n")
+
+    print_line(s_formatted_fund_name+" Resulting Value",(locale.currency(int(round(fund_ts.values[-1]*mult)), grouping=True)),i_spacing=3, ostream=ostream)
+
+    if type(original)!=type("str"):
+        mult3 = i_start_cash / original.values[0]
+        print_line(s_formatted_original_name +" Resulting Value",(locale.currency(int(round(original.values[-1]*mult3)), grouping=True)),i_spacing=3, ostream=ostream)
+
+    for bench_sym in benchmark:
+        mult2 = i_start_cash/benchmark_close[bench_sym].values[0]
+        print_line(bench_sym+" Resulting Value",locale.currency(int(round(benchmark_close[bench_sym].values[-1]*mult2)), grouping=True),i_spacing=3, ostream=ostream)
+
+    ostream.write("\n")
+
+    if len(years) > 1:
+        print_line(s_formatted_fund_name+" Sharpe Ratio","%10.3f" % fu.get_sharpe_ratio(fund_ts.values)[0],i_spacing=4, ostream=ostream)
+        if type(original)!=type("str"):
+            print_line(s_formatted_original_name+" Sharpe Ratio","%10.3f" % fu.get_sharpe_ratio(original.values)[0],i_spacing=4, ostream=ostream)
+
+        for bench_sym in benchmark:
+            print_line(bench_sym+" Sharpe Ratio","%10.3f" % fu.get_sharpe_ratio(benchmark_close[bench_sym].values)[0],i_spacing=4,ostream=ostream)
+        ostream.write("\n")
+
+    ostream.write("Transaction Costs\n")
+    print_line("Total Commissions"," %15s, %10.2f%%" % (locale.currency(int(round(commissions)), grouping=True), \
+                                                  float((round(commissions)*100)/(fund_ts.values[-1]*mult))), i_spacing=4, ostream=ostream)
+
+    print_line("Total Slippage"," %15s, %10.2f%%" % (locale.currency(int(round(slippage)), grouping=True), \
+                                                     float((round(slippage)*100)/(fund_ts.values[-1]*mult))), i_spacing=4, ostream=ostream)
+
+    print_line("Total Short Borrowing Cost"," %15s, %10.2f%%" % (locale.currency(int(round(borrowcost)), grouping=True), \
+                                                     float((round(borrowcost)*100)/(fund_ts.values[-1]*mult))), i_spacing=4, ostream=ostream)
+
+    print_line("Total Costs"," %15s, %10.2f%%" % (locale.currency(int(round(borrowcost+slippage+commissions)), grouping=True), \
+                                  float((round(borrowcost+slippage+commissions)*100)/(fund_ts.values[-1]*mult))), i_spacing=4, ostream=ostream)
+
+    ostream.write("\n")
+
+    print_line(s_formatted_fund_name+" Std Dev of Returns",get_std_dev(fund_ts),i_spacing=8, ostream=ostream)
+
+    if type(original)!=type("str"):
+        print_line(s_formatted_original_name+" Std Dev of Returns", get_std_dev(original), i_spacing=8, ostream=ostream)
+
+    for bench_sym in benchmark:
+        print_line(bench_sym+" Std Dev of Returns", get_std_dev(benchmark_close[bench_sym]), i_spacing=8, ostream=ostream)
+
+    ostream.write("\n")
+
+
+    for bench_sym in benchmark:
+        print_benchmark_coer(fund_ts, benchmark_close[bench_sym], str(bench_sym), ostream)
+    ostream.write("\n")
+
+    ostream.write("\nYearly Performance Metrics")
+    print_years(years, ostream)
+
+    s_line=""
+    for f_token in get_annual_return(fund_ts, years):
+        s_line+=" %+8.2f%%" % f_token
+    print_line(s_formatted_fund_name+" Annualized Return", s_line, i_spacing=4, ostream=ostream)
+    lf_vals=[get_annual_return(fund_ts, years)]
+    ls_labels=[name]
+
+    if type(original)!=type("str"):
+        s_line=""
+        for f_token in get_annual_return(original, years):
+            s_line+=" %+8.2f%%" % f_token
+        print_line(s_formatted_original_name+" Annualized Return", s_line, i_spacing=4, ostream=ostream)
+        lf_vals.append(get_annual_return(original, years))
+        ls_labels.append(s_original_name)
+
+    for bench_sym in benchmark:
+        s_line=""
+        for f_token in get_annual_return(benchmark_close[bench_sym], years):
+            s_line+=" %+8.2f%%" % f_token
+        print_line(bench_sym+" Annualized Return", s_line, i_spacing=4, ostream=ostream)
+        lf_vals.append(get_annual_return(benchmark_close[bench_sym], years))
+        ls_labels.append(bench_sym)
+
+    ls_year_labels=[]
+    for i in range(0,len(years)):
+        ls_year_labels.append(str(years[i]))
+    print_bar_chart(lf_vals, ls_labels, ls_year_labels, directory+"/annual_rets.png")
+
+    print_years(years, ostream)
+
+    print_line(s_formatted_fund_name+" Winning Days",get_winning_days(fund_ts, years), i_spacing=4, ostream=ostream)
+
+
+    if type(original)!=type("str"):
+        print_line(s_formatted_original_name+" Winning Days",get_winning_days(original, years), i_spacing=4, ostream=ostream)
+
+
+    for bench_sym in benchmark:
+        print_line(bench_sym+" Winning Days",get_winning_days(benchmark_close[bench_sym], years), i_spacing=4, ostream=ostream)
+
+
+    print_years(years, ostream)
+
+    print_line(s_formatted_fund_name+" Max Draw Down",get_max_draw_down(fund_ts, years), i_spacing=4, ostream=ostream)
+
+    if type(original)!=type("str"):
+        print_line(s_formatted_original_name+" Max Draw Down",get_max_draw_down(original, years), i_spacing=4, ostream=ostream)
+
+
+    for bench_sym in benchmark:
+        print_line(bench_sym+" Max Draw Down",get_max_draw_down(benchmark_close[bench_sym], years), i_spacing=4, ostream=ostream)
+
+
+    print_years(years, ostream)
+
+
+    print_line(s_formatted_fund_name+" Daily Sharpe Ratio",get_daily_sharpe(fund_ts, years), i_spacing=4, ostream=ostream)
+
+
+    if type(original)!=type("str"):
+        print_line(s_formatted_original_name+" Daily Sharpe Ratio",get_daily_sharpe(original, years), i_spacing=4, ostream=ostream)
+
+    for bench_sym in benchmark:
+        print_line(bench_sym+" Daily Sharpe Ratio",get_daily_sharpe(benchmark_close[bench_sym], years), i_spacing=4, ostream=ostream)
+
+
+    print_years(years, ostream)
+
+    print_line(s_formatted_fund_name+" Daily Sortino Ratio",get_daily_sortino(fund_ts, years), i_spacing=4, ostream=ostream)
+
+    if type(original)!=type("str"):
+        print_line(s_formatted_original_name+" Daily Sortino Ratio",get_daily_sortino(original, years), i_spacing=4, ostream=ostream)
+
+
+    for bench_sym in benchmark:
+        print_line(bench_sym+" Daily Sortino Ratio",get_daily_sortino(benchmark_close[bench_sym], years), i_spacing=4, ostream=ostream)
+
+
+    ostream.write("\n\n\nCorrelation and Beta with DJ Industries for the Fund ")
+
+    print_industry_coer(fund_ts,ostream)
+
+    ostream.write("\n\nCorrelation and Beta with Other Indices for the Fund ")
+
+    print_other_coer(fund_ts,ostream)
+
+    ostream.write("\n\n\nMonthly Returns for the Fund (%)\n")
+
+    print_monthly_returns(fund_ts, years, ostream)
+    print_footer(ostream)
+
+def print_bar_chart(llf_vals, ls_fund_labels, ls_year_labels, s_filename):
+    amin=min(min(llf_vals))
+    min_lim=0
+    if amin<0:
+        min_lim = amin
+    pyplot.clf()
+    ind = np.arange(len(ls_year_labels))
+    ind=ind*2
+    width = 0.35
+    fig = pyplot.figure()
+    ax = fig.add_subplot(111)
+    colors=('r','g','b')
+    rects=[]
+    for i in range(0,len(llf_vals)):
+        rects.append( ax.bar(ind+width*i, llf_vals[i], width, color=colors[i]))
+    ax.set_ylabel('Annual Return')
+    ax.set_ylim(min_lim, 5)
+    ax.set_title('Annual Return by Fund and Year')
+    ax.set_xticks(ind+width*len(llf_vals)/2)
+    ax.set_xticklabels(ls_year_labels)
+    plots=[]
+    for i in range(0,len(llf_vals)):
+        plots.append(rects[i][0])
+    ax.legend(plots,ls_fund_labels)
+
+    def autolabel(rects):
+        # attach some text labels
+        for rect in rects:
+            height = rect.get_height()
+            ax.text(rect.get_x()+rect.get_width()/2., 1.05*height, '%d'%int(height),
+                    ha='center', va='bottom')
+    for i in range(0,len(llf_vals)):
+        autolabel(rects[i])
+    savefig(s_filename, format = 'png')
+
+def print_plot(fund, benchmark, graph_name, filename, s_original_name="", lf_dividend_rets=0.0, leverage=False, i_start_cash = 1000000, s_leverage_name="Leverage"):
+    """
+    @summary prints a plot of a provided fund and benchmark
+    @param fund: fund value in pandas timeseries
+    @param benchmark: benchmark symbol to compare fund to
+    @param graph_name: name to associate with the fund in the report
+    @param filename: file location to store plot1
+    """
+    pyplot.clf()
+    fig = pyplot.figure()
+    from matplotlib.font_manager import FontProperties
+    fontP = FontProperties()
+    fontP.set_size('small')
+
+    if type(leverage)==type(False):
+        ax = pyplot.subplot(111)
+    else:
+        gs = gridspec.GridSpec(2, 1, height_ratios=[4, 1])
+        ax = pyplot.subplot(gs[0])
+
+    start_date = 0
+    end_date = 0
+    if(type(fund)!= type(list())):
+        if(start_date == 0 or start_date>fund.index[0]):
+            start_date = fund.index[0]
+        if(end_date == 0 or end_date<fund.index[-1]):
+            end_date = fund.index[-1]
+    else:
+        for entity in fund:
+            if(type(entity)==type(list())):
+                for j in range(0,len(entity)):
+                    if(start_date == 0 or start_date>entity[j].index[0]):
+                        start_date = entity[j].index[0]
+                    if(end_date == 0 or end_date<entity[j].index[-1]):
+                        end_date = entity[j].index[-1]
+            else:
+                if(start_date == 0 or start_date>entity.index[0]):
+                    start_date = entity.index[0]
+                if(end_date == 0 or end_date<entity.index[-1]):
+                    end_date = entity.index[-1]
+
+def generate_robust_report(funds_list, out_file):
+    html_file = open(out_file, "w")
+    i = 0
+    pyplot.clf()
+    #load spx for time frame
+    symbol = ["$SPX"]
+    start_date = 0
+    end_date = 0
+    for fund in funds_list:
+        if(type(fund)!= type(list())):
+            if(start_date == 0 or start_date>fund.index[0]):
+                start_date = fund.index[0]
+            if(end_date == 0 or end_date<fund.index[-1]):
+                end_date = fund.index[-1]
+        else:
+            if(start_date == 0 or start_date>fund[0].index[0]):
+                start_date = fund[0].index[0]
+            if(end_date == 0 or end_date<fund[0].index[-1]):
+                end_date = fund[0].index[-1]
+    html_file.write("QSTK Generated Report:" + out_file + "\n")
+    # html_file.write("\n")
+    html_file.write("\n")
\n\n") + print_stats(fund_matrix, "robust funds", html_file) + print_footer(html_file) + +if __name__ == '__main__': + # Usage + # + # Normal: + # python report.py 'out.pkl' ['out2.pkl' ...] + # + # Robust: + # python report.py -r 'out.pkl' + # + + ROBUST = 0 + + if(sys.argv[1] == '-r'): + ROBUST = 1 + + FILENAME = "report.html" + + if(ROBUST == 1): + ANINPUT = open(sys.argv[2],"r") + FUNDS = pickle.load(ANINPUT) + generate_robust_report(FUNDS, FILENAME) + else: + FILES = sys.argv + FILES.remove(FILES[0]) + FUNDS = [] + for AFILE in FILES: + ANINPUT = open(AFILE,"r") + FUND = pickle.load(ANINPUT) + FUNDS.append(FUND) + generate_report(FUNDS, FILES, FILENAME) + + diff --git a/QSTK/qstkutil/DataAccess.py b/QSTK/qstkutil/DataAccess.py index 9e466767a..95c9ddab6 100644 --- a/QSTK/qstkutil/DataAccess.py +++ b/QSTK/qstkutil/DataAccess.py @@ -20,9 +20,13 @@ import pickle as pkl import time import datetime as dt -import dircache +from QSTK.qstkutil.utils import cached_listdir import tempfile import copy +import hashlib + +def md5(str): + return hashlib.md5(str.encode('utf-8')).hexdigest() class Exchange (object): AMEX = 1 @@ -93,11 +97,11 @@ def __init__(self, sourcein=DataSource.YAHOO, s_datapath=None, self.scratchdir = os.path.join(tempfile.gettempdir(), 'QSScratch') if verbose: - print "Scratch Directory: ", self.scratchdir - print "Data Directory: ", self.rootdir + print("Scratch Directory: ", self.scratchdir) + print("Data Directory: ", self.rootdir) if not os.path.isdir(self.rootdir): - print "Data path provided is invalid" + print("Data path provided is invalid") raise if not os.path.exists(self.scratchdir): @@ -245,7 +249,7 @@ def get_data_hardread(self, ts_list, symbol_list, data_item, verbose=False, bInc if bIncDelist: lsDelPaths = self.getPathOfFile( symbol, True ) if file_path == None and len(lsDelPaths) > 0: - print 'Found delisted paths:', lsDelPaths + print('Found delisted paths:', lsDelPaths) ''' If we don't have a file path continue... unless we have delisted paths ''' if (type (file_path) != type ("random string")): @@ -253,10 +257,10 @@ def get_data_hardread(self, ts_list, symbol_list, data_item, verbose=False, bInc continue; #File not found if not file_path == None: - _file = open(file_path, "rb") + _file = open(file_path, "rt") except IOError: # If unable to read then continue. 
                # If unable to read then continue. The value for this stock will be nan
-                print _file
+                print(_file)
                continue;
 
            assert( not _file == None or bIncDelist == True )
@@ -264,8 +268,8 @@
            if _file != None:
                if (self.source==DataSource.CUSTOM) or (self.source==DataSource.YAHOO)or (self.source==DataSource.MLT):
                    creader = csv.reader(_file)
-                    row=creader.next()
-                    row=creader.next()
+                    row=next(creader)
+                    row=next(creader)
                    #row.pop(0)
                    for i, item in enumerate(row):
                        if i==0:
@@ -315,7 +319,7 @@
            #now remove all the columns except the timestamps and one data column
            if verbose:
-                print self.getPathOfFile(symbol)
+                print(self.getPathOfFile(symbol))
 
            ''' Fix 1 row case by reshaping '''
            if( naData.ndim == 1 ):
@@ -337,7 +341,7 @@
            num_rows= temp_np.shape[0]
 
-            symbol_ts_list = range(num_rows) # preallocate
+            symbol_ts_list = list(range(num_rows)) # preallocate
 
            for i in range (0, num_rows):
                timebase = temp_np[i][0]
@@ -435,20 +439,10 @@ def get_data (self, ts_list, symbol_list, data_item, verbose=False, bIncDelist=F
 
        ls_syms_copy = copy.deepcopy(symbol_list)
 
-        # Create the hash for the symbols
-        hashsyms = 0
-        for i in symbol_list:
-            hashsyms = (hashsyms + hash(i)) % 10000000
-
-        # Create the hash for the timestamps
-        hashts = 0
-        # print "test point 1: " + str(len(ts_list))
        # spyfile=os.environ['QSDATA'] + '/Processed/Norgate/Stocks/US/NYSE Arca/SPY.pkl'
-        for i in ts_list:
-            hashts = (hashts + hash(i)) % 10000000
-        hashstr = 'qstk-' + str (self.source)+'-' +str(abs(hashsyms)) + '-' + str(abs(hashts)) \
-            + '-' + str(hash(str(data_item))) # + '-' + str(hash(str(os.path.getctime(spyfile))))
+        hashstr = 'qstk-' + str (self.source)+'-' +md5(str(symbol_list)) + '-' + md5(str(ts_list)) \
+            + '-' + md5(str(data_item))
 
        # get the directory for scratch files from environment
        # try:
@@ -460,7 +454,7 @@
        # final complete filename
        cachefilename = self.scratchdir + '/' + hashstr + '.pkl'
        if verbose:
-            print "cachefilename is: " + cachefilename
+            print("cachefilename is: " + cachefilename)
 
        # now eather read the pkl file, or do a hardread
        readfile = False # indicate that we have not yet read the file
@@ -476,7 +470,7 @@
        if os.path.exists(cachefilename):
            if ((dt.datetime.now() - dt.datetime.fromtimestamp(os.path.getmtime(cachefilename))) < cachestall):
                if verbose:
-                    print "cache hit"
+                    print("cache hit")
                try:
                    cachefile = open(cachefilename, "rb")
                    start = time.time() # start timer
@@ -486,36 +480,36 @@
                    cachefile.close()
                except IOError:
                    if verbose:
-                        print "error reading cache: " + cachefilename
-                        print "recovering..."
+                        print("error reading cache: " + cachefilename)
+                        print("recovering...")
                except EOFError:
                    if verbose:
-                        print "error reading cache: " + cachefilename
-                        print "recovering..."
+ print("error reading cache: " + cachefilename) + print("recovering...") if (readfile!=True): if verbose: - print "cache miss" - print "beginning hardread" + print("cache miss") + print("beginning hardread") start = time.time() # start timer if verbose: - print "data_item(s): " + str(data_item) - print "symbols to read: " + str(symbol_list) + print("data_item(s): " + str(data_item)) + print("symbols to read: " + str(symbol_list)) retval = self.get_data_hardread(ts_list, symbol_list, data_item, verbose, bIncDelist) elapsed = time.time() - start # end timer if verbose: - print "end hardread" - print "saving to cache" + print("end hardread") + print("saving to cache") try: cachefile = open(cachefilename,"wb") pkl.dump(retval, cachefile, -1) - os.chmod(cachefilename,0666) + os.chmod(cachefilename,0o666) except IOError: - print "error writing cache: " + cachefilename + print("error writing cache: " + cachefilename) if verbose: - print "end saving to cache" + print("end saving to cache") if verbose: - print "reading took " + str(elapsed) + " seconds" + print("reading took " + str(elapsed) + " seconds") if type(retval) == type([]): for i, df_single in enumerate(retval): @@ -548,14 +542,14 @@ def getPathOfFile(self, symbol_name, bDelisted=False): if re.search('Delisted Securities', sPath) == None: continue - for sFile in dircache.listdir(sPath): + for sFile in cached_listdir(sPath): if not re.match( '%s-\d*.pkl'%symbol_name, sFile ) == None: lsPaths.append(sPath + sFile) lsPaths.sort() return lsPaths - print "Did not find path to " + str(symbol_name) + ". Looks like this file is missing" + print("Did not find path to " + str(symbol_name) + ". Looks like this file is missing") def getPathOfCSVFile(self, symbol_name): @@ -565,7 +559,7 @@ def getPathOfCSVFile(self, symbol_name): return (str(str(path1)+str(symbol_name)+".csv")) #if ends #for ends - print "Did not find path to " + str (symbol_name)+". Looks like this file is missing" + print("Did not find path to " + str (symbol_name)+". Looks like this file is missing") def get_all_symbols (self): ''' @@ -582,11 +576,11 @@ def get_all_symbols (self): for path in self.folderList: stocksAtThisPath = list() #print str(path) - stocksAtThisPath = dircache.listdir(str(path)) + stocksAtThisPath = cached_listdir(str(path)) #Next, throw away everything that is not a .pkl And these are our stocks! - stocksAtThisPath = filter (lambda x:(str(x).find(str(self.fileExtensionToRemove)) > -1), stocksAtThisPath) + stocksAtThisPath = [x for x in stocksAtThisPath if (str(x).find(str(self.fileExtensionToRemove)) > -1)] #Now, we remove the .pkl to get the name of the stock - stocksAtThisPath = map(lambda x:(x.partition(str(self.fileExtensionToRemove))[0]),stocksAtThisPath) + stocksAtThisPath = [(x.partition(str(self.fileExtensionToRemove))[0]) for x in stocksAtThisPath] listOfStocks.extend(stocksAtThisPath) #for stock in stocksAtThisPath: @@ -641,15 +635,15 @@ def get_symbols_in_sublist (self, subdir): ''' pathtolook = self.rootdir + self.midPath + subdir - stocksAtThisPath = dircache.listdir(pathtolook) + stocksAtThisPath = cached_listdir(pathtolook) #Next, throw away everything that is not a .pkl And these are our stocks! 
        try:
-            stocksAtThisPath = filter (lambda x:(str(x).find(str(self.fileExtensionToRemove)) > -1), stocksAtThisPath)
+            stocksAtThisPath = [x for x in stocksAtThisPath if (str(x).find(str(self.fileExtensionToRemove)) > -1)]
            #Now, we remove the .pkl to get the name of the stock
-            stocksAtThisPath = map(lambda x:(x.partition(str(self.fileExtensionToRemove))[0]),stocksAtThisPath)
+            stocksAtThisPath = [(x.partition(str(self.fileExtensionToRemove))[0]) for x in stocksAtThisPath]
        except:
-            print "error: no path to " + subdir
+            print("error: no path to " + subdir)
            stocksAtThisPath = list()
 
        return stocksAtThisPath
@@ -671,7 +665,7 @@ def get_data_labels(self):
        '''
 
        if (self.source != DataSource.COMPUSTAT):
-            print 'Function only valid for Compustat objects!'
+            print('Function only valid for Compustat objects!')
            return []
 
        return DataItem.COMPUSTAT
@@ -725,7 +719,7 @@ def get_info(self):
        else:
            retstr = "DataAccess internal error\n"
 
-        print retstr
+        print(retstr)
        return retstr
 
    #get_sublists
@@ -737,18 +731,18 @@
 
    # Check if GOOG is a valid symbol.
    val = c_dataobj.check_symbol('GOOG')
-    print "Is GOOG a valid symbol? :" , val
+    print("Is GOOG a valid symbol? :" , val)
 
    # Check if QWERTY is a valid symbol.
    val = c_dataobj.check_symbol('QWERTY')
-    print "Is QWERTY a valid symbol? :" , val
+    print("Is QWERTY a valid symbol? :" , val)
 
    # Check if EBAY is part of SP5002012 list.
    val = c_dataobj.check_symbol('EBAY', s_list='sp5002012')
-    print "Is EBAY a valid symbol in SP5002012 list? :", val
+    print("Is EBAY a valid symbol in SP5002012 list? :", val)
 
    # Check if GLD is part of SP5002012 after checking if GLD is a valid symbol.
    val = c_dataobj.check_symbol('GLD')
-    print "Is GLD a valid symbol? : ", val
+    print("Is GLD a valid symbol? : ", val)
    val = c_dataobj.check_symbol('GLD', 'sp5002012')
-    print "Is GLD a valid symbol in sp5002012 list? :", val
+    print("Is GLD a valid symbol in sp5002012 list? :", val)
:", val) diff --git a/QSTK/qstkutil/qsdateutil.py b/QSTK/qstkutil/qsdateutil.py index 14720188a..c5901bbf3 100644 --- a/QSTK/qstkutil/qsdateutil.py +++ b/QSTK/qstkutil/qsdateutil.py @@ -27,13 +27,13 @@ def _cache_dates(): # filename = os.environ['QS'] + "/qstkutil/NYSE_dates.txt" filename = os.path.join(os.path.dirname(__file__), 'NYSE_dates.txt') except KeyError: - print "Please be sure you have NYSE_dates.txt in the qstkutil directory" + print("Please be sure you have NYSE_dates.txt in the qstkutil directory") datestxt = np.loadtxt(filename, dtype=str) dates = [] for i in datestxt: dates.append(dt.datetime.strptime(i, "%m/%d/%Y")) - return pd.TimeSeries(index=dates, data=dates) + return pd.Series(index=dates, data=dates) GTS_DATES = _cache_dates() @@ -172,8 +172,8 @@ def getNextNNYSEdays(startday, days, timeofday): # filename = os.environ['QS'] + "/qstkutil/NYSE_dates.txt" filename = os.path.join(os.path.dirname(__file__), 'NYSE_dates.txt') except KeyError: - print "Please be sure to set the value for QS in config.sh or\n" - print "in local.sh and then \'source local.sh\'.\n" + print("Please be sure to set the value for QS in config.sh or\n") + print("in local.sh and then \'source local.sh\'.\n") datestxt = np.loadtxt(filename,dtype=str) dates=[] @@ -198,8 +198,8 @@ def getPrevNNYSEday(startday, timeofday): # filename = os.environ['QS'] + "/qstkutil/NYSE_dates.txt" filename = os.path.join(os.path.dirname(__file__), 'NYSE_dates.txt') except KeyError: - print "Please be sure to set the value for QS in config.sh or\n" - print "in local.sh and then \'source local.sh\'.\n" + print("Please be sure to set the value for QS in config.sh or\n") + print("in local.sh and then \'source local.sh\'.\n") datestxt = np.loadtxt(filename,dtype=str) @@ -254,7 +254,7 @@ def _trade_dates(dt_start, dt_end, s_period): # Note, dates are index as well as values, we select based on index # but return values since it is a numpy array of datetimes instead of # pandas specific. 
-    ts_dates = pd.TimeSeries(index=ldt_timestamps, data=ldt_timestamps)
+    ts_dates = pd.Series(index=ldt_timestamps, data=ldt_timestamps)
 
    # These are the dates we want
    if s_period[:2] == 'BW':
@@ -263,7 +263,7 @@
        dr_range = pd.DateRange(dt_start, dt_end, timeRule=s_period[1:])
        dr_range = np.asarray(dr_range)
-        li_even = np.array(range(len(dr_range)))
+        li_even = np.array(list(range(len(dr_range))))
        dr_range = dr_range[li_even[li_even % 2 == 0]]
    else:
        dr_range = pd.DateRange(dt_start, dt_end,
diff --git a/QSTK/qstkutil/tsutil.py b/QSTK/qstkutil/tsutil.py
index cc093a548..c74dbc7e7 100644
--- a/QSTK/qstkutil/tsutil.py
+++ b/QSTK/qstkutil/tsutil.py
@@ -72,7 +72,7 @@ def monthly(funds):
        last_this_month = qsdateutil.getLastDay(funds, year, month)
        if last_last_month == -1 :
            last_last_month=qsdateutil.getFirstDay(funds, year, month)
-        if type(funds).__name__=='TimeSeries':
+        if type(funds).__name__=='Series':
            funds2.append(funds[last_this_month]/funds[last_last_month]-1)
        else:
            funds2.append(funds.xs(last_this_month)/funds.xs(last_last_month)-1)
@@ -324,8 +324,8 @@ def getOptPort(rets, f_target, l_period=1, naLower=None, naUpper=None, lNagDebug
        pass
        import nagint as nag
    except ImportError:
-        print 'Could not import NAG library'
-        print 'make sure nagint.so is in your python path'
+        print('Could not import NAG library')
+        print('make sure nagint.so is in your python path')
        return ([], 0, 0)
 
    # Get number of stocks """
@@ -367,7 +367,7 @@ def getOptPort(rets, f_target, l_period=1, naLower=None, naUpper=None, lNagDebug
        naReturn = nag.optPort( naConstraints, naLower, naUpper, \
                                naCov, naInitial, lNagDebug )
    except RuntimeError:
-        print 'NAG Runtime error with target: %.02lf'%(f_target)
+        print('NAG Runtime error with target: %.02lf'%(f_target))
        return ( naInitial, sqrt( naCov[0][0] ) )
        #return semi-junk to not mess up the rest of the plot
@@ -402,7 +402,7 @@ def OptPort( naData, fTarget, naLower=None, naUpper=None, naExpected=None, s_typ
        from cvxopt.solvers import qp, options
    except ImportError:
-        print 'Could not import CVX library'
+        print('Could not import CVX library')
        raise
 
    ''' Get number of stocks '''
@@ -481,7 +481,7 @@
        (fMin, fMax) = getRetRange(False, naLower, naUpper, naExpected, "long")
        #print (fTarget, fMin, fMax)
        if fTarget<fMin or fTarget>fMax:
-            print "Target not possible", fTarget, fMin, fMax
+            print("Target not possible", fTarget, fMin, fMax)
            b_error = True
 
    naLower = naLower*(-1)
@@ -519,7 +519,7 @@
        b_error = True
 
    if b_error == True:
-        print "Optimization not Possible"
+        print("Optimization not Possible")
        na_port = naLower*-1
        if sum(na_port) < 1:
            if sum(naUpper) == 1:
@@ -765,7 +765,7 @@ def stockFilter( dmPrice, dmVolume, fNonNan=0.95, fPriceVolume=100*1000 ):
    for sStock in dmPrice.columns:
        fValid = 0.0
-        print sStock
+        print(sStock)
        # loop through all dates """
        for dtDate in dmPrice.index:
            # Count null (nan/inf/etc) values """
@@ -830,7 +830,7 @@ def getRandPort( lNum, dtStart=None, dtEnd=None, lsStocks=None,\
    lsRetStocks = []
 
    # Loop until we have enough randomly selected stocks """
-    llRemainingIndexes = range(0,len(lsStocks))
+    llRemainingIndexes = list(range(0,len(lsStocks)))
    lsValid = None
    while( len(lsRetStocks) != lNum ):
 
@@ -838,7 +838,7 @@
        for i in range( lNum - len(lsRetStocks) ):
            lRemaining = len(llRemainingIndexes)
            if( lRemaining == 0 ):
-                print 'Error in getRandPort: ran out of stocks'
+                print('Error in getRandPort: ran out of stocks')
                return lsRetStocks
 
            # Pick a stock and remove it from the list of remaining stocks """
diff --git a/QSTK/qstkutil/utils.py b/QSTK/qstkutil/utils.py
index 16e56cd4a..f8e9d5304 100644
--- a/QSTK/qstkutil/utils.py
+++ b/QSTK/qstkutil/utils.py
@@ -12,8 +12,12 @@
 '''
 
-import dircache
 import os
+from functools import lru_cache
+
+@lru_cache()
+def cached_listdir(d):
+    return os.listdir(d)
 
 def clean_paths (paths_to_clean):
    '''
@@ -28,7 +32,7 @@ def clean_paths (paths_to_clean):
 
    for path in paths_to_clean:
-        files_at_this_path = dircache.listdir(str(path))
+        files_at_this_path = cached_listdir(str(path))
        for _file in files_at_this_path:
            if (os.path.isfile(path + _file)):
                os.remove(path + _file)
diff --git a/README.md b/README.md
index d932a892d..30322c749 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ QuantSoftware Toolkit [![PyPI version](https://badge.fury.io/py/QSTK.png)](http:
 - scipy >= 0.9.0,
 - matplotlib >= 1.1.0,
 - pandas >= 0.7.3,
-- python-dateutil==1.5,
+- python-dateutil>=2.0,
 - cvxopt >= 1.1.3,
 - scikit-learn >= 0.11
diff --git a/bin/DataGenerate_SineWave.py b/bin/DataGenerate_SineWave.py
index f5ac178d4..f010cb68e 100644
--- a/bin/DataGenerate_SineWave.py
+++ b/bin/DataGenerate_SineWave.py
@@ -38,12 +38,12 @@ def write(ls_symbols, d_data, ldt_timestamps):
 
 def main():
-    print "Creating Stock data from Sine Waves"
+    print("Creating Stock data from Sine Waves")
    dt_start = dt.datetime(2000, 1, 1)
    dt_end = dt.datetime(2012, 10, 31)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))
 
-    x = np.array(range(len(ldt_timestamps)))
+    x = np.array(list(range(len(ldt_timestamps))))
    ls_symbols = ['SINE_FAST', 'SINE_SLOW', 'SINE_FAST_NOISE', 'SINE_SLOW_NOISE']
 
    sine_fast = 10*np.sin(x/10.) + 100
@@ -52,7 +52,7 @@ def main():
    sine_fast_noise = 10*(np.sin(x/10.) + np.random.randn(x.size)) + 100
    sine_slow_noise = 10*(np.sin(x/30.) + np.random.randn(x.size)) + 100
-    d_data = dict(zip(ls_symbols, [sine_fast, sine_slow, sine_fast_noise, sine_slow_noise]))
+    d_data = dict(list(zip(ls_symbols, [sine_fast, sine_slow, sine_fast_noise, sine_slow_noise])))
 
    write(ls_symbols, d_data, ldt_timestamps)
diff --git a/bin/Data_CSV.py b/bin/Data_CSV.py
index abcfbe84c..2c08e2749 100644
--- a/bin/Data_CSV.py
+++ b/bin/Data_CSV.py
@@ -26,7 +26,7 @@ def get_data(ls_symbols, ls_keys):
    valid data on the start/enddates which requires lookback/forward
    @return: data dictionry
    '''
-    print "Getting Data from MySQL"
+    print("Getting Data from MySQL")
    # Modify dates to ensure enough data for all features
    dt_start = dt.datetime(2005,1,1)
    dt_end = dt.datetime(2012, 8, 31)
@@ -36,7 +36,7 @@ def get_data(ls_symbols, ls_keys):
 
    ldf_data = c_da.get_data(ldt_timestamps, ls_symbols, ls_keys)
 
-    d_data = dict(zip(ls_keys, ldf_data))
+    d_data = dict(list(zip(ls_keys, ldf_data)))
 
    return d_data
 
@@ -89,13 +89,13 @@ def main(s_directory, s_symbols_file):
    ls_keys = ['actual_open', 'actual_high', 'actual_low', 'actual_close', 'volume', 'close']
    d_data = get_data(ls_symbols, ls_keys)
    # print d_data
-    print "Creating CSV files now"
+    print("Creating CSV files now")
 
    for sym in ls_symbols:
-        print sym
+        print(sym)
        csv_sym(sym,d_data, ls_keys, s_directory)
 
-    print "Created all CSV files"
+    print("Created all CSV files")
 
 if __name__ == '__main__' :
diff --git a/bin/converter.py b/bin/converter.py
index 96d516e49..a27270253 100644
--- a/bin/converter.py
+++ b/bin/converter.py
@@ -1,106 +1,106 @@
-'''
-(c) 2011, 2012 Georgia Tech Research Corporation
-This source code is released under the New BSD license. Please see
-http://wiki.quantsoftware.org/index.php?title=QSTK_License
-for license details.
-
-Created on Jan 1, 2011
-
-@author:Drew Bratcher
-@contact: dbratcher@gatech.edu
-@summary: Contains tutorial for backtester and report.
-
-'''
-
-#
-# fundsToPNG.py
-#
-# Short script which produces a graph of funds
-# over time from a pickle file.
-#
-# Drew Bratcher
-#
-
-from pylab import *
-from QSTK.qstkutil import DataAccess as da
-from QSTK.qstkutil import tsutil as tsu
-# from quicksim import quickSim
-from copy import deepcopy
-import math
-from pandas import *
-import matplotlib.pyplot as plt
-import cPickle
-
-def fundsToPNG(funds,output_file):
-    plt.clf()
-    if(type(funds)==type(list())):
-        for i in range(0,len(funds)):
-            plt.plot(funds[i].index,funds[i].values)
-    else:
-        plt.plot(funds.index,funds.values)
-    plt.ylabel('Fund Value')
-    plt.xlabel('Date')
-    plt.gcf().autofmt_xdate(rotation=45)
-    plt.draw()
-    savefig(output_file, format='png')
-
-def fundsAnalysisToPNG(funds,output_file):
-    plt.clf()
-    if(type(funds)!=type(list())):
-        print 'fundsmatrix only contains one timeseries, not able to analyze.'
- #convert to daily returns - count=list() - dates=list() - sum=list() - for i in range(0,len(funds)): - ret=tsu.daily(funds[i].values) - for j in range(0, len(ret)): - if (funds[i].index[j] in dates): - sum[dates.index(funds[i].index[j])]+=ret[j] - count[dates.index(funds[i].index[j])]+=1 - else: - dates.append(funds[i].index[j]) - count.append(1) - sum.append(ret[j]) - #compute average - tot_ret=deepcopy(sum) - for i in range(0,len(sum)): - tot_ret[i]=sum[i]/count[i] - - #compute std - std=zeros(len(sum)) - for i in range(0,len(funds)): - temp=tsu.daily(funds[i].values) - for j in range(0,len(temp)): - std[dates.index(funds[i].index[j])]=0 - std[dates.index(funds[i].index[j])]+=math.pow(temp[j]-tot_ret[dates.index(funds[i].index[j])],2) - - for i in range(1, len(std)): -# std[i]=math.sqrt(std[i]/count[i])+std[i-1] - std[i]=math.sqrt(std[i]/count[i]) - - #compute total returns - lower=deepcopy(tot_ret) - upper=deepcopy(tot_ret) - tot_ret[0]=funds[0].values[0] - lower[0]=funds[0].values[0] - upper[0]=lower[0] -# for i in range(1,len(tot_ret)): -# tot_ret[i]=tot_ret[i-1]+(tot_ret[i])*tot_ret[i-1] -# lower[i]=tot_ret[i-1]-(std[i])*tot_ret[i-1] -# upper[i]=tot_ret[i-1]+(std[i])*tot_ret[i-1] - for i in range(1,len(tot_ret)): - lower[i]=(tot_ret[i]-std[i]+1)*lower[i-1] - upper[i]=(tot_ret[i]+std[i]+1)*upper[i-1] - tot_ret[i]=(tot_ret[i]+1)*tot_ret[i-1] - - - plt.clf() - plt.plot(dates,tot_ret) - plt.plot(dates,lower) - plt.plot(dates,upper) - plt.legend(('Tot_Ret','Lower','Upper'),loc='upper left') - plt.ylabel('Fund Total Return') - plt.ylim(ymin=0,ymax=2*tot_ret[0]) - plt.draw() - savefig(output_file, format='png') +''' +(c) 2011, 2012 Georgia Tech Research Corporation +This source code is released under the New BSD license. Please see +http://wiki.quantsoftware.org/index.php?title=QSTK_License +for license details. + +Created on Jan 1, 2011 + +@author:Drew Bratcher +@contact: dbratcher@gatech.edu +@summary: Contains tutorial for backtester and report. + +''' + +# +# fundsToPNG.py +# +# Short script which produces a graph of funds +# over time from a pickle file. 
+# +# Drew Bratcher +# + +from pylab import * +from QSTK.qstkutil import DataAccess as da +from QSTK.qstkutil import tsutil as tsu +# from quicksim import quickSim +from copy import deepcopy +import math +from pandas import * +import matplotlib.pyplot as plt +import pickle + +def fundsToPNG(funds,output_file): + plt.clf() + if(type(funds)==type(list())): + for i in range(0,len(funds)): + plt.plot(funds[i].index,funds[i].values) + else: + plt.plot(funds.index,funds.values) + plt.ylabel('Fund Value') + plt.xlabel('Date') + plt.gcf().autofmt_xdate(rotation=45) + plt.draw() + savefig(output_file, format='png') + +def fundsAnalysisToPNG(funds,output_file): + plt.clf() + if(type(funds)!=type(list())): + print('fundsmatrix only contains one timeseries, not able to analyze.') + #convert to daily returns + count=list() + dates=list() + sum=list() + for i in range(0,len(funds)): + ret=tsu.daily(funds[i].values) + for j in range(0, len(ret)): + if (funds[i].index[j] in dates): + sum[dates.index(funds[i].index[j])]+=ret[j] + count[dates.index(funds[i].index[j])]+=1 + else: + dates.append(funds[i].index[j]) + count.append(1) + sum.append(ret[j]) + #compute average + tot_ret=deepcopy(sum) + for i in range(0,len(sum)): + tot_ret[i]=sum[i]/count[i] + + #compute std + std=zeros(len(sum)) + for i in range(0,len(funds)): + temp=tsu.daily(funds[i].values) + for j in range(0,len(temp)): + std[dates.index(funds[i].index[j])]=0 + std[dates.index(funds[i].index[j])]+=math.pow(temp[j]-tot_ret[dates.index(funds[i].index[j])],2) + + for i in range(1, len(std)): +# std[i]=math.sqrt(std[i]/count[i])+std[i-1] + std[i]=math.sqrt(std[i]/count[i]) + + #compute total returns + lower=deepcopy(tot_ret) + upper=deepcopy(tot_ret) + tot_ret[0]=funds[0].values[0] + lower[0]=funds[0].values[0] + upper[0]=lower[0] +# for i in range(1,len(tot_ret)): +# tot_ret[i]=tot_ret[i-1]+(tot_ret[i])*tot_ret[i-1] +# lower[i]=tot_ret[i-1]-(std[i])*tot_ret[i-1] +# upper[i]=tot_ret[i-1]+(std[i])*tot_ret[i-1] + for i in range(1,len(tot_ret)): + lower[i]=(tot_ret[i]-std[i]+1)*lower[i-1] + upper[i]=(tot_ret[i]+std[i]+1)*upper[i-1] + tot_ret[i]=(tot_ret[i]+1)*tot_ret[i-1] + + + plt.clf() + plt.plot(dates,tot_ret) + plt.plot(dates,lower) + plt.plot(dates,upper) + plt.legend(('Tot_Ret','Lower','Upper'),loc='upper left') + plt.ylabel('Fund Total Return') + plt.ylim(ymin=0,ymax=2*tot_ret[0]) + plt.draw() + savefig(output_file, format='png') diff --git a/bin/csvformatter.py b/bin/csvformatter.py index 4b543279f..942fcdb6b 100644 --- a/bin/csvformatter.py +++ b/bin/csvformatter.py @@ -7,8 +7,8 @@ def csv_converter(inputfile, outputfile): actualheader = ['Symbol', 'Name', 'Type', 'Date', 'Shares', 'Price', 'Cash value', 'Commission', 'Notes'] reader = csv.reader(open(inputfile, 'r'), delimiter=',') - header = reader.next() - print "Header : ", header + header = next(reader) + print("Header : ", header) input_str = [] for row in reader: input_str.append(row) @@ -101,4 +101,4 @@ def csv_converter(inputfile, outputfile): inputfile = "./Settled.csv" outputfile = 'trans.csv' csv_converter(inputfile, outputfile) - print "Done" + print("Done") diff --git a/bin/gen_nyse_dates.py b/bin/gen_nyse_dates.py index d2cdf554e..c31b4a13f 100644 --- a/bin/gen_nyse_dates.py +++ b/bin/gen_nyse_dates.py @@ -267,7 +267,7 @@ log = [] # print it out for i in newdays: - print i.strftime("%m/%d/%Y") + print(i.strftime("%m/%d/%Y")) log.append(i.strftime("%m/%d/%Y")) np.savetxt('NYSE_dates.csv', log, fmt='%s', delimiter=',') diff --git a/bin/investors_report.py 
b/bin/investors_report.py index 4a3a82990..718f0285a 100644 --- a/bin/investors_report.py +++ b/bin/investors_report.py @@ -1,251 +1,251 @@ -# -# report.py -# -# Generates a html file containing a report based -# off a timeseries of funds from a pickle file. -# -# Drew Bratcher -# - -from pylab import * -import numpy -from QSTK.qstkutil import DataAccess as da -from QSTK.qstkutil import qsdateutil as du -from QSTK.qstkutil import tsutil as tsu -from QSTK.quicksim import quickSim as qs -import converter -import datetime as dt -from pandas import * -import matplotlib.pyplot as plt -import cPickle - -def readableDate(date): - return str(date.month)+"/"+str(date.day)+"/"+str(date.year) - -def getYearReturn(funds, year): - days=[] - for date in funds.index: - if(date.year==year): - days.append(date) - return funds[days[-1]]/funds[days[0]]-1 - -def getYearMaxDrop(funds, year): - days=[] - for date in funds.index: - if(date.year==year): - days.append(date) - maxdrop=0 - prevday=days[0] - for day in days[1:-1]: - if((funds[day]/funds[prevday]-1)f2ret[i]): - win+=1 - tot+=1 - return float(win)/tot - -def runOther(funds,symbols): - tsstart =dt.datetime(funds.index[0].year,funds.index[0].month,funds.index[0].day) - tsend =dt.datetime(funds.index[-1].year,funds.index[-1].month,funds.index[-1].day) - timeofday=dt.timedelta(hours=16) - timestamps=du.getNYSEdays(tsstart,tsend,timeofday) - dataobj=da.DataAccess('Norgate') - historic=dataobj.get_data(timestamps,symbols,"close") - alloc_val=float(0.1/(float(len(symbols))+1)) - alloc_vals=alloc_val*ones(len(symbols)) - alloc=DataMatrix(index=[historic.index[0]],data=[alloc_vals], columns=symbols) - alloc=alloc.append(DataMatrix(index=[historic.index[-1]], data=[alloc_vals], columns=symbols)) - alloc['_CASH']=alloc_val - return qs.quickSim(alloc,historic,1000) - -def reportFunctionality(funds, symbols,filename=sys.stdout): - if(len(symbols)!=0): - funds2=runOther(funds,symbols) - arg2=1 - else: - arg2=0 - - if(filename==sys.stdout): - html_file=sys.stdout - else: - html_file = open(filename,"w") - - #top - html_file.write("\n") - html_file.write("\n") - html_file.write("QSTK Generated Report from "+readableDate(funds.index[0])+" to "+readableDate(funds.index[-1])+"\n") - html_file.write("\n\n") - html_file.write("
\n\n") - - years=du.getYears(funds) - - html_file.write("

Performance Summary for "+sys.argv[1]+"

\n") - html_file.write("For the dates "+readableDate(funds.index[0])+" to "+readableDate(funds.index[-1])+"\n") - - - html_file.write("

Yearly Performance Metrics

\n") - - - html_file.write("\n") - html_file.write("\n") - for year in years: - html_file.write("\n") - html_file.write("\n") - - #yearly return - html_file.write("\n") - html_file.write("\n") - for year in years: - retur=getYearReturn(funds,year) - html_file.write("\n") - html_file.write("\n") - - #yearly winning days - html_file.write("\n") - html_file.write("\n") - for year in years: - # change to compare to inputs - ratio=tsu.getYearRatio(funds,year) - if(arg2!=0): - win=getWinningDays(funds,funds2,year) - html_file.write("\n") - else: - html_file.write("\n") - html_file.write("\n") - - #max draw down - html_file.write("\n") - html_file.write("\n") - for year in years: - drop=getYearMaxDrop(funds,year) - html_file.write("\n") - html_file.write("\n") - - #yearly sharpe ratio using daily rets - html_file.write("\n") - html_file.write("\n") - for year in years: - ratio=tsu.getYearRatio(funds,year) - html_file.write("\n") - html_file.write("\n") - - - #yearly sharpe ratio using monthly rets - html_file.write("\n") - html_file.write("\n") - for year in years: - ratio=getYearRatioUsingMonth(funds,year) - html_file.write("\n") - html_file.write("\n") - - html_file.write("
"+str(year)+"
Annualized Return:\n") - print >>html_file, "%.2f\n" % (retur*100) - html_file.write("%
Winning Days:\n") - print >>html_file, "%.2f\n" % (win*100) - html_file.write("%No comparison.
Max Draw Down:\n") - print >>html_file, "%.2f" % (drop*100) - html_file.write("%
Daily Sharpe Ratio:\n") - print >>html_file, "%.2f\n" % ratio - html_file.write("
Monthly Sharpe Ratio:\n") - print >>html_file, "%.2f\n" % ratio - html_file.write("
\n") - html_file.write("
\n\n") - - vals=funds.values; - vals2=np.append(vals,funds2.values,2) - - - df=DataMatrix(index=funds.index,data=funds.values, columns=['fund']) - df2=DataMatrix(index=funds2.index,data=funds2.values,columns=['other']) - df['other']=df2['other'] - - corrcoef=numpy.corrcoef(funds.values[0:-1],funds2.values) - html_file.write("

Correlation=") - print >>html_file, "%.2f\n" % corrcoef[0][1] - html_file.write("

\n") - html_file.write("
\n\n") - - - #montly returns - mrets=tsu.monthly(funds) - html_file.write("

Monthly Returns

\n") - html_file.write("\n") - html_file.write("\n") - html_file.write("\n") - month_names=du.getMonthNames() - for name in month_names: - html_file.write("\n") - html_file.write("\n") - - i=0 - for year in years: - html_file.write("\n") - html_file.write("\n") - months=du.getMonths(funds,year) - for month in months: - html_file.write("\n") - i+=1 - html_file.write("\n") - html_file.write("
"+str(name)+"
"+str(year)+"\n") - print >>html_file, "%.2f\n" % (mrets[i]*100) - html_file.write("%
\n") - html_file.write("
\n\n") - - #fund value graph - fundlist=[]; - fundlist.append(funds) - fundlist.append(funds2) - converter.fundsToPNG(fundlist,'funds.png') - html_file.write("\n") - html_file.write("
\n\n") - - #end - html_file.write("
\n\n") - html_file.write("") - - - -if __name__ == '__main__': - input=open(sys.argv[1],"r") - funds=cPickle.load(input) - - if(len(sys.argv)>2): - input2=sys.argv[2] - symbols=sys.argv[2].split(',') - reportFunctionality(funds,symbols,'investors_report.html') - else: - reportFunctionality(funds,0,'investors_report.html') +# +# report.py +# +# Generates a html file containing a report based +# off a timeseries of funds from a pickle file. +# +# Drew Bratcher +# + +from pylab import * +import numpy +from QSTK.qstkutil import DataAccess as da +from QSTK.qstkutil import qsdateutil as du +from QSTK.qstkutil import tsutil as tsu +from QSTK.quicksim import quickSim as qs +from . import converter +import datetime as dt +from pandas import * +import matplotlib.pyplot as plt +import pickle + +def readableDate(date): + return str(date.month)+"/"+str(date.day)+"/"+str(date.year) + +def getYearReturn(funds, year): + days=[] + for date in funds.index: + if(date.year==year): + days.append(date) + return funds[days[-1]]/funds[days[0]]-1 + +def getYearMaxDrop(funds, year): + days=[] + for date in funds.index: + if(date.year==year): + days.append(date) + maxdrop=0 + prevday=days[0] + for day in days[1:-1]: + if((funds[day]/funds[prevday]-1)f2ret[i]): + win+=1 + tot+=1 + return float(win)/tot + +def runOther(funds,symbols): + tsstart =dt.datetime(funds.index[0].year,funds.index[0].month,funds.index[0].day) + tsend =dt.datetime(funds.index[-1].year,funds.index[-1].month,funds.index[-1].day) + timeofday=dt.timedelta(hours=16) + timestamps=du.getNYSEdays(tsstart,tsend,timeofday) + dataobj=da.DataAccess('Norgate') + historic=dataobj.get_data(timestamps,symbols,"close") + alloc_val=float(0.1/(float(len(symbols))+1)) + alloc_vals=alloc_val*ones(len(symbols)) + alloc=DataMatrix(index=[historic.index[0]],data=[alloc_vals], columns=symbols) + alloc=alloc.append(DataMatrix(index=[historic.index[-1]], data=[alloc_vals], columns=symbols)) + alloc['_CASH']=alloc_val + return qs.quickSim(alloc,historic,1000) + +def reportFunctionality(funds, symbols,filename=sys.stdout): + if(len(symbols)!=0): + funds2=runOther(funds,symbols) + arg2=1 + else: + arg2=0 + + if(filename==sys.stdout): + html_file=sys.stdout + else: + html_file = open(filename,"w") + + #top + html_file.write("\n") + html_file.write("\n") + html_file.write("QSTK Generated Report from "+readableDate(funds.index[0])+" to "+readableDate(funds.index[-1])+"\n") + html_file.write("\n\n") + html_file.write("
\n\n") + + years=du.getYears(funds) + + html_file.write("

Performance Summary for "+sys.argv[1]+"

\n") + html_file.write("For the dates "+readableDate(funds.index[0])+" to "+readableDate(funds.index[-1])+"\n") + + + html_file.write("

Yearly Performance Metrics

\n") + + + html_file.write("\n") + html_file.write("\n") + for year in years: + html_file.write("\n") + html_file.write("\n") + + #yearly return + html_file.write("\n") + html_file.write("\n") + for year in years: + retur=getYearReturn(funds,year) + html_file.write("\n") + html_file.write("\n") + + #yearly winning days + html_file.write("\n") + html_file.write("\n") + for year in years: + # change to compare to inputs - ratio=tsu.getYearRatio(funds,year) + if(arg2!=0): + win=getWinningDays(funds,funds2,year) + html_file.write("\n") + else: + html_file.write("\n") + html_file.write("\n") + + #max draw down + html_file.write("\n") + html_file.write("\n") + for year in years: + drop=getYearMaxDrop(funds,year) + html_file.write("\n") + html_file.write("\n") + + #yearly sharpe ratio using daily rets + html_file.write("\n") + html_file.write("\n") + for year in years: + ratio=tsu.getYearRatio(funds,year) + html_file.write("\n") + html_file.write("\n") + + + #yearly sharpe ratio using monthly rets + html_file.write("\n") + html_file.write("\n") + for year in years: + ratio=getYearRatioUsingMonth(funds,year) + html_file.write("\n") + html_file.write("\n") + + html_file.write("
"+str(year)+"
Annualized Return:\n") + print("%.2f\n" % (retur*100), file=html_file) + html_file.write("%
Winning Days:\n") + print("%.2f\n" % (win*100), file=html_file) + html_file.write("%No comparison.
Max Draw Down:\n") + print("%.2f" % (drop*100), file=html_file) + html_file.write("%
Daily Sharpe Ratio:\n") + print("%.2f\n" % ratio, file=html_file) + html_file.write("
Monthly Sharpe Ratio:\n") + print("%.2f\n" % ratio, file=html_file) + html_file.write("
\n") + html_file.write("
\n\n") + + vals=funds.values; + vals2=np.append(vals,funds2.values,2) + + + df=DataMatrix(index=funds.index,data=funds.values, columns=['fund']) + df2=DataMatrix(index=funds2.index,data=funds2.values,columns=['other']) + df['other']=df2['other'] + + corrcoef=numpy.corrcoef(funds.values[0:-1],funds2.values) + html_file.write("

Correlation=") + print("%.2f\n" % corrcoef[0][1], file=html_file) + html_file.write("

\n") + html_file.write("
\n\n") + + + #montly returns + mrets=tsu.monthly(funds) + html_file.write("

Monthly Returns

\n") + html_file.write("\n") + html_file.write("\n") + html_file.write("\n") + month_names=du.getMonthNames() + for name in month_names: + html_file.write("\n") + html_file.write("\n") + + i=0 + for year in years: + html_file.write("\n") + html_file.write("\n") + months=du.getMonths(funds,year) + for month in months: + html_file.write("\n") + i+=1 + html_file.write("\n") + html_file.write("
"+str(name)+"
"+str(year)+"\n") + print("%.2f\n" % (mrets[i]*100), file=html_file) + html_file.write("%
\n") + html_file.write("
\n\n") + + #fund value graph + fundlist=[]; + fundlist.append(funds) + fundlist.append(funds2) + converter.fundsToPNG(fundlist,'funds.png') + html_file.write("\n") + html_file.write("
\n\n") + + #end + html_file.write("
\n\n") + html_file.write("") + + + +if __name__ == '__main__': + input=open(sys.argv[1],"r") + funds=pickle.load(input) + + if(len(sys.argv)>2): + input2=sys.argv[2] + symbols=sys.argv[2].split(',') + reportFunctionality(funds,symbols,'investors_report.html') + else: + reportFunctionality(funds,0,'investors_report.html') diff --git a/bin/sinewave_data_generator.py b/bin/sinewave_data_generator.py index 1f24ee48e..1970dfffe 100644 --- a/bin/sinewave_data_generator.py +++ b/bin/sinewave_data_generator.py @@ -1,6 +1,6 @@ import datetime import QSTK.qstkutil.qsdateutil -import StringIO +import io import math import random @@ -17,12 +17,12 @@ def genfile(fname,dt_start,dt_end): amp = 20.0*random.random() period = 10.0+(random.random()*100.0) sin_gen = lambda x: (mean+(amp*math.sin(((math.pi*2)/period)*x))) - print fname,"parameters" - print "Mean:",mean - print "Amplitude:",amp - print "Period:", period + print(fname,"parameters") + print("Mean:",mean) + print("Amplitude:",amp) + print("Period:", period) dllen = len(datelist) - for t in xrange(dllen): + for t in range(dllen): date = datelist[(dllen-1)-t] val = sin_gen(t) line = (date.date().isoformat(),)+((val,)*5) @@ -30,6 +30,6 @@ def genfile(fname,dt_start,dt_end): #print write_to.getvalue() write_to.close() -for i in xrange(NUMFILES): +for i in range(NUMFILES): genfile("ML4T-%03d.csv"%i,START,END) #genfile("foo.txt",datetime.datetime(2011,9,13),datetime.datetime(2012,9,13)) diff --git a/ez_setup.py b/ez_setup.py index b74adc065..a12de6b5f 100644 --- a/ez_setup.py +++ b/ez_setup.py @@ -70,10 +70,10 @@ def _validate_md5(egg_name, data): if egg_name in md5_data: digest = md5(data).hexdigest() if digest != md5_data[egg_name]: - print >>sys.stderr, ( + print(( "md5 validation of %s failed! (Possible download problem?)" % egg_name - ) + ), file=sys.stderr) sys.exit(2) return data @@ -103,14 +103,14 @@ def do_download(): return do_download() try: pkg_resources.require("setuptools>="+version); return - except pkg_resources.VersionConflict, e: + except pkg_resources.VersionConflict as e: if was_imported: - print >>sys.stderr, ( + print(( "The required version of setuptools (>=%s) is not available, and\n" "can't be installed while this script is running. Please install\n" " a more recent version first, using 'easy_install -U setuptools'." "\n\n(Currently using %r)" - ) % (version, e.args[0]) + ) % (version, e.args[0]), file=sys.stderr) sys.exit(2) except pkg_resources.DistributionNotFound: pass @@ -129,7 +129,7 @@ def download_setuptools( with a '/'). `to_dir` is the directory where the egg will be downloaded. `delay` is the number of seconds to pause before an actual download attempt. """ - import urllib2, shutil + import urllib.request, urllib.error, urllib.parse, shutil egg_name = "setuptools-%s-py%s.egg" % (version,sys.version[:3]) url = download_base + egg_name saveto = os.path.join(to_dir, egg_name) @@ -155,7 +155,7 @@ def download_setuptools( version, download_base, delay, url ); from time import sleep; sleep(delay) log.warn("Downloading %s", url) - src = urllib2.urlopen(url) + src = urllib.request.urlopen(url) # Read/write all in one block, so we don't create a corrupt file # if the download is interrupted. data = _validate_md5(egg_name, src.read()) @@ -216,10 +216,10 @@ def main(argv, version=DEFAULT_VERSION): os.unlink(egg) else: if setuptools.__version__ == '0.0.1': - print >>sys.stderr, ( + print(( "You have an obsolete version of setuptools installed. Please\n" "remove it from your system entirely before rerunning this script." 
- ) + ), file=sys.stderr) sys.exit(2) req = "setuptools>="+version @@ -238,8 +238,8 @@ def main(argv, version=DEFAULT_VERSION): from setuptools.command.easy_install import main main(argv) else: - print "Setuptools version",version,"or greater has been installed." - print '(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)' + print("Setuptools version",version,"or greater has been installed.") + print('(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)') def update_md5(filenames): """Update our built-in md5 registry""" @@ -252,7 +252,7 @@ def update_md5(filenames): md5_data[base] = md5(f.read()).hexdigest() f.close() - data = [" %r: %r,\n" % it for it in md5_data.items()] + data = [" %r: %r,\n" % it for it in list(md5_data.items())] data.sort() repl = "".join(data) @@ -262,7 +262,7 @@ def update_md5(filenames): match = re.search("\nmd5_data = {\n([^}]+)}", src) if not match: - print >>sys.stderr, "Internal error!" + print("Internal error!", file=sys.stderr) sys.exit(2) src = src[:match.start(1)] + repl + src[match.end(1):] diff --git a/setup.py b/setup.py index 996332f0a..d7bde41b5 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ include_package_data=True, long_description=open('README.md').read(), author_email='sourabh@sourabhbajaj.com', - url='https://github.com/tucker777/QuantSoftwareToolkit', + url='https://github.com/QuantSoftware/QuantSoftwareToolkit', license=open('LICENSE.txt').read(), description='QuantSoftware Toolkit', install_requires=[ @@ -20,8 +20,9 @@ "scipy >= 0.9.0", "matplotlib >= 1.1.0", "pandas >= 0.7.3", - "python-dateutil == 1.5", + "python-dateutil >= 2.0", "scikit-learn >= 0.11", + "cvxopt >= 1.1.3", ], classifiers=[ 'Development Status :: 5 - Production/Stable',