-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathDataPrep.py
More file actions
109 lines (78 loc) · 3.9 KB
/
DataPrep.py
File metadata and controls
109 lines (78 loc) · 3.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import os

import numpy as np
import pandas as pd
class StockCorrelation:
    """Rank stocks by how strongly their closing prices correlate.

    For every stock (one row of ``closing_stock_price``) the constructor
    precomputes a list of the *other* stocks' row indices, ordered from the
    most correlated to the least correlated.

    Parameters
    ----------
    closing_stock_price : array-like, shape (num_stocks, num_samples)
        One row of closing prices per stock. Rows are treated as the
        variables by ``np.corrcoef``.

    Attributes
    ----------
    stock_prices
        The input, stored as given.
    stock_covariances
        Matrix returned by ``np.corrcoef``. Despite the attribute name these
        are correlation coefficients, not covariances.
    indexed_stocks
        ``indexed_stocks[i]`` is the list of stock indices other than ``i``,
        most correlated with stock ``i`` first.
    num_stocks
        Number of rows in ``closing_stock_price``.
    """

    def __init__(self, closing_stock_price):
        self.stock_prices = closing_stock_price
        self.num_stocks = np.size(closing_stock_price, axis=0)
        self.stock_covariances = None
        self.indexed_stocks = [None] * self.num_stocks
        self.calculate_covariance()
        self.sort()

    def calculate_covariance(self):
        """Compute the pairwise correlation-coefficient matrix of the prices."""
        self.stock_covariances = np.corrcoef(self.stock_prices)

    def sort(self):
        """Fill ``indexed_stocks`` with each stock's peers, most similar first."""
        for index, correlations in enumerate(self.stock_covariances):
            # argsort is ascending; reverse it so the highest correlation
            # comes first.
            ranked = correlations.argsort()[::-1]
            # A stock must not appear in its own similarity list, so drop
            # its own index wherever it landed in the ranking.
            self.indexed_stocks[index] = ranked[ranked != index].tolist()

    def get_similar_stock_list(self, stock_index):
        """Return the indices of the other stocks, most similar first.

        Parameters
        ----------
        stock_index : int
            Row index of the stock of interest.

        Returns
        -------
        list of int
            Indices of every other stock, ordered by decreasing correlation
            with ``stock_index``.

        Raises
        ------
        IndexError
            If ``stock_index`` is not in ``0 .. num_stocks - 1``.
        """
        if 0 <= stock_index < self.num_stocks:
            return self.indexed_stocks[stock_index]
        # Raising a bare string is itself a TypeError in Python 3; raise a
        # real exception so the message actually reaches the caller.
        raise IndexError("Use a number between 0 and number of stocks-1")
class DataFromFile:
    """Load per-stock CSV files and build feature matrices centred on Apple.

    Three files (``APPL.csv``, ``MSFT.csv``, ``XOM.csv``) are read from
    ``data_dir`` with the first two lines skipped and no header row.
    CSV columns 1, 2 and 3 of each file are kept as that stock's features,
    and column 5 of the Apple file is stored as the class labels.

    NOTE(review): the original comment gives the stock data order as
    close, volume, open, high, low, yet only three columns are retained per
    stock -- confirm the CSV layout and the meaning of column 5.

    Parameters
    ----------
    data_dir : str, optional
        Directory containing the three CSV files. Defaults to
        ``'ClassTestData'``, the location that used to be hard-coded.

    Attributes
    ----------
    stock_c_v_o_h_l : list of ndarray
        Per-stock feature arrays, indexed by ``apple_index``, ``msft_index``
        and ``xom_index``.
    apple_class : ndarray
        Column 5 of the Apple file.
    num_stocks : int
        Number of stocks loaded (3).
    """

    # File stems, in the order matching apple_index / msft_index / xom_index.
    _TICKER_FILES = ('APPL', 'MSFT', 'XOM')
    # CSV columns kept as the features of every stock.
    _FEATURE_COLUMNS = [1, 2, 3]
    # CSV column of the Apple file stored as the class labels.
    _CLASS_COLUMN = 5

    def __init__(self, data_dir='ClassTestData'):
        tables = [
            self._read_table(os.path.join(data_dir, stem + '.csv'))
            for stem in self._TICKER_FILES
        ]
        self.stock_c_v_o_h_l = [
            np.array(table[:, self._FEATURE_COLUMNS]) for table in tables
        ]
        self.apple_class = np.array(tables[0][:, self._CLASS_COLUMN])
        self.sample_params = None
        self.sample_class = None
        self.apple_similar = None
        self.num_stocks = len(self._TICKER_FILES)
        self.apple_index = 0
        self.msft_index = 1
        self.xom_index = 2

    @staticmethod
    def _read_table(path):
        """Read one headerless CSV, skipping its first two lines, as an array."""
        return np.array(pd.read_csv(path, header=None, skiprows=2))

    def calc_stock_cor_for_apple(self):
        """Return the other stocks' indices, most correlated with Apple first.

        The correlation is computed on the first feature column of every
        stock, converted to ``float64``.
        """
        first_columns = [features[:, 0] for features in self.stock_c_v_o_h_l]
        closing_price = np.vstack(first_columns).astype(np.float64)
        apple_cor = StockCorrelation(closing_price)
        return apple_cor.get_similar_stock_list(self.apple_index)

    def create_params(self, number=1):
        """Stack Apple's features with those of its most similar stocks.

        Parameters
        ----------
        number : int, optional
            How many of the most similar stocks to append. Values outside
            ``0 .. num_stocks - 1`` fall back to 1.

        Returns
        -------
        ndarray
            Apple's feature columns followed, column-wise, by the feature
            columns of the ``number`` most similar stocks. Class labels are
            not included; use ``get_class`` for those.
        """
        if not 0 <= number < self.num_stocks:
            number = 1
        similar_list = self.calc_stock_cor_for_apple()
        blocks = [self.stock_c_v_o_h_l[self.apple_index]]
        blocks.extend(self.stock_c_v_o_h_l[peer] for peer in similar_list[:number])
        # column_stack always builds a new array, so callers never receive a
        # reference to the internally stored Apple data.
        return np.column_stack(blocks)

    def get_class(self):
        """Return the class labels read from the Apple file."""
        return self.apple_class