CS-584-Final-Project/DataPrep.py at master · joezhang2/CS-584-Final-Project · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import numpy as np
import pandas as pd


class StockCorrelation:
    """Rank stocks by how strongly their closing prices correlate.

    For every stock (one row of the input) the class stores the indices of
    all *other* stocks, ordered from most to least correlated.

    Attributes:
        stock_prices: the array passed to the constructor, one row per stock.
        num_stocks: number of rows in ``stock_prices``.
        stock_covariances: matrix returned by ``np.corrcoef``. Despite the
            attribute name these are correlation coefficients, not
            covariances; the name is kept for existing callers.
        indexed_stocks: ``indexed_stocks[i]`` is the list of the other stock
            indices ordered from most to least correlated with stock ``i``.
    """

    def __init__(self, closing_stock_price):
        """Compute the correlation matrix and the per-stock rankings.

        Args:
            closing_stock_price: 2-D numeric array-like with one row per
                stock and one column per observation.
        """
        self.stock_prices = closing_stock_price
        self.num_stocks = np.size(closing_stock_price, axis=0)

        # Placeholders; both are filled in by the two calls below.
        self.stock_covariances = [None] * self.num_stocks
        self.indexed_stocks = [None] * self.num_stocks

        self.calculate_covariance()
        self.sort()

    def calculate_covariance(self):
        """Store the pairwise correlation coefficients of ``stock_prices``."""
        # np.corrcoef collapses to a 0-d value when given a single row;
        # atleast_2d keeps the result iterable row by row in that case.
        self.stock_covariances = np.atleast_2d(np.corrcoef(self.stock_prices))

    def sort(self):
        """Fill ``indexed_stocks`` with each stock's most-to-least similar peers."""
        for index, correlations in enumerate(self.stock_covariances):
            # argsort is ascending, so reverse it to get most similar first.
            ranked = correlations.argsort()[::-1]

            # A stock must not appear in its own similarity list
            # (e.g. Stock A is not listed as similar to Stock A).
            self.indexed_stocks[index] = ranked[ranked != index].tolist()

    def get_similar_stock_list(self, stock_index):
        """Return the other stocks ordered from most to least similar.

        Args:
            stock_index: row index of the stock of interest.

        Returns:
            List of stock indices, excluding ``stock_index`` itself.

        Raises:
            ValueError: if ``stock_index`` is not in ``[0, num_stocks)``.
        """
        if 0 <= stock_index < self.num_stocks:
            return self.indexed_stocks[stock_index]
        # Raising a plain string is itself a TypeError in Python 3, so use a
        # real exception type that carries the message.
        raise ValueError("Use a number between 0 and number of stocks-1")

class DataFromFile:
    """Load the APPL, MSFT and XOM test CSVs and build Apple feature matrices.

    Attributes:
        stock_c_v_o_h_l: list with one array per stock holding CSV columns
            1, 2 and 3 of that stock's file, indexed by ``apple_index``,
            ``msft_index`` and ``xom_index``.
        apple_class: CSV column 5 of the Apple file, returned by
            ``get_class``.
        num_stocks: number of stocks loaded (3).
        sample_params, sample_class, apple_similar: initialised to ``None``
            and not otherwise used inside this class.
    """

    # CSV file stems, listed in the order of their index in stock_c_v_o_h_l.
    _TICKERS = ('APPL', 'MSFT', 'XOM')
    # Columns kept as per-stock features and the column used as class label.
    _FEATURE_COLUMNS = [1, 2, 3]
    _CLASS_COLUMN = 5

    # stock data order: close,volume,open,high,low
    # NOTE(review): only columns 1-3 are kept, so presumably just the first
    # three of those fields are loaded - confirm against the CSV layout.
    def __init__(self, data_dir='ClassTestData'):
        """Read the three stock CSV files from ``data_dir``.

        Args:
            data_dir: directory containing ``APPL.csv``, ``MSFT.csv`` and
                ``XOM.csv``. Defaults to the original hard-coded location.
        """
        self.num_stocks = len(self._TICKERS)
        self.apple_index = 0
        self.msft_index = 1
        self.xom_index = 2

        self.stock_c_v_o_h_l = [None] * self.num_stocks
        self.apple_class = None

        for position, ticker in enumerate(self._TICKERS):
            csv_path = '{}/{}.csv'.format(data_dir, ticker)
            # The first two rows of each file are skipped and no header row
            # is parsed, so columns are addressed by position.
            data = np.array(pd.read_csv(csv_path, header=None, skiprows=2))
            self.stock_c_v_o_h_l[position] = np.array(data[:, self._FEATURE_COLUMNS])

            # Only the Apple file supplies the class labels.
            if position == self.apple_index:
                self.apple_class = np.array(data[:, self._CLASS_COLUMN])

        self.sample_params = None
        self.sample_class = None

        self.apple_similar = None

    def calc_stock_cor_for_apple(self):
        """Return the other stocks' indices, most to least similar to Apple.

        Similarity is computed by ``StockCorrelation`` on the first kept
        column of every stock (one row per stock).
        """
        closing_price = np.vstack(
            [stock.T[0] for stock in self.stock_c_v_o_h_l]
        )
        # Convert to float64 before handing the rows to StockCorrelation.
        closing_price = np.float64(closing_price)

        apple_cor = StockCorrelation(closing_price)

        return apple_cor.get_similar_stock_list(self.apple_index)

    def create_params(self, number=1):
        """Build Apple's feature matrix extended with similar stocks' columns.

        Args:
            number: how many of the most similar stocks to append. Values
                outside ``[0, num_stocks)`` fall back to 1.

        Returns:
            Apple's feature columns followed, column-wise, by the feature
            columns of the ``number`` most similar stocks.
        """
        if not 0 <= number < self.num_stocks:
            number = 1

        similar_list = self.calc_stock_cor_for_apple()
        return_params = self.stock_c_v_o_h_l[self.apple_index]
        for similar_index in similar_list[:number]:
            return_params = np.column_stack(
                (return_params, self.stock_c_v_o_h_l[similar_index])
            )

        return return_params

    def get_class(self):
        """Return the class column loaded from the Apple file."""
        return self.apple_class