-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodels.py
70 lines (49 loc) · 2.66 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# Konrad Maciejczyk, 2023, Wrocław University of Science and Technology
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import neighbors
from algorithms import KNN, NearestCentroid
def normalizeData(x_train, x_test):
    """Standardize the train and test feature sets with one shared scaler.

    The StandardScaler is fitted on ``x_train`` only and then applied to both
    sets, so no statistics computed from ``x_test`` influence the scaling.

    Returns the pair ``(scaled_x_train, scaled_x_test)``.
    """
    # fit() returns the fitted scaler itself, so construction and fitting chain.
    standardizer = StandardScaler().fit(x_train)
    return standardizer.transform(x_train), standardizer.transform(x_test)
def dataSplit(data, random_state = 42, test_size=.2, verbose=False):
    """Split a feature table into train/test feature matrices and label vectors.

    Parameters:
        data: pandas DataFrame holding a "label" column, a "filename" column
            and the feature columns. Every column except "label" and
            "filename" is used as a feature.
        random_state: seed forwarded to train_test_split.
        test_size: test-set proportion forwarded to train_test_split.
        verbose: when True, print the shapes of the four resulting arrays.

    Returns:
        x_train, x_test, y_train, y_test as NumPy arrays. Labels are integer
        codes assigned by pd.factorize (order of first appearance).
    """
    X = data.drop(["label", "filename"], axis=1).to_numpy()

    # Encode the labels of the frame that was passed in, not of a module-level
    # global, and do it without writing back so the caller's frame is intact.
    y = pd.factorize(data["label"])[0]

    # Honour the caller-supplied seed and split ratio instead of fixed values.
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, random_state=random_state, shuffle=True, test_size=test_size
    )

    if verbose:
        print("x_train={}, x_test={}, y_train={}, y_test={}".format(x_train.shape, x_test.shape, y_train.shape, y_test.shape))

    return x_train, x_test, y_train, y_test
def trainKNN(x_train, x_test, y_train, y_test, n = 3, perform_test = False):
    """Fit the project's KNN and scikit-learn's KNeighborsClassifier side by side.

    Both classifiers are trained on the standardized training set produced by
    normalizeData and both use ``n`` neighbours, so the printed accuracies are
    directly comparable.

    Parameters:
        x_train, x_test: feature matrices (standardized internally).
        y_train, y_test: matching label vectors.
        n: number of neighbours used by both classifiers.
        perform_test: when True, print each classifier's score on the
            standardized test set as a percentage.

    Returns:
        The fitted scikit-learn KNeighborsClassifier.
        NOTE(review): it was fitted on standardized features and the scaler
        is not returned, so callers must scale inputs the same way before
        predicting.
    """
    _x_train, _x_test = normalizeData(x_train, x_test)

    my_knn_clf = KNN(k=n)
    my_knn_clf.fit(_x_train, y_train)
    if perform_test:
        print("Testing set score accuracy: {:.2f}% for KNN (my_implementation) ({} neighbors)".format(my_knn_clf.score(_x_test, y_test)*100, n))

    # Use the requested neighbour count here too; the message below reports n.
    sklearn_knn_clf = neighbors.KNeighborsClassifier(n_neighbors=n)
    sklearn_knn_clf.fit(_x_train, y_train)
    if perform_test:
        print("Testing set score accuracy: {:.2f}% for KNN (sklearn) ({} neighbors) \n".format(sklearn_knn_clf.score(_x_test, y_test)*100, n))

    return sklearn_knn_clf
def trainNearestCentroid(x_train, x_test, y_train, y_test, perform_test = False):
    """Fit the project's NearestCentroid and scikit-learn's, one after the other.

    Both classifiers are trained on the standardized training set produced by
    normalizeData. When ``perform_test`` is True, each classifier's score on
    the standardized test set is printed as a percentage right after it is
    fitted.

    Returns the fitted scikit-learn NearestCentroid classifier.
    """
    scaled_train, scaled_test = normalizeData(x_train, x_test)

    # (constructor, report template) pairs, processed in order; the
    # scikit-learn entry comes last so it is the one left bound to `clf`.
    candidates = (
        (NearestCentroid, "Testing set score accuracy: {:.2f}% for NearestCentroid (my_implementation)"),
        (neighbors.NearestCentroid, "Testing set score accuracy: {:.2f}% for NearestCentroid (sklearn) \n"),
    )

    for build, template in candidates:
        clf = build()
        clf.fit(scaled_train, y_train)
        if perform_test:
            print(template.format(clf.score(scaled_test, y_test)*100))

    return clf
if __name__ == '__main__':
    # Script entry point: load the extracted-features table, split it once,
    # then train and evaluate both classifier families on the same split.
    datapd = pd.read_csv('gtzan_extracted_features.csv')

    # dataSplit returns (x_train, x_test, y_train, y_test), which is exactly
    # the positional order both training functions expect.
    splits = dataSplit(datapd)

    trainNearestCentroid(*splits, perform_test = True)
    trainKNN(*splits, perform_test = True)