-
Notifications
You must be signed in to change notification settings - Fork 192
Expand file tree
/
Copy pathmodels.py
More file actions
112 lines (104 loc) · 6.02 KB
/
models.py
File metadata and controls
112 lines (104 loc) · 6.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from tensorflow.keras import layers
from tensorflow.keras.layers import TimeDistributed, LayerNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
import kapre
from kapre.composed import get_melspectrogram_layer
import tensorflow as tf
import os
def Conv1D(N_CLASSES=10, SR=16000, DT=1.0):
    """Build and compile a time-distributed 1D-convolution audio classifier.

    Raw waveform input of shape (SR*DT, 1) is converted in-graph to a
    log-mel spectrogram, normalized, run through five Conv1D stages
    (applied per time step via TimeDistributed, with 2x2 max pooling
    between them), globally max-pooled, and classified with softmax.

    Args:
        N_CLASSES: number of output classes (softmax units).
        SR: audio sample rate in Hz.
        DT: clip duration in seconds.

    Returns:
        A compiled tf.keras Model (adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    n_samples = int(SR * DT)
    mel = get_melspectrogram_layer(input_shape=(n_samples, 1),
                                   n_mels=128,
                                   pad_end=True,
                                   n_fft=512,
                                   win_length=400,
                                   hop_length=160,
                                   sample_rate=SR,
                                   return_decibel=True,
                                   input_data_format='channels_last',
                                   output_data_format='channels_last')
    # NOTE(review): this is LayerNormalization despite the 'batch_norm'
    # layer name; the name is kept byte-for-byte so weights saved/loaded
    # by layer name remain compatible.
    net = LayerNormalization(axis=2, name='batch_norm')(mel.output)
    # (filters, activation, conv layer name, pool layer name or None).
    # Only the first stage uses tanh; the last stage has no pooling.
    conv_specs = [
        (8,   'tanh', 'td_conv_1d_tanh',   'max_pool_2d_1'),
        (16,  'relu', 'td_conv_1d_relu_1', 'max_pool_2d_2'),
        (32,  'relu', 'td_conv_1d_relu_2', 'max_pool_2d_3'),
        (64,  'relu', 'td_conv_1d_relu_3', 'max_pool_2d_4'),
        (128, 'relu', 'td_conv_1d_relu_4', None),
    ]
    for filters, act, conv_name, pool_name in conv_specs:
        net = TimeDistributed(layers.Conv1D(filters, kernel_size=(4), activation=act),
                              name=conv_name)(net)
        if pool_name is not None:
            net = layers.MaxPooling2D(pool_size=(2, 2), name=pool_name)(net)
    net = layers.GlobalMaxPooling2D(name='global_max_pooling_2d')(net)
    net = layers.Dropout(rate=0.1, name='dropout')(net)
    net = layers.Dense(64, activation='relu', activity_regularizer=l2(0.001), name='dense')(net)
    out = layers.Dense(N_CLASSES, activation='softmax', name='softmax')(net)
    model = Model(inputs=mel.input, outputs=out, name='1d_convolution')
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def Conv2D(N_CLASSES=10, SR=16000, DT=1.0):
    """Build and compile a 2D-convolution audio classifier.

    Raw waveform input of shape (SR*DT, 1) is converted in-graph to a
    log-mel spectrogram, normalized, run through five Conv2D stages with
    shrinking kernels (7x7 -> 5x5 -> 3x3) and 2x2 max pooling between
    them, flattened, and classified with softmax.

    Args:
        N_CLASSES: number of output classes (softmax units).
        SR: audio sample rate in Hz.
        DT: clip duration in seconds.

    Returns:
        A compiled tf.keras Model (adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    n_samples = int(SR * DT)
    mel = get_melspectrogram_layer(input_shape=(n_samples, 1),
                                   n_mels=128,
                                   pad_end=True,
                                   n_fft=512,
                                   win_length=400,
                                   hop_length=160,
                                   sample_rate=SR,
                                   return_decibel=True,
                                   input_data_format='channels_last',
                                   output_data_format='channels_last')
    # NOTE(review): this is LayerNormalization despite the 'batch_norm'
    # layer name; the name is kept byte-for-byte so weights saved/loaded
    # by layer name remain compatible.
    net = LayerNormalization(axis=2, name='batch_norm')(mel.output)
    # (filters, kernel, activation, conv layer name, pool layer name or
    # None). Only the first stage uses tanh; the last has no pooling.
    conv_specs = [
        (8,  (7, 7), 'tanh', 'conv2d_tanh',   'max_pool_2d_1'),
        (16, (5, 5), 'relu', 'conv2d_relu_1', 'max_pool_2d_2'),
        (16, (3, 3), 'relu', 'conv2d_relu_2', 'max_pool_2d_3'),
        (32, (3, 3), 'relu', 'conv2d_relu_3', 'max_pool_2d_4'),
        (32, (3, 3), 'relu', 'conv2d_relu_4', None),
    ]
    for filters, kernel, act, conv_name, pool_name in conv_specs:
        net = layers.Conv2D(filters, kernel_size=kernel, activation=act,
                            padding='same', name=conv_name)(net)
        if pool_name is not None:
            net = layers.MaxPooling2D(pool_size=(2, 2), padding='same',
                                      name=pool_name)(net)
    net = layers.Flatten(name='flatten')(net)
    net = layers.Dropout(rate=0.2, name='dropout')(net)
    net = layers.Dense(64, activation='relu', activity_regularizer=l2(0.001), name='dense')(net)
    out = layers.Dense(N_CLASSES, activation='softmax', name='softmax')(net)
    model = Model(inputs=mel.input, outputs=out, name='2d_convolution')
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def LSTM(N_CLASSES=10, SR=16000, DT=1.0):
    """Build and compile a bidirectional-LSTM audio classifier.

    Raw waveform input of shape (SR*DT, 1) is converted in-graph to a
    log-mel spectrogram, normalized, flattened per time step into a
    (time, features) sequence, passed through a tanh projection and a
    bidirectional LSTM with a skip connection, then pooled, flattened,
    and classified with softmax.

    Args:
        N_CLASSES: number of output classes (softmax units).
        SR: audio sample rate in Hz.
        DT: clip duration in seconds.

    Returns:
        A compiled tf.keras Model (adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    input_shape = (int(SR*DT), 1)
    # FIX: removed the stray name='2d_convolution' kwarg from this call.
    # It was a copy-paste artifact from Conv2D() that mislabeled this
    # recurrent model's spectrogram front-end; the sibling builders
    # Conv1D()/Conv2D() pass no name here either. NOTE(review):
    # checkpoints saved by the old code that load weights *by layer
    # name* would need that name restored to match.
    i = get_melspectrogram_layer(input_shape=input_shape,
                                 n_mels=128,
                                 pad_end=True,
                                 n_fft=512,
                                 win_length=400,
                                 hop_length=160,
                                 sample_rate=SR,
                                 return_decibel=True,
                                 input_data_format='channels_last',
                                 output_data_format='channels_last')
    # NOTE(review): this is LayerNormalization despite the 'batch_norm'
    # layer name; the name is kept for weight-by-name compatibility.
    x = LayerNormalization(axis=2, name='batch_norm')(i.output)
    # Collapse the (mel, channel) axes of each time step so the
    # recurrent layers see a 2-D (time, features) sequence.
    x = TimeDistributed(layers.Reshape((-1,)), name='reshape')(x)
    s = TimeDistributed(layers.Dense(64, activation='tanh'),
                        name='td_dense_tanh')(x)
    x = layers.Bidirectional(layers.LSTM(32, return_sequences=True),
                             name='bidirectional_lstm')(s)
    # Skip connection: concatenate the pre-LSTM projection with the
    # LSTM output along the feature axis.
    x = layers.concatenate([s, x], axis=2, name='skip_connection')
    x = layers.Dense(64, activation='relu', name='dense_1_relu')(x)
    x = layers.MaxPooling1D(name='max_pool_1d')(x)
    x = layers.Dense(32, activation='relu', name='dense_2_relu')(x)
    x = layers.Flatten(name='flatten')(x)
    x = layers.Dropout(rate=0.2, name='dropout')(x)
    # NOTE(review): activity_regularizer penalizes activations, not
    # weights; if weight decay was intended, kernel_regularizer is the
    # usual choice — kept as-is to match Conv1D()/Conv2D().
    x = layers.Dense(32, activation='relu',
                     activity_regularizer=l2(0.001),
                     name='dense_3_relu')(x)
    o = layers.Dense(N_CLASSES, activation='softmax', name='softmax')(x)
    model = Model(inputs=i.input, outputs=o, name='long_short_term_memory')
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model