# audio_input.py
import pyaudio
import wave
import librosa
import numpy as np
import sys
import tensorflow as tf
import time
import threading
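# mutex guards ringBuffer, which is shared between the PyAudio callback thread
# (producer) and the main loop at the bottom of the file (consumer).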
mutex = threading.Lock()
class RingBuffer:
    """Class that implements a not-yet-full buffer."""
    def __init__(self, size_max):
        self.max = size_max
        self.data = []

    class __Full:
        """Class that implements a full buffer."""
        def append(self, x):
            """Append an element, overwriting the oldest one."""
            self.data[self.cur] = x
            self.cur = (self.cur + 1) % self.max

        def get(self):
            """Return a list of elements in correct (oldest-to-newest) order."""
            return self.data[self.cur:] + self.data[:self.cur]

    def append(self, x):
        """Append an element at the end of the buffer."""
        self.data.append(x)
        if len(self.data) == self.max:
            self.cur = 0
            # Permanently change self's class from not-yet-full to full.
            self.__class__ = self.__Full

    def get(self):
        """Return a list of elements from the oldest to the newest."""
        return self.data

ringBuffer = RingBuffer(100 * 4096)
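# Note: RingBuffer above counts appended chunks, not bytes. Each callback
# delivers one CHUNK of 2048 int16 frames (4096 bytes), so a capacity of
# 100 * 4096 entries is far more than 100 chunks; the figure may have been
# intended as a byte budget (100 chunks * 4096 bytes each).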
def buf_to_float(x, n_bytes=2, dtype=np.float32):
    # Invert the scale of the data
    scale = 1. / float(1 << ((8 * n_bytes) - 1))
    # Construct the format string
    fmt = '<i{:d}'.format(n_bytes)
    # Rescale and format the data buffer
    return scale * np.frombuffer(x, fmt).astype(dtype)
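# Illustrative example (not part of the original script): 16-bit PCM samples
# are scaled by 1/32768 into [-1.0, 1.0), e.g.
#   buf_to_float(np.array([0, 16384, -32768], dtype=np.int16).tobytes())
#   -> array([ 0. ,  0.5, -1. ], dtype=float32)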
def extract_feature(data, sr=8000):
    # X, sample_rate = librosa.load(file_name)
    sample_rate = sr
    X = data
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate, fmin=60.0).T, axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
    # rmse = np.mean(librosa.feature.rmse(y=X, frame_length=128).T, axis=0)
    return mfccs, chroma, mel, contrast, tonnetz
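# With librosa's default settings the concatenated feature vector has
# 40 (MFCC) + 12 (chroma) + 128 (mel) + 7 (spectral contrast) + 6 (tonnetz)
# = 193 dimensions, which is where n_dim = 193 below comes from.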
"""
def callback(in_data, frame_count, time_info, flag):
#extraction data
x=buf_to_float(in_data)
mfccs, chroma, mel, contrast,tonnetz = extract_feature(x)
ext_features = np.hstack([mfccs,chroma,mel,contrast,tonnetz])
features= np.empty((0,193))
features = np.vstack([features,ext_features])
test_x =features
#tensorflow execution
training_epochs = 5000
n_dim = 193#features.shape[1]
n_classes =10
n_hidden_units_one = 193
n_hidden_units_two = 386
sd = 1 / np.sqrt(n_dim)
learning_rate = 0.01
X = tf.placeholder(tf.float32,[None,n_dim])
W_1 = tf.Variable(tf.random_normal([n_dim,n_hidden_units_one], mean = 0, stddev=sd),name="Weight_1")#193,280
b_1 = tf.Variable(tf.random_normal([n_hidden_units_one], mean = 0, stddev=sd),name="bias_1")
h_1 = tf.nn.tanh(tf.matmul(X,W_1) + b_1)
W_2 = tf.Variable(tf.random_normal([n_hidden_units_one,n_hidden_units_two], mean = 0, stddev=sd),name="Weight_2")#280,300
b_2 = tf.Variable(tf.random_normal([n_hidden_units_two], mean = 0, stddev=sd),name="bias_2")
h_2 = tf.nn.sigmoid(tf.matmul(h_1,W_2) + b_2)
W = tf.Variable(tf.random_normal([n_hidden_units_two,n_classes], mean = 0, stddev=sd),name="W")#300,7
b = tf.Variable(tf.random_normal([n_classes], mean = 0, stddev=sd),name="b")
y_ = tf.nn.softmax(tf.matmul(h_2,W) + b)
saver = tf.train.Saver()
y_pred = None
with tf.Session() as sess:
saver.restore(sess, "D:\\source\\python\\UrbanSound8K\\pkl\\audio.ckpt")
y_pred = sess.run(tf.argmax(y_,1),feed_dict={X: test_x})
#print(y_pred)
if (y_pred==6):
print('gunshot detected')
return None, pyaudio.paContinue
"""
def callback(in_data, frame_count, time_info, flag):
    # print(frame_count)
    # print(len(in_data))
    mutex.acquire()
    ringBuffer.append(in_data)
    mutex.release()
    return None, pyaudio.paContinue
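# The callback only copies the raw bytes into the ring buffer under the lock
# and returns (None, pyaudio.paContinue), which keeps the input-only stream
# running; feature extraction and inference happen in the main thread so the
# audio callback stays fast.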
training_epochs = 5000
n_dim = 193  # features.shape[1]
n_classes = 10
n_hidden_units_one = 193
n_hidden_units_two = 386
sd = 1 / np.sqrt(n_dim)
learning_rate = 0.01
X = tf.placeholder(tf.float32, [None, n_dim])
W_1 = tf.Variable(tf.random_normal([n_dim, n_hidden_units_one], mean=0, stddev=sd), name="Weight_1")
b_1 = tf.Variable(tf.random_normal([n_hidden_units_one], mean=0, stddev=sd), name="bias_1")
h_1 = tf.nn.tanh(tf.matmul(X, W_1) + b_1)
W_2 = tf.Variable(tf.random_normal([n_hidden_units_one, n_hidden_units_two], mean=0, stddev=sd), name="Weight_2")
b_2 = tf.Variable(tf.random_normal([n_hidden_units_two], mean=0, stddev=sd), name="bias_2")
h_2 = tf.nn.sigmoid(tf.matmul(h_1, W_2) + b_2)
W = tf.Variable(tf.random_normal([n_hidden_units_two, n_classes], mean=0, stddev=sd), name="W")
b = tf.Variable(tf.random_normal([n_classes], mean=0, stddev=sd), name="b")
y_ = tf.nn.softmax(tf.matmul(h_2, W) + b)
saver = tf.train.Saver()
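# The graph above is a small feed-forward classifier: 193-dim features ->
# 193-unit tanh layer -> 386-unit sigmoid layer -> 10-class softmax. The
# variable names and shapes must match those used at training time so that
# saver.restore() can load the checkpoint.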
CHUNK = 2048
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 8000
RECORD_SECONDS = 3
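# At 8000 Hz mono int16, each 2048-frame chunk covers 0.256 s of audio.
# RECORD_SECONDS is not referenced in the callback-driven flow below.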
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK,
                stream_callback=callback)
stream.start_stream()
with tf.Session() as sess:
    saver.restore(sess, "D:\\source\\python\\UrbanSound8K\\pkl\\audio.ckpt")
    print("restore session")
    # Build the prediction op once, outside the loop, so each iteration does
    # not add new nodes to the graph.
    prediction = tf.argmax(y_, 1)
    while stream.is_active():
        time.sleep(0.25)
        mutex.acquire()
        # Join the buffered byte chunks into one contiguous byte string
        # before converting them to float samples.
        in_data = b''.join(ringBuffer.get())
        mutex.release()
        if len(in_data) == 0:
            # Nothing captured yet; wait for the callback to fill the buffer.
            continue
        # print(in_data)
        x = buf_to_float(in_data)
        print(x)
        mfccs, chroma, mel, contrast, tonnetz = extract_feature(x)
        ext_features = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
        features = np.empty((0, 193))
        features = np.vstack([features, ext_features])
        test_x = features
        print('execute prediction')
        y_pred = sess.run(prediction, feed_dict={X: test_x})
        # Class index 6 is gun_shot in the UrbanSound8K label set
        # (see the commented-out callback above).
        print(y_pred)
    stream.close()
    p.terminate()