Skip to content

Commit fdd6638

Browse files
author
Alexandre Lissy
committed
Fix #3355: Add valgrind runs
1 parent 86bba80 commit fdd6638

22 files changed

+11651
-1
lines changed

ds_generic.supp

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
libgomp_malloc
3+
Memcheck:Leak
4+
match-leak-kinds: reachable
5+
fun:malloc
6+
obj:/usr/lib/*/libgomp.so.1.0.0
7+
}

ds_lib.supp

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
deepspeech_tflite_error_reporter
3+
Memcheck:Leak
4+
match-leak-kinds: reachable
5+
fun:_Znwm
6+
fun:_ZN6tflite20DefaultErrorReporterEv
7+
fun:_ZN16TFLiteModelState4initEPKc
8+
fun:DS_CreateModel
9+
fun:main
10+
}

ds_openfst.supp

+1,356
Large diffs are not rendered by default.

ds_sox.supp

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
{
2+
sox_effect_gain
3+
Memcheck:Leak
4+
match-leak-kinds: reachable
5+
fun:malloc
6+
fun:realloc
7+
fun:lsx_realloc
8+
fun:lsx_usage_lines
9+
fun:lsx_gain_effect_fn
10+
fun:sox_find_effect
11+
fun:_Z14GetAudioBufferPKci
12+
fun:_Z11ProcessFileP10ModelStatePKcb
13+
fun:main
14+
}
15+
{
16+
sox_effect_rate
17+
Memcheck:Leak
18+
match-leak-kinds: reachable
19+
fun:malloc
20+
fun:realloc
21+
fun:lsx_realloc
22+
fun:lsx_usage_lines
23+
fun:lsx_rate_effect_fn
24+
fun:sox_find_effect
25+
fun:_Z14GetAudioBufferPKci
26+
fun:_Z11ProcessFileP10ModelStatePKcb
27+
fun:main
28+
}
29+
{
30+
sox_effect_flanger
31+
Memcheck:Leak
32+
match-leak-kinds: reachable
33+
fun:malloc
34+
fun:realloc
35+
fun:lsx_realloc
36+
fun:lsx_usage_lines
37+
fun:lsx_flanger_effect_fn
38+
fun:sox_find_effect
39+
fun:_Z14GetAudioBufferPKci
40+
fun:_Z11ProcessFileP10ModelStatePKcb
41+
fun:main
42+
}

native_client/args.h

+9-1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ int json_candidate_transcripts = 3;
3838

3939
int stream_size = 0;
4040

41+
int extended_stream_size = 0;
42+
4143
char* hot_words = NULL;
4244

4345
void PrintHelp(const char* bin)
@@ -58,6 +60,7 @@ void PrintHelp(const char* bin)
5860
"\t--json\t\t\t\tExtended output, shows word timings as JSON\n"
5961
"\t--candidate_transcripts NUMBER\tNumber of candidate transcripts to include in JSON output\n"
6062
"\t--stream size\t\t\tRun in stream mode, output intermediate results\n"
63+
"\t--extended_stream size\t\t\tRun in stream mode using metadata output, output intermediate results\n"
6164
"\t--hot_words\t\t\tHot-words and their boosts. Word:Boost pairs are comma-separated\n"
6265
"\t--help\t\t\t\tShow help\n"
6366
"\t--version\t\t\tPrint version and exits\n";
@@ -82,6 +85,7 @@ bool ProcessArgs(int argc, char** argv)
8285
{"json", no_argument, nullptr, 'j'},
8386
{"candidate_transcripts", required_argument, nullptr, 150},
8487
{"stream", required_argument, nullptr, 's'},
88+
{"extended_stream", required_argument, nullptr, 'S'},
8589
{"hot_words", required_argument, nullptr, 'w'},
8690
{"version", no_argument, nullptr, 'v'},
8791
{"help", no_argument, nullptr, 'h'},
@@ -144,6 +148,10 @@ bool ProcessArgs(int argc, char** argv)
144148
stream_size = atoi(optarg);
145149
break;
146150

151+
case 'S':
152+
extended_stream_size = atoi(optarg);
153+
break;
154+
147155
case 'v':
148156
has_versions = true;
149157
break;
@@ -172,7 +180,7 @@ bool ProcessArgs(int argc, char** argv)
172180
return false;
173181
}
174182

175-
if (stream_size < 0 || stream_size % 160 != 0) {
183+
if ((stream_size < 0 || stream_size % 160 != 0) || (extended_stream_size < 0 || extended_stream_size % 160 != 0)) {
176184
std::cout <<
177185
"Stream buffer size must be multiples of 160\n";
178186
return false;

native_client/client.cc

+32
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,38 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize,
205205
DS_FreeString((char *) last);
206206
}
207207
res.string = DS_FinishStream(ctx);
208+
} else if (extended_stream_size > 0) {
209+
StreamingState* ctx;
210+
int status = DS_CreateStream(aCtx, &ctx);
211+
if (status != DS_ERR_OK) {
212+
res.string = strdup("");
213+
return res;
214+
}
215+
size_t off = 0;
216+
const char *last = nullptr;
217+
const char *prev = nullptr;
218+
while (off < aBufferSize) {
219+
size_t cur = aBufferSize - off > extended_stream_size ? extended_stream_size : aBufferSize - off;
220+
DS_FeedAudioContent(ctx, aBuffer + off, cur);
221+
off += cur;
222+
prev = last;
223+
const Metadata* result = DS_IntermediateDecodeWithMetadata(ctx, 1);
224+
const char* partial = CandidateTranscriptToString(&result->transcripts[0]);
225+
if (last == nullptr || strcmp(last, partial)) {
226+
printf("%s\n", partial);
227+
last = partial;
228+
} else {
229+
free((char *) partial);
230+
}
231+
if (prev != nullptr && prev != last) {
232+
free((char *) prev);
233+
}
234+
DS_FreeMetadata((Metadata *)result);
235+
}
236+
const Metadata* result = DS_FinishStreamWithMetadata(ctx, 1);
237+
res.string = CandidateTranscriptToString(&result->transcripts[0]);
238+
DS_FreeMetadata((Metadata *)result);
239+
free((char *) last);
208240
} else {
209241
res.string = DS_SpeechToText(aCtx, aBuffer, aBufferSize);
210242
}

parse_valgrind_suppressions.sh

+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#! /usr/bin/awk -f
2+
# A script to extract the actual suppression info from the output of (for example) valgrind --leak-check=full --show-reachable=yes --error-limit=no --gen-suppressions=all ./minimal
3+
# The desired bits are between ^{ and ^} (including the braces themselves).
4+
# The combined output should either be appended to /usr/lib/valgrind/default.supp, or placed in a .supp of its own
5+
# If the latter, either tell valgrind about it each time with --suppressions=<filename>, or add that line to ~/.valgrindrc
6+
7+
# NB This script uses the |& operator, which I believe is gawk-specific. In case of failure, check that you're using gawk rather than some other awk
8+
9+
# The script looks for suppressions. When it finds one it stores it temporarily in an array,
10+
# and also feeds it line by line to the external app 'md5sum' which generates a unique checksum for it.
11+
# The checksum is used as an index in a different array. If an item with that index already exists the suppression must be a duplicate and is discarded.
12+
13+
BEGIN { suppression=0; md5sum = "md5sum" }
14+
# If the line begins with '{', it's the start of a suppression; so set the var and initialise things
15+
/^{/ {
16+
suppression=1; i=0; next
17+
}
18+
# If the line begins with '}' it's the end of a suppression
19+
/^}/ {
20+
if (suppression)
21+
{ suppression=0;
22+
close(md5sum, "to") # We've finished sending data to md5sum, so close that part of the pipe
23+
ProcessInput() # Do the slightly-complicated stuff in functions
24+
delete supparray # We don't want subsequent suppressions to append to it!
25+
}
26+
}
27+
# Otherwise, it's a normal line. If we're inside a suppression, store it, and pipe it to md5sum. Otherwise it's cruft, so ignore it
28+
{ if (suppression)
29+
{
30+
supparray[++i] = $0
31+
print |& md5sum
32+
}
33+
}
34+
35+
36+
function ProcessInput()
37+
{
38+
# Pipe the result from md5sum, then close it
39+
md5sum |& getline result
40+
close(md5sum)
41+
# gawk can't cope with enormous ints like $result would be, so stringify it first by prefixing a definite string
42+
resultstring = "prefix"result
43+
44+
if (! (resultstring in chksum_array) )
45+
{ chksum_array[resultstring] = 0; # This checksum hasn't been seen before, so add it to the array
46+
OutputSuppression() # and output the contents of the suppression
47+
}
48+
}
49+
50+
function OutputSuppression()
51+
{
52+
# A suppression is surrounded by '{' and '}'. Its data was stored line by line in the array
53+
print "{"
54+
for (n=1; n <= i; ++n)
55+
{ print supparray[n] }
56+
print "}"
57+
}

taskcluster/.shared.yml

+3
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ tensorflow:
2828
packages_win:
2929
pacman: 'pacman --noconfirm -S patch unzip tar'
3030
msys64: 'ln -s $USERPROFILE/msys64 $TASKCLUSTER_TASK_DIR/msys64'
31+
valgrind:
32+
packages_bionic:
33+
apt: 'apt-get -qq update && apt-get -qq -y install python3 python3-simplejson python-is-python3 valgrind'
3134
java:
3235
packages_xenial:
3336
apt: 'apt-get -qq -y install curl software-properties-common wget unzip && add-apt-repository --yes ppa:openjdk-r/ppa && apt-get -qq update && DEBIAN_FRONTEND=noninteractive apt-get -qq -y --force-yes install openjdk-8-jdk && java -version && update-ca-certificates -f'

taskcluster/tc-tests-utils.sh

+3
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ source ${tc_tests_utils}/tc-node-utils.sh
2222
# Scoping of .Net-related tooling
2323
source ${tc_tests_utils}/tc-dotnet-utils.sh
2424

25+
# For checking with valgrind
26+
source ${tc_tests_utils}/tc-valgrind-utils.sh
27+
2528
# Functions that controls directly the build process
2629
source ${tc_tests_utils}/tc-build-utils.sh
2730

taskcluster/tc-valgrind-cpp.sh

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#!/bin/bash
2+
3+
set -xe
4+
5+
kind=$1
6+
7+
source $(dirname "$0")/tc-tests-utils.sh
8+
9+
set_ldc_sample_filename "16k"
10+
11+
download_material "${TASKCLUSTER_TMP_DIR}/ds"
12+
13+
mkdir -p ${TASKCLUSTER_ARTIFACTS} || true
14+
15+
export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH
16+
17+
if [ "${kind}" = "--basic" ]; then
18+
run_valgrind_basic
19+
run_valgrind_stream
20+
fi
21+
22+
if [ "${kind}" = "--metadata" ]; then
23+
run_valgrind_extended
24+
run_valgrind_extended_stream
25+
fi

taskcluster/tc-valgrind-cpp_tflite.sh

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#!/bin/bash
2+
3+
set -xe
4+
5+
kind=$1
6+
7+
source $(dirname "$0")/tc-tests-utils.sh
8+
9+
set_ldc_sample_filename "16k"
10+
11+
model_source=${DEEPSPEECH_TEST_MODEL//.pb/.tflite}
12+
model_name=$(basename "${model_source}")
13+
model_name_mmap=$(basename "${model_source}")
14+
15+
download_material "${TASKCLUSTER_TMP_DIR}/ds"
16+
17+
mkdir -p ${TASKCLUSTER_ARTIFACTS} || true
18+
19+
export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH
20+
21+
if [ "${kind}" = "--basic" ]; then
22+
run_valgrind_basic
23+
run_valgrind_stream
24+
fi
25+
26+
if [ "${kind}" = "--metadata" ]; then
27+
run_valgrind_extended
28+
run_valgrind_extended_stream
29+
fi

taskcluster/tc-valgrind-utils.sh

+68
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#!/bin/bash
2+
3+
set -xe
4+
5+
# How to generate / update valgrind suppression lists:
6+
# https://wiki.wxwidgets.org/Valgrind_Suppression_File_Howto#How_to_make_a_suppression_file
7+
#
8+
# $ valgrind --leak-check=full --show-reachable=yes --error-limit=no --gen-suppressions=all --log-file=minimalraw.log ./minimal
9+
# $ cat ./minimalraw.log | ./parse_valgrind_suppressions.sh > minimal.supp
10+
11+
VALGRIND_CMD=${VALGRIND_CMD:-"valgrind \
12+
--error-exitcode=4242 \
13+
--errors-for-leak-kinds=all \
14+
--leak-check=full \
15+
--leak-resolution=high \
16+
--show-reachable=yes \
17+
--track-origins=yes \
18+
--gen-suppressions=all \
19+
--suppressions=${DS_DSDIR}/ds_generic.supp \
20+
--suppressions=${DS_DSDIR}/ds_lib.supp \
21+
--suppressions=${DS_DSDIR}/ds_sox.supp \
22+
--suppressions=${DS_DSDIR}/ds_openfst.supp \
23+
--suppressions=${DS_DSDIR}/tensorflow_full_runtime.supp \
24+
--suppressions=${DS_DSDIR}/tensorflow_tflite_runtime.supp \
25+
"}
26+
27+
run_valgrind_basic()
28+
{
29+
${VALGRIND_CMD} --log-file=${TASKCLUSTER_ARTIFACTS}/valgrind_basic.log \
30+
deepspeech \
31+
--model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} \
32+
--scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer \
33+
--audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} \
34+
-t
35+
}
36+
37+
run_valgrind_stream()
38+
{
39+
${VALGRIND_CMD} --log-file=${TASKCLUSTER_ARTIFACTS}/valgrind_stream.log \
40+
deepspeech \
41+
--model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} \
42+
--scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer \
43+
--audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} \
44+
--stream 320 \
45+
-t
46+
}
47+
48+
run_valgrind_extended()
49+
{
50+
${VALGRIND_CMD} --log-file=${TASKCLUSTER_ARTIFACTS}/valgrind_extended.log \
51+
deepspeech \
52+
--model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} \
53+
--scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer \
54+
--audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} \
55+
--extended \
56+
-t
57+
}
58+
59+
run_valgrind_extended_stream()
60+
{
61+
${VALGRIND_CMD} --log-file=${TASKCLUSTER_ARTIFACTS}/valgrind_stream_extended.log \
62+
deepspeech \
63+
--model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} \
64+
--scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer \
65+
--audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} \
66+
--extended_stream 320 \
67+
-t
68+
}

taskcluster/test-cpp_16k_tflite-linux-amd64-opt.yml

100755100644
File mode changed.

taskcluster/test-cpp_8k_tflite-linux-amd64-opt.yml

100755100644
File mode changed.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
build:
2+
template_file: test-linux-opt-base.tyml
3+
dependencies:
4+
- "linux-amd64-tflite-dbg"
5+
- "test-training_16k-linux-amd64-py36m-opt"
6+
test_model_task: "test-training_16k-linux-amd64-py36m-opt"
7+
docker_image: "ubuntu:20.04"
8+
system_setup:
9+
>
10+
${valgrind.packages_bionic.apt}
11+
args:
12+
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-valgrind-cpp_tflite.sh --basic"
13+
workerType: "${docker.dsHighMemTests}"
14+
metadata:
15+
name: "DeepSpeech Linux AMD64 valgrind C++ TFLite basic tests"
16+
description: "Testing basic DeepSpeech valgrind C++ TFLite for Linux/AMD64"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
build:
2+
template_file: test-linux-opt-base.tyml
3+
dependencies:
4+
- "linux-amd64-cpu-dbg"
5+
- "test-training_16k-linux-amd64-py36m-opt"
6+
test_model_task: "test-training_16k-linux-amd64-py36m-opt"
7+
docker_image: "ubuntu:20.04"
8+
system_setup:
9+
>
10+
${valgrind.packages_bionic.apt}
11+
args:
12+
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-valgrind-cpp.sh --basic"
13+
workerType: "${docker.dsHighMemTests}"
14+
metadata:
15+
name: "DeepSpeech Linux AMD64 valgrind C++ basic tests"
16+
description: "Testing basic DeepSpeech valgrind C++ for Linux/AMD64"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
build:
2+
template_file: test-linux-opt-base.tyml
3+
dependencies:
4+
- "linux-amd64-tflite-dbg"
5+
- "test-training_16k-linux-amd64-py36m-opt"
6+
test_model_task: "test-training_16k-linux-amd64-py36m-opt"
7+
docker_image: "ubuntu:20.04"
8+
system_setup:
9+
>
10+
${valgrind.packages_bionic.apt}
11+
args:
12+
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-valgrind-cpp_tflite.sh --metadata"
13+
workerType: "${docker.dsHighMemTests}"
14+
metadata:
15+
name: "DeepSpeech Linux AMD64 valgrind C++ TFLite metadata tests"
16+
description: "Testing metadata DeepSpeech valgrind C++ TFLite for Linux/AMD64"

0 commit comments

Comments
 (0)