зеркало из https://github.com/mozilla/DeepSpeech.git
Fix #3355: Add valgrind runs
This commit is contained in:
Родитель
86bba80b0e
Коммит
fdd663829a
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
libgomp_malloc
|
||||
Memcheck:Leak
|
||||
match-leak-kinds: reachable
|
||||
fun:malloc
|
||||
obj:/usr/lib/*/libgomp.so.1.0.0
|
||||
}
|
|
@ -0,0 +1,10 @@
|
|||
{
|
||||
deepspeech_tflite_error_reporter
|
||||
Memcheck:Leak
|
||||
match-leak-kinds: reachable
|
||||
fun:_Znwm
|
||||
fun:_ZN6tflite20DefaultErrorReporterEv
|
||||
fun:_ZN16TFLiteModelState4initEPKc
|
||||
fun:DS_CreateModel
|
||||
fun:main
|
||||
}
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
sox_effect_gain
|
||||
Memcheck:Leak
|
||||
match-leak-kinds: reachable
|
||||
fun:malloc
|
||||
fun:realloc
|
||||
fun:lsx_realloc
|
||||
fun:lsx_usage_lines
|
||||
fun:lsx_gain_effect_fn
|
||||
fun:sox_find_effect
|
||||
fun:_Z14GetAudioBufferPKci
|
||||
fun:_Z11ProcessFileP10ModelStatePKcb
|
||||
fun:main
|
||||
}
|
||||
{
|
||||
sox_effect_rate
|
||||
Memcheck:Leak
|
||||
match-leak-kinds: reachable
|
||||
fun:malloc
|
||||
fun:realloc
|
||||
fun:lsx_realloc
|
||||
fun:lsx_usage_lines
|
||||
fun:lsx_rate_effect_fn
|
||||
fun:sox_find_effect
|
||||
fun:_Z14GetAudioBufferPKci
|
||||
fun:_Z11ProcessFileP10ModelStatePKcb
|
||||
fun:main
|
||||
}
|
||||
{
|
||||
sox_effect_flanger
|
||||
Memcheck:Leak
|
||||
match-leak-kinds: reachable
|
||||
fun:malloc
|
||||
fun:realloc
|
||||
fun:lsx_realloc
|
||||
fun:lsx_usage_lines
|
||||
fun:lsx_flanger_effect_fn
|
||||
fun:sox_find_effect
|
||||
fun:_Z14GetAudioBufferPKci
|
||||
fun:_Z11ProcessFileP10ModelStatePKcb
|
||||
fun:main
|
||||
}
|
|
@ -38,6 +38,8 @@ int json_candidate_transcripts = 3;
|
|||
|
||||
int stream_size = 0;
|
||||
|
||||
int extended_stream_size = 0;
|
||||
|
||||
char* hot_words = NULL;
|
||||
|
||||
void PrintHelp(const char* bin)
|
||||
|
@ -58,6 +60,7 @@ void PrintHelp(const char* bin)
|
|||
"\t--json\t\t\t\tExtended output, shows word timings as JSON\n"
|
||||
"\t--candidate_transcripts NUMBER\tNumber of candidate transcripts to include in JSON output\n"
|
||||
"\t--stream size\t\t\tRun in stream mode, output intermediate results\n"
|
||||
"\t--extended_stream size\t\t\tRun in stream mode using metadata output, output intermediate results\n"
|
||||
"\t--hot_words\t\t\tHot-words and their boosts. Word:Boost pairs are comma-separated\n"
|
||||
"\t--help\t\t\t\tShow help\n"
|
||||
"\t--version\t\t\tPrint version and exits\n";
|
||||
|
@ -82,6 +85,7 @@ bool ProcessArgs(int argc, char** argv)
|
|||
{"json", no_argument, nullptr, 'j'},
|
||||
{"candidate_transcripts", required_argument, nullptr, 150},
|
||||
{"stream", required_argument, nullptr, 's'},
|
||||
{"extended_stream", required_argument, nullptr, 'S'},
|
||||
{"hot_words", required_argument, nullptr, 'w'},
|
||||
{"version", no_argument, nullptr, 'v'},
|
||||
{"help", no_argument, nullptr, 'h'},
|
||||
|
@ -144,6 +148,10 @@ bool ProcessArgs(int argc, char** argv)
|
|||
stream_size = atoi(optarg);
|
||||
break;
|
||||
|
||||
case 'S':
|
||||
extended_stream_size = atoi(optarg);
|
||||
break;
|
||||
|
||||
case 'v':
|
||||
has_versions = true;
|
||||
break;
|
||||
|
@ -172,7 +180,7 @@ bool ProcessArgs(int argc, char** argv)
|
|||
return false;
|
||||
}
|
||||
|
||||
if (stream_size < 0 || stream_size % 160 != 0) {
|
||||
if ((stream_size < 0 || stream_size % 160 != 0) || (extended_stream_size < 0 || extended_stream_size % 160 != 0)) {
|
||||
std::cout <<
|
||||
"Stream buffer size must be multiples of 160\n";
|
||||
return false;
|
||||
|
|
|
@ -205,6 +205,38 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize,
|
|||
DS_FreeString((char *) last);
|
||||
}
|
||||
res.string = DS_FinishStream(ctx);
|
||||
} else if (extended_stream_size > 0) {
|
||||
StreamingState* ctx;
|
||||
int status = DS_CreateStream(aCtx, &ctx);
|
||||
if (status != DS_ERR_OK) {
|
||||
res.string = strdup("");
|
||||
return res;
|
||||
}
|
||||
size_t off = 0;
|
||||
const char *last = nullptr;
|
||||
const char *prev = nullptr;
|
||||
while (off < aBufferSize) {
|
||||
size_t cur = aBufferSize - off > extended_stream_size ? extended_stream_size : aBufferSize - off;
|
||||
DS_FeedAudioContent(ctx, aBuffer + off, cur);
|
||||
off += cur;
|
||||
prev = last;
|
||||
const Metadata* result = DS_IntermediateDecodeWithMetadata(ctx, 1);
|
||||
const char* partial = CandidateTranscriptToString(&result->transcripts[0]);
|
||||
if (last == nullptr || strcmp(last, partial)) {
|
||||
printf("%s\n", partial);
|
||||
last = partial;
|
||||
} else {
|
||||
free((char *) partial);
|
||||
}
|
||||
if (prev != nullptr && prev != last) {
|
||||
free((char *) prev);
|
||||
}
|
||||
DS_FreeMetadata((Metadata *)result);
|
||||
}
|
||||
const Metadata* result = DS_FinishStreamWithMetadata(ctx, 1);
|
||||
res.string = CandidateTranscriptToString(&result->transcripts[0]);
|
||||
DS_FreeMetadata((Metadata *)result);
|
||||
free((char *) last);
|
||||
} else {
|
||||
res.string = DS_SpeechToText(aCtx, aBuffer, aBufferSize);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
#! /usr/bin/awk -f
|
||||
# A script to extract the actual suppression info from the output of (for example) valgrind --leak-check=full --show-reachable=yes --error-limit=no --gen-suppressions=all ./minimal
|
||||
# The desired bits are between ^{ and ^} (including the braces themselves).
|
||||
# The combined output should either be appended to /usr/lib/valgrind/default.supp, or placed in a .supp of its own
|
||||
# If the latter, either tell valgrind about it each time with --suppressions=<filename>, or add that line to ~/.valgrindrc
|
||||
|
||||
# NB This script uses the |& operator, which I believe is gawk-specific. In case of failure, check that you're using gawk rather than some other awk
|
||||
|
||||
# The script looks for suppressions. When it finds one it stores it temporarily in an array,
|
||||
# and also feeds it line by line to the external app 'md5sum' which generates a unique checksum for it.
|
||||
# The checksum is used as an index in a different array. If an item with that index already exists the suppression must be a duplicate and is discarded.
|
||||
|
||||
BEGIN { suppression=0; md5sum = "md5sum" }
|
||||
# If the line begins with '{', it's the start of a suppression; so set the var and initialise things
|
||||
/^{/ {
|
||||
suppression=1; i=0; next
|
||||
}
|
||||
# If the line begins with '}' it's the end of a suppression
|
||||
/^}/ {
|
||||
if (suppression)
|
||||
{ suppression=0;
|
||||
close(md5sum, "to") # We've finished sending data to md5sum, so close that part of the pipe
|
||||
ProcessInput() # Do the slightly-complicated stuff in functions
|
||||
delete supparray # We don't want subsequent suppressions to append to it!
|
||||
}
|
||||
}
|
||||
# Otherwise, it's a normal line. If we're inside a suppression, store it, and pipe it to md5sum. Otherwise it's cruft, so ignore it
|
||||
{ if (suppression)
|
||||
{
|
||||
supparray[++i] = $0
|
||||
print |& md5sum
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# ProcessInput: read back the checksum that md5sum computed for the suppression
# just buffered in supparray, and print that suppression only when the checksum
# is not already a key of chksum_array (i.e. it is not a duplicate).
# Takes no arguments; works on the globals md5sum, supparray, i and chksum_array.
function ProcessInput()
|
||||
{
|
||||
# Pipe the result from md5sum, then close it
|
||||
md5sum |& getline result
|
||||
close(md5sum)
|
||||
# gawk can't cope with enormous ints like $result would be, so stringify it first by prefixing a definite string
|
||||
resultstring = "prefix"result
|
||||
|
||||
if (! (resultstring in chksum_array) )
|
||||
{ chksum_array[resultstring] = 0; # This checksum hasn't been seen before, so add it to the array
|
||||
OutputSuppression() # and output the contents of the suppression
|
||||
}
|
||||
}
|
||||
|
||||
# OutputSuppression: print the buffered suppression to standard output.
# Emits "{", then supparray[1] through supparray[i] in order, then "}".
# The braces are re-added here because the matching rules do not store them.
function OutputSuppression()
|
||||
{
|
||||
# A suppression is surrounded by '{' and '}'. Its data was stored line by line in the array
|
||||
print "{"
|
||||
for (n=1; n <= i; ++n)
|
||||
{ print supparray[n] }
|
||||
print "}"
|
||||
}
|
|
@ -28,6 +28,9 @@ tensorflow:
|
|||
packages_win:
|
||||
pacman: 'pacman --noconfirm -S patch unzip tar'
|
||||
msys64: 'ln -s $USERPROFILE/msys64 $TASKCLUSTER_TASK_DIR/msys64'
|
||||
valgrind:
|
||||
packages_bionic:
|
||||
apt: 'apt-get -qq update && apt-get -qq -y install python3 python3-simplejson python-is-python3 valgrind'
|
||||
java:
|
||||
packages_xenial:
|
||||
apt: 'apt-get -qq -y install curl software-properties-common wget unzip && add-apt-repository --yes ppa:openjdk-r/ppa && apt-get -qq update && DEBIAN_FRONTEND=noninteractive apt-get -qq -y --force-yes install openjdk-8-jdk && java -version && update-ca-certificates -f'
|
||||
|
|
|
@ -22,6 +22,9 @@ source ${tc_tests_utils}/tc-node-utils.sh
|
|||
# Scoping of .Net-related tooling
|
||||
source ${tc_tests_utils}/tc-dotnet-utils.sh
|
||||
|
||||
# For checking with valgrind
|
||||
source ${tc_tests_utils}/tc-valgrind-utils.sh
|
||||
|
||||
# Functions that directly control the build process
|
||||
source ${tc_tests_utils}/tc-build-utils.sh
|
||||
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -xe
|
||||
|
||||
kind=$1
|
||||
|
||||
source $(dirname "$0")/tc-tests-utils.sh
|
||||
|
||||
set_ldc_sample_filename "16k"
|
||||
|
||||
download_material "${TASKCLUSTER_TMP_DIR}/ds"
|
||||
|
||||
mkdir -p ${TASKCLUSTER_ARTIFACTS} || true
|
||||
|
||||
export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH
|
||||
|
||||
if [ "${kind}" = "--basic" ]; then
|
||||
run_valgrind_basic
|
||||
run_valgrind_stream
|
||||
fi
|
||||
|
||||
if [ "${kind}" = "--metadata" ]; then
|
||||
run_valgrind_extended
|
||||
run_valgrind_extended_stream
|
||||
fi
|
|
@ -0,0 +1,29 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -xe
|
||||
|
||||
kind=$1
|
||||
|
||||
source $(dirname "$0")/tc-tests-utils.sh
|
||||
|
||||
set_ldc_sample_filename "16k"
|
||||
|
||||
model_source=${DEEPSPEECH_TEST_MODEL//.pb/.tflite}
|
||||
model_name=$(basename "${model_source}")
|
||||
model_name_mmap=$(basename "${model_source}")
|
||||
|
||||
download_material "${TASKCLUSTER_TMP_DIR}/ds"
|
||||
|
||||
mkdir -p ${TASKCLUSTER_ARTIFACTS} || true
|
||||
|
||||
export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH
|
||||
|
||||
if [ "${kind}" = "--basic" ]; then
|
||||
run_valgrind_basic
|
||||
run_valgrind_stream
|
||||
fi
|
||||
|
||||
if [ "${kind}" = "--metadata" ]; then
|
||||
run_valgrind_extended
|
||||
run_valgrind_extended_stream
|
||||
fi
|
|
@ -0,0 +1,68 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -xe
|
||||
|
||||
# How to generate / update valgrind suppression lists:
|
||||
# https://wiki.wxwidgets.org/Valgrind_Suppression_File_Howto#How_to_make_a_suppression_file
|
||||
#
|
||||
# $ valgrind --leak-check=full --show-reachable=yes --error-limit=no --gen-suppressions=all --log-file=minimalraw.log ./minimal
|
||||
# $ cat ./minimalraw.log | ./parse_valgrind_suppressions.sh > minimal.supp
|
||||
|
||||
# Default valgrind command line used by the run_valgrind_* helpers in this file.
# The ${VALGRIND_CMD:-...} form keeps a value already present in the environment
# and only falls back to this default when VALGRIND_CMD is unset or empty.
# --error-exitcode=4242 combined with --errors-for-leak-kinds=all makes valgrind
# exit with status 4242 when it reports errors, leaks of every kind included
# (per the valgrind manual); with `set -e` active that aborts the calling script.
# NOTE(review): DS_DSDIR is not defined in this file; presumably it is exported by
# the sourcing script and points at the directory holding the *.supp files -- confirm.
VALGRIND_CMD=${VALGRIND_CMD:-"valgrind \
|
||||
--error-exitcode=4242 \
|
||||
--errors-for-leak-kinds=all \
|
||||
--leak-check=full \
|
||||
--leak-resolution=high \
|
||||
--show-reachable=yes \
|
||||
--track-origins=yes \
|
||||
--gen-suppressions=all \
|
||||
--suppressions=${DS_DSDIR}/ds_generic.supp \
|
||||
--suppressions=${DS_DSDIR}/ds_lib.supp \
|
||||
--suppressions=${DS_DSDIR}/ds_sox.supp \
|
||||
--suppressions=${DS_DSDIR}/ds_openfst.supp \
|
||||
--suppressions=${DS_DSDIR}/tensorflow_full_runtime.supp \
|
||||
--suppressions=${DS_DSDIR}/tensorflow_tflite_runtime.supp \
|
||||
"}
|
||||
|
||||
# Run the deepspeech binary under ${VALGRIND_CMD} with only the model, scorer
# and audio arguments plus -t (no streaming or extended flags), writing the
# valgrind log to ${TASKCLUSTER_ARTIFACTS}/valgrind_basic.log.
# Expects model_name_mmap and ldc93s1_sample_filename to be set before the call.
run_valgrind_basic()
|
||||
{
|
||||
${VALGRIND_CMD} --log-file=${TASKCLUSTER_ARTIFACTS}/valgrind_basic.log \
|
||||
deepspeech \
|
||||
--model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} \
|
||||
--scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer \
|
||||
--audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} \
|
||||
-t
|
||||
}
|
||||
|
||||
# Same invocation as the basic run but with `--stream 320` added, writing the
# valgrind log to ${TASKCLUSTER_ARTIFACTS}/valgrind_stream.log.
# Expects model_name_mmap and ldc93s1_sample_filename to be set before the call.
# NOTE(review): 320 is presumably chosen as a multiple of 160 to satisfy the
# client's stream-size validation -- confirm against the client sources.
run_valgrind_stream()
|
||||
{
|
||||
${VALGRIND_CMD} --log-file=${TASKCLUSTER_ARTIFACTS}/valgrind_stream.log \
|
||||
deepspeech \
|
||||
--model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} \
|
||||
--scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer \
|
||||
--audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} \
|
||||
--stream 320 \
|
||||
-t
|
||||
}
|
||||
|
||||
# Same invocation as the basic run but with `--extended` added, writing the
# valgrind log to ${TASKCLUSTER_ARTIFACTS}/valgrind_extended.log.
# Expects model_name_mmap and ldc93s1_sample_filename to be set before the call.
run_valgrind_extended()
|
||||
{
|
||||
${VALGRIND_CMD} --log-file=${TASKCLUSTER_ARTIFACTS}/valgrind_extended.log \
|
||||
deepspeech \
|
||||
--model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} \
|
||||
--scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer \
|
||||
--audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} \
|
||||
--extended \
|
||||
-t
|
||||
}
|
||||
|
||||
# Same invocation as the basic run but with `--extended_stream 320` added.
# Note the log name order differs from the function name: the log is written to
# ${TASKCLUSTER_ARTIFACTS}/valgrind_stream_extended.log.
# Expects model_name_mmap and ldc93s1_sample_filename to be set before the call.
# NOTE(review): 320 is presumably chosen as a multiple of 160 to satisfy the
# client's stream-size validation -- confirm against the client sources.
run_valgrind_extended_stream()
|
||||
{
|
||||
${VALGRIND_CMD} --log-file=${TASKCLUSTER_ARTIFACTS}/valgrind_stream_extended.log \
|
||||
deepspeech \
|
||||
--model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} \
|
||||
--scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer \
|
||||
--audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} \
|
||||
--extended_stream 320 \
|
||||
-t
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
build:
|
||||
template_file: test-linux-opt-base.tyml
|
||||
dependencies:
|
||||
- "linux-amd64-tflite-dbg"
|
||||
- "test-training_16k-linux-amd64-py36m-opt"
|
||||
test_model_task: "test-training_16k-linux-amd64-py36m-opt"
|
||||
docker_image: "ubuntu:20.04"
|
||||
system_setup:
|
||||
>
|
||||
${valgrind.packages_bionic.apt}
|
||||
args:
|
||||
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-valgrind-cpp_tflite.sh --basic"
|
||||
workerType: "${docker.dsHighMemTests}"
|
||||
metadata:
|
||||
name: "DeepSpeech Linux AMD64 valgrind C++ TFLite basic tests"
|
||||
description: "Testing basic DeepSpeech valgrind C++ TFLite for Linux/AMD64"
|
|
@ -0,0 +1,16 @@
|
|||
build:
|
||||
template_file: test-linux-opt-base.tyml
|
||||
dependencies:
|
||||
- "linux-amd64-cpu-dbg"
|
||||
- "test-training_16k-linux-amd64-py36m-opt"
|
||||
test_model_task: "test-training_16k-linux-amd64-py36m-opt"
|
||||
docker_image: "ubuntu:20.04"
|
||||
system_setup:
|
||||
>
|
||||
${valgrind.packages_bionic.apt}
|
||||
args:
|
||||
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-valgrind-cpp.sh --basic"
|
||||
workerType: "${docker.dsHighMemTests}"
|
||||
metadata:
|
||||
name: "DeepSpeech Linux AMD64 valgrind C++ basic tests"
|
||||
description: "Testing basic DeepSpeech valgrind C++ for Linux/AMD64"
|
|
@ -0,0 +1,16 @@
|
|||
build:
|
||||
template_file: test-linux-opt-base.tyml
|
||||
dependencies:
|
||||
- "linux-amd64-tflite-dbg"
|
||||
- "test-training_16k-linux-amd64-py36m-opt"
|
||||
test_model_task: "test-training_16k-linux-amd64-py36m-opt"
|
||||
docker_image: "ubuntu:20.04"
|
||||
system_setup:
|
||||
>
|
||||
${valgrind.packages_bionic.apt}
|
||||
args:
|
||||
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-valgrind-cpp_tflite.sh --metadata"
|
||||
workerType: "${docker.dsHighMemTests}"
|
||||
metadata:
|
||||
name: "DeepSpeech Linux AMD64 valgrind C++ TFLite metadata tests"
|
||||
description: "Testing metadata DeepSpeech valgrind C++ TFLite for Linux/AMD64"
|
|
@ -0,0 +1,16 @@
|
|||
build:
|
||||
template_file: test-linux-opt-base.tyml
|
||||
dependencies:
|
||||
- "linux-amd64-cpu-dbg"
|
||||
- "test-training_16k-linux-amd64-py36m-opt"
|
||||
test_model_task: "test-training_16k-linux-amd64-py36m-opt"
|
||||
docker_image: "ubuntu:20.04"
|
||||
system_setup:
|
||||
>
|
||||
${valgrind.packages_bionic.apt}
|
||||
args:
|
||||
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-valgrind-cpp.sh --metadata"
|
||||
workerType: "${docker.dsHighMemTests}"
|
||||
metadata:
|
||||
name: "DeepSpeech Linux AMD64 valgrind C++ metadata tests"
|
||||
description: "Testing metadata DeepSpeech valgrind C++ for Linux/AMD64"
|
|
@ -34,6 +34,7 @@ then:
|
|||
DECODER_ARTIFACTS_ROOT: https://community-tc.services.mozilla.com/api/queue/v1/task/${linux_amd64_ctc}/artifacts/public
|
||||
PIP_DEFAULT_TIMEOUT: "60"
|
||||
EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"
|
||||
DEBIAN_FRONTEND: "noninteractive"
|
||||
|
||||
command:
|
||||
- "/bin/bash"
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Загрузка…
Ссылка в новой задаче