runtests: improve error logging

Give more information about test harness error conditions to help figure
out what might be wrong. Print some internal test state when SIGUSR1 is
sent to runtests.pl.

Ref: #11328
This commit is contained in:
Dan Fandrich 2023-06-14 15:27:07 -07:00
Родитель 847e42d868
Коммит 2fe52412b1
2 изменённых файлов: 30 добавлений и 8 удалений

Просмотреть файл

@ -185,9 +185,10 @@ sub runner_init {
# Create a separate process in multiprocess mode
my $child = fork();
if(0 == $child) {
# TODO: set up a better signal handler
# TODO: set up better signal handlers
$SIG{INT} = 'IGNORE';
$SIG{TERM} = 'IGNORE';
$SIG{USR1} = 'IGNORE';
$thisrunnerid = $$;
print "Runner $thisrunnerid starting\n" if($verbose);
@ -1270,6 +1271,7 @@ sub runnerar_ready {
$maxfileno = $fd;
}
}
$maxfileno || die "Internal error: no runners are available to wait on\n";
# Wait for any pipe from any runner to be ready
# TODO: this is relatively slow with hundreds of fds
@ -1293,13 +1295,13 @@ sub ipcrecv {
my $err;
my $datalen;
while(! defined ($err = sysread($runnerr, $datalen, 4)) || $err <= 0) {
$!{EINTR} || die "error in ipcrecv: $!\n";
$!{EINTR} || die "error $err in ipcrecv: $! in runner $$ for $LOGDIR\n";
# system call was interrupted, probably by ^C; restart it so we stay in sync
}
my $len=unpack("L", $datalen);
my $buf;
while(! defined ($err = sysread($runnerr, $buf, $len)) || $err <= 0) {
$!{EINTR} || die "error in ipcrecv: $!\n";
$!{EINTR} || die "error $err in ipcrecv: $! in runner $$ for $LOGDIR\n";
# system call was interrupted, probably by ^C; restart it so we stay in sync
}
@ -1336,7 +1338,7 @@ sub ipcrecv {
# Marshall the results to return
$buf = freeze \@res;
syswrite($runnerw, (pack "L", length($buf)) . $buf);
defined syswrite($runnerw, (pack "L", length($buf)) . $buf) || $!{EINTR} || die "error $err in ipcrecv write: $! in runner $$ for $LOGDIR\n";
return 0;
}

Просмотреть файл

@ -250,12 +250,29 @@ sub singletest_dumplogs {
sub catch_zap {
my $signame = shift;
logmsg "runtests.pl received SIG$signame, exiting\n";
print "runtests.pl received SIG$signame, exiting\r\n";
$globalabort = 1;
}
$SIG{INT} = \&catch_zap;
$SIG{TERM} = \&catch_zap;
sub catch_usr1 {
print "runtests.pl internal state:\r\n";
print scalar(%runnersrunning) . " busy test runner(s) of " . scalar(keys %runnerids) . "\r\n";
foreach my $rid (sort(keys(%runnersrunning))) {
my $runnernum = "unknown";
foreach my $rnum (keys %runnerids) {
if($runnerids{$rnum} == $rid) {
$runnernum = $rnum;
last;
}
}
print "Runner $runnernum (id $rid) running test $runnersrunning{$rid} in state $singletest_state{$rid}\r\n";
}
}
$SIG{USR1} = \&catch_usr1;
##########################################################################
# Clear all possible '*_proxy' environment variables for various protocols
# to prevent them to interfere with our testing!
@ -2734,14 +2751,17 @@ while () {
if($globalabort) {
logmsg singletest_dumplogs();
logmsg "Aborting tests\n";
logmsg "Waiting for tests to finish...\n";
logmsg "Waiting for " . scalar((keys %runnersrunning)) . " outstanding test(s) to finish...\n";
# Wait for the last requests to complete and throw them away so
# that IPC calls & responses stay in sync
# TODO: send a signal to the runners to interrupt a long test
foreach my $rid (keys %runnersrunning) {
runnerar($rid);
delete $runnersrunning{$rid};
logmsg ".";
$| = 1;
}
logmsg "\n";
last;
}
@ -2770,7 +2790,7 @@ while () {
# See if we've completed all the tests
if(!scalar(%runnersrunning)) {
# No runners are running; we must be done
scalar(@runtests) && die 'Internal error: tests to run';
scalar(@runtests) && die 'Internal error: still have tests to run';
last;
}
@ -2783,7 +2803,7 @@ while () {
if($ridready) {
# This runner is ready to be serviced
my $testnum = $runnersrunning{$ridready};
defined $testnum || die 'Internal error: test for runner unknown';
defined $testnum || die "Internal error: test for runner $ridready unknown";
delete $runnersrunning{$ridready};
my ($error, $again) = singletest($ridready, $testnum, $countforrunner{$ridready}, $totaltests);
if($again) {