runtests: improve error logging

Give more information about test harness error conditions to help figure
out what might be wrong. Print some internal test state when SIGUSR1 is
sent to runtests.pl.

Ref: #11328
This commit is contained in:
Dan Fandrich 2023-06-14 15:27:07 -07:00
Родитель 847e42d868
Коммит 2fe52412b1
2 изменённых файлов: 30 добавлений и 8 удалений

Просмотреть файл

@ -185,9 +185,10 @@ sub runner_init {
# Create a separate process in multiprocess mode # Create a separate process in multiprocess mode
my $child = fork(); my $child = fork();
if(0 == $child) { if(0 == $child) {
# TODO: set up a better signal handler # TODO: set up better signal handlers
$SIG{INT} = 'IGNORE'; $SIG{INT} = 'IGNORE';
$SIG{TERM} = 'IGNORE'; $SIG{TERM} = 'IGNORE';
$SIG{USR1} = 'IGNORE';
$thisrunnerid = $$; $thisrunnerid = $$;
print "Runner $thisrunnerid starting\n" if($verbose); print "Runner $thisrunnerid starting\n" if($verbose);
@ -1270,6 +1271,7 @@ sub runnerar_ready {
$maxfileno = $fd; $maxfileno = $fd;
} }
} }
$maxfileno || die "Internal error: no runners are available to wait on\n";
# Wait for any pipe from any runner to be ready # Wait for any pipe from any runner to be ready
# TODO: this is relatively slow with hundreds of fds # TODO: this is relatively slow with hundreds of fds
@ -1293,13 +1295,13 @@ sub ipcrecv {
my $err; my $err;
my $datalen; my $datalen;
while(! defined ($err = sysread($runnerr, $datalen, 4)) || $err <= 0) { while(! defined ($err = sysread($runnerr, $datalen, 4)) || $err <= 0) {
$!{EINTR} || die "error in ipcrecv: $!\n"; $!{EINTR} || die "error $err in ipcrecv: $! in runner $$ for $LOGDIR\n";
# system call was interrupted, probably by ^C; restart it so we stay in sync # system call was interrupted, probably by ^C; restart it so we stay in sync
} }
my $len=unpack("L", $datalen); my $len=unpack("L", $datalen);
my $buf; my $buf;
while(! defined ($err = sysread($runnerr, $buf, $len)) || $err <= 0) { while(! defined ($err = sysread($runnerr, $buf, $len)) || $err <= 0) {
$!{EINTR} || die "error in ipcrecv: $!\n"; $!{EINTR} || die "error $err in ipcrecv: $! in runner $$ for $LOGDIR\n";
# system call was interrupted, probably by ^C; restart it so we stay in sync # system call was interrupted, probably by ^C; restart it so we stay in sync
} }
@ -1336,7 +1338,7 @@ sub ipcrecv {
# Marshall the results to return # Marshall the results to return
$buf = freeze \@res; $buf = freeze \@res;
syswrite($runnerw, (pack "L", length($buf)) . $buf); defined syswrite($runnerw, (pack "L", length($buf)) . $buf) || $!{EINTR} || die "error $err in ipcrecv write: $! in runner $$ for $LOGDIR\n";
return 0; return 0;
} }

Просмотреть файл

@ -250,12 +250,29 @@ sub singletest_dumplogs {
sub catch_zap { sub catch_zap {
my $signame = shift; my $signame = shift;
logmsg "runtests.pl received SIG$signame, exiting\n"; print "runtests.pl received SIG$signame, exiting\r\n";
$globalabort = 1; $globalabort = 1;
} }
$SIG{INT} = \&catch_zap; $SIG{INT} = \&catch_zap;
$SIG{TERM} = \&catch_zap; $SIG{TERM} = \&catch_zap;
# SIGUSR1 handler: print a snapshot of the harness's internal state.
# Reports how many runners currently have a test outstanding, out of all
# known runners, followed by one line per busy runner giving its runner
# number, its runner id, the test it is running and its singletest state.
# Reads the file-level hashes %runnersrunning (runner id => test number),
# %runnerids (runner number => runner id) and %singletest_state (keyed by
# runner id). Takes no meaningful arguments and returns nothing useful.
sub catch_usr1 {
    print "runtests.pl internal state:\r\n";
    # Count the keys explicitly: before Perl 5.26 a hash evaluated in scalar
    # context yields a bucket-usage string such as "3/8" rather than the
    # number of entries, which would make the "busy" figure misleading.
    print scalar(keys %runnersrunning) . " busy test runner(s) of " . scalar(keys %runnerids) . "\r\n";
    foreach my $rid (sort(keys(%runnersrunning))) {
        # Reverse-map the runner id to its runner number; stays "unknown"
        # when no entry of %runnerids refers to this id.
        my $runnernum = "unknown";
        foreach my $rnum (keys %runnerids) {
            if($runnerids{$rnum} == $rid) {
                $runnernum = $rnum;
                last;
            }
        }
        print "Runner $runnernum (id $rid) running test $runnersrunning{$rid} in state $singletest_state{$rid}\r\n";
    }
}
$SIG{USR1} = \&catch_usr1;
########################################################################## ##########################################################################
# Clear all possible '*_proxy' environment variables for various protocols # Clear all possible '*_proxy' environment variables for various protocols
# to prevent them to interfere with our testing! # to prevent them to interfere with our testing!
@ -2734,14 +2751,17 @@ while () {
if($globalabort) { if($globalabort) {
logmsg singletest_dumplogs(); logmsg singletest_dumplogs();
logmsg "Aborting tests\n"; logmsg "Aborting tests\n";
logmsg "Waiting for tests to finish...\n"; logmsg "Waiting for " . scalar((keys %runnersrunning)) . " outstanding test(s) to finish...\n";
# Wait for the last requests to complete and throw them away so # Wait for the last requests to complete and throw them away so
# that IPC calls & responses stay in sync # that IPC calls & responses stay in sync
# TODO: send a signal to the runners to interrupt a long test # TODO: send a signal to the runners to interrupt a long test
foreach my $rid (keys %runnersrunning) { foreach my $rid (keys %runnersrunning) {
runnerar($rid); runnerar($rid);
delete $runnersrunning{$rid}; delete $runnersrunning{$rid};
logmsg ".";
$| = 1;
} }
logmsg "\n";
last; last;
} }
@ -2770,7 +2790,7 @@ while () {
# See if we've completed all the tests # See if we've completed all the tests
if(!scalar(%runnersrunning)) { if(!scalar(%runnersrunning)) {
# No runners are running; we must be done # No runners are running; we must be done
scalar(@runtests) && die 'Internal error: tests to run'; scalar(@runtests) && die 'Internal error: still have tests to run';
last; last;
} }
@ -2783,7 +2803,7 @@ while () {
if($ridready) { if($ridready) {
# This runner is ready to be serviced # This runner is ready to be serviced
my $testnum = $runnersrunning{$ridready}; my $testnum = $runnersrunning{$ridready};
defined $testnum || die 'Internal error: test for runner unknown'; defined $testnum || die "Internal error: test for runner $ridready unknown";
delete $runnersrunning{$ridready}; delete $runnersrunning{$ridready};
my ($error, $again) = singletest($ridready, $testnum, $countforrunner{$ridready}, $totaltests); my ($error, $again) = singletest($ridready, $testnum, $countforrunner{$ridready}, $totaltests);
if($again) { if($again) {