From 2fe52412b1b6ce46f1437c5f7758215a2b9ce05e Mon Sep 17 00:00:00 2001 From: Dan Fandrich Date: Wed, 14 Jun 2023 15:27:07 -0700 Subject: [PATCH] runtests: improve error logging Give more information about test harness error conditions to help figure out what might be wrong. Print some internal test state when SIGUSR1 is sent to runtests.pl. Ref: #11328 --- tests/runner.pm | 10 ++++++---- tests/runtests.pl | 28 ++++++++++++++++++++++++---- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/tests/runner.pm b/tests/runner.pm index 5627b5517..c4d37f8e3 100644 --- a/tests/runner.pm +++ b/tests/runner.pm @@ -185,9 +185,10 @@ sub runner_init { # Create a separate process in multiprocess mode my $child = fork(); if(0 == $child) { - # TODO: set up a better signal handler + # TODO: set up better signal handlers $SIG{INT} = 'IGNORE'; $SIG{TERM} = 'IGNORE'; + $SIG{USR1} = 'IGNORE'; $thisrunnerid = $$; print "Runner $thisrunnerid starting\n" if($verbose); @@ -1270,6 +1271,7 @@ sub runnerar_ready { $maxfileno = $fd; } } + $maxfileno || die "Internal error: no runners are available to wait on\n"; # Wait for any pipe from any runner to be ready # TODO: this is relatively slow with hundreds of fds @@ -1293,13 +1295,13 @@ sub ipcrecv { my $err; my $datalen; while(! defined ($err = sysread($runnerr, $datalen, 4)) || $err <= 0) { - $!{EINTR} || die "error in ipcrecv: $!\n"; + $!{EINTR} || die "error $err in ipcrecv: $! in runner $$ for $LOGDIR\n"; # system call was interrupted, probably by ^C; restart it so we stay in sync } my $len=unpack("L", $datalen); my $buf; while(! defined ($err = sysread($runnerr, $buf, $len)) || $err <= 0) { - $!{EINTR} || die "error in ipcrecv: $!\n"; + $!{EINTR} || die "error $err in ipcrecv: $! in runner $$ for $LOGDIR\n"; # system call was interrupted, probably by ^C; restart it so we stay in sync } @@ -1336,7 +1338,7 @@ sub ipcrecv { # Marshall the results to return $buf = freeze \@res; - syswrite($runnerw, (pack "L", length($buf)) . $buf); + defined syswrite($runnerw, (pack "L", length($buf)) . $buf) || $!{EINTR} || die "error $err in ipcrecv write: $! in runner $$ for $LOGDIR\n"; return 0; } diff --git a/tests/runtests.pl b/tests/runtests.pl index 83933bd7b..2c10c1277 100755 --- a/tests/runtests.pl +++ b/tests/runtests.pl @@ -250,12 +250,29 @@ sub singletest_dumplogs { sub catch_zap { my $signame = shift; - logmsg "runtests.pl received SIG$signame, exiting\n"; + print "runtests.pl received SIG$signame, exiting\r\n"; $globalabort = 1; } $SIG{INT} = \&catch_zap; $SIG{TERM} = \&catch_zap; +sub catch_usr1 { + print "runtests.pl internal state:\r\n"; + print scalar(%runnersrunning) . " busy test runner(s) of " . scalar(keys %runnerids) . "\r\n"; + foreach my $rid (sort(keys(%runnersrunning))) { + my $runnernum = "unknown"; + foreach my $rnum (keys %runnerids) { + if($runnerids{$rnum} == $rid) { + $runnernum = $rnum; + last; + } + } + print "Runner $runnernum (id $rid) running test $runnersrunning{$rid} in state $singletest_state{$rid}\r\n"; + } +} + +$SIG{USR1} = \&catch_usr1; + ########################################################################## # Clear all possible '*_proxy' environment variables for various protocols # to prevent them to interfere with our testing! @@ -2734,14 +2751,17 @@ while () { if($globalabort) { logmsg singletest_dumplogs(); logmsg "Aborting tests\n"; - logmsg "Waiting for tests to finish...\n"; + logmsg "Waiting for " . scalar((keys %runnersrunning)) . " outstanding test(s) to finish...\n"; # Wait for the last requests to complete and throw them away so # that IPC calls & responses stay in sync # TODO: send a signal to the runners to interrupt a long test foreach my $rid (keys %runnersrunning) { runnerar($rid); delete $runnersrunning{$rid}; + logmsg "."; + $| = 1; } + logmsg "\n"; last; } @@ -2770,7 +2790,7 @@ while () { # See if we've completed all the tests if(!scalar(%runnersrunning)) { # No runners are running; we must be done - scalar(@runtests) && die 'Internal error: tests to run'; + scalar(@runtests) && die 'Internal error: still have tests to run'; last; } @@ -2783,7 +2803,7 @@ while () { if($ridready) { # This runner is ready to be serviced my $testnum = $runnersrunning{$ridready}; - defined $testnum || die 'Internal error: test for runner unknown'; + defined $testnum || die "Internal error: test for runner $ridready unknown"; delete $runnersrunning{$ridready}; my ($error, $again) = singletest($ridready, $testnum, $countforrunner{$ridready}, $totaltests); if($again) {