Bug 540004, part 2: Detect hangs while awaiting synchronous IPC replies (on POSIX). r=bent

This commit is contained in:
Chris Jones 2010-02-09 18:02:54 -06:00
Родитель 188261c0b2
Коммит 4d42ce54b3
7 изменённых файлов: 160 добавлений и 35 удалений

Просмотреть файл

@ -154,10 +154,13 @@ AsyncChannel::Open(Transport* aTransport, MessageLoop* aIOLoop)
void
AsyncChannel::Close()
{
AssertWorkerThread();
{
MutexAutoLock lock(mMutex);
if (ChannelError == mChannelState) {
if (ChannelError == mChannelState ||
ChannelTimeout == mChannelState) {
// See bug 538586: if the listener gets deleted while the
// IO thread's NotifyChannelError event is still enqueued
// and subsequently deletes us, then the error event will
@ -179,23 +182,25 @@ AsyncChannel::Close()
// notify the other side that we're about to close our socket
SendSpecialMessage(new GoodbyeMessage());
mChannelState = ChannelClosing;
// and post the task that will do the actual close
mIOLoop->PostTask(
FROM_HERE, NewRunnableMethod(this, &AsyncChannel::OnCloseChannel));
while (ChannelClosing == mChannelState)
mCvar.Wait();
// TODO sort out Close() on this side racing with Close() on the
// other side
mChannelState = ChannelClosed;
SynchronouslyClose();
}
return NotifyChannelClosed();
}
// Ask the IO thread to close the channel, then block until the close has
// been acknowledged.  Must run on the worker thread with mMutex held (both
// preconditions are asserted below).
void
AsyncChannel::SynchronouslyClose()
{
    AssertWorkerThread();
    mMutex.AssertCurrentThreadOwns();

    // Hand the close request to the IO loop as an OnCloseChannel task.
    mIOLoop->PostTask(
        FROM_HERE,
        NewRunnableMethod(this, &AsyncChannel::OnCloseChannel));

    // Sleep on the condition variable until the state reads ChannelClosed.
    while (mChannelState != ChannelClosed) {
        mCvar.Wait();
    }
}
bool
AsyncChannel::Send(Message* msg)
{
@ -369,6 +374,8 @@ AsyncChannel::ReportConnectionError(const char* channelName)
case ChannelOpening:
errorMsg = "Opening channel: not yet ready for send/recv";
break;
case ChannelTimeout:
errorMsg = "Channel timeout: cannot send/recv";
case ChannelError:
errorMsg = "Channel error: cannot send/recv";
break;

Просмотреть файл

@ -75,6 +75,7 @@ protected:
ChannelClosed,
ChannelOpening,
ChannelConnected,
ChannelTimeout,
ChannelClosing,
ChannelError
};
@ -146,6 +147,9 @@ protected:
virtual bool OnSpecialMessage(uint16 id, const Message& msg);
void SendSpecialMessage(Message* msg);
// Tell the IO thread to close the channel and wait for it to ACK.
void SynchronouslyClose();
bool MaybeHandleError(Result code, const char* channelName);
void ReportConnectionError(const char* channelName);

Просмотреть файл

@ -114,6 +114,20 @@ RPCChannel::~RPCChannel()
int RPCChannel::sInnerEventLoopDepth = 0;
#endif
// Report whether the worker thread has a reason to stop waiting inside an
// RPC call: the channel is no longer connected, a message is pending, or an
// out-of-turn reply matching the call on top of the stack has been stored.
//
// Asserted preconditions: worker thread, mMutex held, and at least one
// call on the stack (i.e. we are inside the wait loop).
bool
RPCChannel::EventOccurred()
{
    AssertWorkerThread();
    mMutex.AssertCurrentThreadOwns();
    RPC_ASSERT(StackDepth() > 0, "not in wait loop");

    if (!Connected())
        return true;

    if (!mPending.empty())
        return true;

    // Only consult the stack top once we know there is a reply to look for.
    if (mOutOfTurnReplies.empty())
        return false;

    return mOutOfTurnReplies.end() !=
        mOutOfTurnReplies.find(mStack.top().seqno());
}
bool
RPCChannel::Call(Message* msg, Message* reply)
{
@ -146,11 +160,17 @@ RPCChannel::Call(Message* msg, Message* reply)
// here we're waiting for something to happen. see long
// comment about the queue in RPCChannel.h
while (Connected() && mPending.empty() &&
(mOutOfTurnReplies.empty() ||
mOutOfTurnReplies.find(mStack.top().seqno())
== mOutOfTurnReplies.end())) {
RPCChannel::WaitForNotify();
while (!EventOccurred()) {
bool maybeTimedOut = !RPCChannel::WaitForNotify();
if (EventOccurred())
break;
// an event didn't occur. So we better have timed out!
NS_ABORT_IF_FALSE(maybeTimedOut,
"neither received a reply nor detected a hang!");
if (!ShouldContinueFromTimeout())
return false;
}
if (!Connected()) {
@ -589,7 +609,7 @@ RPCChannel::OnMessageReceived(const Message& msg)
mWorkerLoop->PostTask(
FROM_HERE,
NewRunnableMethod(this, &RPCChannel::OnMaybeDequeueOne));
else
else if (!AwaitingSyncReply())
NotifyWorkerThread();
}

Просмотреть файл

@ -138,6 +138,8 @@ protected:
private:
// Called on worker thread only
bool EventOccurred();
void MaybeProcessDeferredIncall();
void EnqueuePendingMessages();

Просмотреть файл

@ -54,11 +54,14 @@ struct RunnableMethodTraits<mozilla::ipc::SyncChannel>
namespace mozilla {
namespace ipc {
const int32 SyncChannel::kNoTimeout = PR_INT32_MIN;
SyncChannel::SyncChannel(SyncListener* aListener)
: AsyncChannel(aListener),
mPendingReply(0),
mProcessingSyncMessage(false),
mNextSeqno(0)
mNextSeqno(0),
mTimeoutMs(kNoTimeout)
{
MOZ_COUNT_CTOR(SyncChannel);
}
@ -66,12 +69,21 @@ SyncChannel::SyncChannel(SyncListener* aListener)
SyncChannel::~SyncChannel()
{
MOZ_COUNT_DTOR(SyncChannel);
// FIXME/cjones: impl
}
// static
bool SyncChannel::sIsPumpingMessages = false;
// Report whether the worker thread has a reason to stop waiting for a sync
// reply: either the channel is no longer connected, or mRecvd carries a
// message with a non-zero type.
//
// Asserted preconditions: worker thread, mMutex held, and a sync reply is
// currently being awaited.
bool
SyncChannel::EventOccurred()
{
    AssertWorkerThread();
    mMutex.AssertCurrentThreadOwns();
    NS_ABORT_IF_FALSE(AwaitingSyncReply(), "not in wait loop");

    if (!Connected())
        return true;

    return mRecvd.type() != 0;
}
bool
SyncChannel::Send(Message* msg, Message* reply)
{
@ -96,16 +108,18 @@ SyncChannel::Send(Message* msg, Message* reply)
FROM_HERE,
NewRunnableMethod(this, &SyncChannel::OnSend, msg));
// NB: this is a do-while loop instead of a single wait because if
// there's a pending RPC out- or in-call below us, and the sync
// message handler on the other side sends us an async message,
// the IO thread will Notify() this thread of the async message.
// See https://bugzilla.mozilla.org/show_bug.cgi?id=538239.
do {
// wait for the next sync message to arrive
SyncChannel::WaitForNotify();
} while(Connected() &&
mPendingReply != mRecvd.type() && !mRecvd.is_reply_error());
while (1) {
bool maybeTimedOut = !SyncChannel::WaitForNotify();
if (EventOccurred())
break;
// an event didn't occur. So we better have timed out!
NS_ABORT_IF_FALSE(maybeTimedOut,
"neither received a reply nor detected a hang!");
if (!ShouldContinueFromTimeout())
return false;
}
if (!Connected()) {
ReportConnectionError("SyncChannel");
@ -206,15 +220,64 @@ SyncChannel::OnChannelError()
// Synchronization between worker and IO threads
//
namespace {

// Return true when a finite timeout |aTimeout| has elapsed since |aStart|.
// A timeout of PR_INTERVAL_NO_TIMEOUT never expires.
// NOTE(review): the subtraction presumably relies on PRIntervalTime being
// an unsigned tick count so that counter wraparound is harmless -- confirm
// against prinrval.h.
bool
IsTimeoutExpired(PRIntervalTime aStart, PRIntervalTime aTimeout)
{
    if (PR_INTERVAL_NO_TIMEOUT == aTimeout)
        return false;

    PRIntervalTime elapsed = PR_IntervalNow() - aStart;
    return elapsed >= aTimeout;
}

} // namespace <anon>
// Decide what to do after a wait may have timed out.  Returns true if the
// caller should keep waiting.  Otherwise the channel is synchronously closed,
// its state is set to ChannelTimeout, and false is returned.
//
// The decision is currently hard-coded to "keep waiting", so the close path
// below is never taken as written.
//
// Asserted preconditions: worker thread, mMutex held.
bool
SyncChannel::ShouldContinueFromTimeout()
{
    AssertWorkerThread();
    mMutex.AssertCurrentThreadOwns();

    bool keepWaiting = true;
    if (keepWaiting)
        return true;

    // NB: there's a subtlety here.  If parents were allowed to send
    // sync messages to children, then it would be possible for this
    // synchronous close-on-timeout to race with async
    // |OnMessageReceived| tasks arriving from the child, posted to
    // the worker thread's event loop.  This would complicate cleanup
    // of the *Channel.  But since IPDL forbids this (and since it
    // doesn't support children timing out on parents), the parent
    // can only block on RPC messages to the child, and in that case
    // arriving async messages are enqueued to the RPC channel's
    // special queue.  They're then ignored because the channel state
    // changes to ChannelTimeout (i.e. !Connected).
    SynchronouslyClose();
    // Mark the timeout only after the close has completed.
    mChannelState = ChannelTimeout;

    return false;
}
// Windows versions of the following two functions live in
// WindowsMessageLoop.cpp.
#ifndef OS_WIN
void
bool
SyncChannel::WaitForNotify()
{
mCvar.Wait();
PRIntervalTime timeout = (kNoTimeout == mTimeoutMs) ?
PR_INTERVAL_NO_TIMEOUT :
PR_MillisecondsToInterval(mTimeoutMs);
// XXX could optimize away this syscall for "no timeout" case if desired
PRIntervalTime waitStart = PR_IntervalNow();
mCvar.Wait(timeout);
// if the timeout didn't expire, we know we received an event.
// The converse is not true.
return !IsTimeoutExpired(waitStart, timeout);
}
void

Просмотреть файл

@ -40,6 +40,10 @@
#ifndef ipc_glue_SyncChannel_h
#define ipc_glue_SyncChannel_h 1
#include "base/basictypes.h"
#include "prinrval.h"
#include "mozilla/ipc/AsyncChannel.h"
namespace mozilla {
@ -52,6 +56,8 @@ protected:
typedef uint16 MessageId;
public:
static const int32 kNoTimeout;
class /*NS_INTERFACE_CLASS*/ SyncListener :
public AsyncChannel::AsyncListener
{
@ -101,7 +107,23 @@ protected:
return AsyncChannel::OnSpecialMessage(id, msg);
}
void WaitForNotify();
//
// Return true if the wait ended because a notification was
// received. That is, true => event received.
//
// Return false if the time elapsed from when we started the
// process of waiting until afterwards exceeded the currently
// allotted timeout. That *DOES NOT* mean false => "no event" (==
// timeout); there are many circumstances that could cause the
// measured elapsed time to exceed the timeout EVEN WHEN we were
// notified.
//
// So in sum: true is a meaningful return value; false isn't,
// necessarily.
//
bool WaitForNotify();
bool ShouldContinueFromTimeout();
// Executed on the IO thread.
void OnSendReply(Message* msg);
@ -126,6 +148,11 @@ protected:
int32 mNextSeqno;
static bool sIsPumpingMessages;
private:
bool EventOccurred();
int32 mTimeoutMs;
};

Просмотреть файл

@ -612,7 +612,7 @@ RPCChannel::IsMessagePending()
return false;
}
void
bool
SyncChannel::WaitForNotify()
{
mMutex.AssertCurrentThreadOwns();
@ -861,6 +861,8 @@ SyncChannel::NotifyWorkerThread()
if (!PostThreadMessage(gUIThreadId, gEventLoopMessage, 0, 0)) {
NS_WARNING("Failed to post thread message!");
}
return true;
}
void