зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1694949 - Add event telemetry containing failure reasons that lead to TRR confirmation failure r=nhnt11,dragana,necko-reviewers
Differential Revision: https://phabricator.services.mozilla.com/D106618
This commit is contained in:
Родитель
f3d447ac6a
Коммит
a13ad605e4
|
@ -930,8 +930,7 @@ void TRR::ReportStatus(nsresult aStatusCode) {
|
||||||
// it as failed; otherwise it can cause the confirmation to fail.
|
// it as failed; otherwise it can cause the confirmation to fail.
|
||||||
if (UseDefaultServer() && aStatusCode != NS_ERROR_ABORT) {
|
if (UseDefaultServer() && aStatusCode != NS_ERROR_ABORT) {
|
||||||
// Bad content is still considered "okay" if the HTTP response is okay
|
// Bad content is still considered "okay" if the HTTP response is okay
|
||||||
gTRRService->TRRIsOkay(NS_SUCCEEDED(aStatusCode) ? TRRService::OKAY_NORMAL
|
gTRRService->TRRIsOkay(aStatusCode);
|
||||||
: TRRService::OKAY_BAD);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -947,26 +947,72 @@ TRRService::Notify(nsITimer* aTimer) {
|
||||||
return NS_OK;
|
return NS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
void TRRService::TRRIsOkay(enum TrrOkay aReason) {
|
// Translates the outcome of a TRR request into a single-character code used
// in the telemetry result strings.
//
// If the resolution fails in the TRR channel, aChannelStatus is a failure
// code. Otherwise the response gets parsed; if it is not a valid DNS packet or
// does not contain the correct responses, aLookupStatus is a failure code.
//
// Returned codes:
//   '+'  channel OK and lookup OK
//   '-'  channel OK but lookup failed
//   'o'  the IO service reports no connectivity (browser is offline)
//   't'  NS_ERROR_NET_TIMEOUT_EXTERNAL (TRR timeout expired)
//   'd'  NS_ERROR_UNKNOWN_HOST (TRRServiceChannel could not resolve the host)
//   'n'  any other error from the network module
//   '?'  some other kind of failure
static char StatusToChar(nsresult aLookupStatus, nsresult aChannelStatus) {
  // The channel itself succeeded, so the lookup status decides the outcome.
  if (aChannelStatus == NS_OK) {
    if (aLookupStatus == NS_OK) {
      return '+';
    }
    return '-';
  }

  // The offline check takes precedence over the specific channel error.
  if (nsCOMPtr<nsIIOService> ioService = do_GetIOService()) {
    // Stays true if the getter does not overwrite it.
    bool hasConnectivity = true;
    ioService->GetConnectivity(&hasConnectivity);
    if (!hasConnectivity) {
      // Browser has no active network interfaces = is offline.
      return 'o';
    }
  }

  // TRR timeout expired.
  if (aChannelStatus == NS_ERROR_NET_TIMEOUT_EXTERNAL) {
    return 't';
  }

  // TRRServiceChannel failed due to an unresolved host.
  if (aChannelStatus == NS_ERROR_UNKNOWN_HOST) {
    return 'd';
  }

  // Any remaining error from the network module is a generic network error;
  // everything else is some other kind of failure.
  const bool isNetworkError =
      NS_ERROR_GET_MODULE(aChannelStatus) == NS_ERROR_MODULE_NETWORK;
  return isNetworkError ? 'n' : '?';
}
|
||||||
|
|
||||||
|
void TRRService::TRRIsOkay(nsresult aChannelStatus) {
|
||||||
MOZ_ASSERT_IF(XRE_IsParentProcess(), NS_IsMainThread() || IsOnTRRThread());
|
MOZ_ASSERT_IF(XRE_IsParentProcess(), NS_IsMainThread() || IsOnTRRThread());
|
||||||
MOZ_ASSERT_IF(XRE_IsSocketProcess(), NS_IsMainThread());
|
MOZ_ASSERT_IF(XRE_IsSocketProcess(), NS_IsMainThread());
|
||||||
|
|
||||||
Telemetry::AccumulateCategoricalKeyed(
|
Telemetry::AccumulateCategoricalKeyed(
|
||||||
ProviderKey(), aReason == OKAY_NORMAL
|
ProviderKey(), NS_SUCCEEDED(aChannelStatus)
|
||||||
? Telemetry::LABELS_DNS_TRR_SUCCESS3::Fine
|
? Telemetry::LABELS_DNS_TRR_SUCCESS3::Fine
|
||||||
: (aReason == OKAY_TIMEOUT
|
: (aChannelStatus == NS_ERROR_NET_TIMEOUT_EXTERNAL
|
||||||
? Telemetry::LABELS_DNS_TRR_SUCCESS3::Timeout
|
? Telemetry::LABELS_DNS_TRR_SUCCESS3::Timeout
|
||||||
: Telemetry::LABELS_DNS_TRR_SUCCESS3::Bad));
|
: Telemetry::LABELS_DNS_TRR_SUCCESS3::Bad));
|
||||||
if (aReason == OKAY_NORMAL) {
|
if (NS_SUCCEEDED(aChannelStatus)) {
|
||||||
mConfirmation.mTRRFailures = 0;
|
mConfirmation.mTRRFailures = 0;
|
||||||
} else if ((mMode == nsIDNSService::MODE_TRRFIRST) &&
|
} else if ((mMode == nsIDNSService::MODE_TRRFIRST) &&
|
||||||
(mConfirmation.mState == CONFIRM_OK)) {
|
(mConfirmation.mState == CONFIRM_OK)) {
|
||||||
// only count failures while in OK state
|
// only count failures while in OK state
|
||||||
|
mConfirmation.mFailureReasons[mConfirmation.mTRRFailures %
|
||||||
|
ConfirmationContext::RESULTS_SIZE] =
|
||||||
|
StatusToChar(NS_OK, aChannelStatus);
|
||||||
uint32_t fails = ++mConfirmation.mTRRFailures;
|
uint32_t fails = ++mConfirmation.mTRRFailures;
|
||||||
|
|
||||||
if (fails >= StaticPrefs::network_trr_max_fails()) {
|
if (fails >= StaticPrefs::network_trr_max_fails()) {
|
||||||
LOG(("TRRService goes FAILED after %u failures in a row\n", fails));
|
LOG(("TRRService goes FAILED after %u failures in a row\n", fails));
|
||||||
mConfirmation.mState = CONFIRM_FAILED;
|
mConfirmation.mState = CONFIRM_FAILED;
|
||||||
mConfirmation.mTrigger.Assign("failed-lookups");
|
mConfirmation.mTrigger.Assign("failed-lookups");
|
||||||
|
mConfirmation.mFailedLookups =
|
||||||
|
nsDependentCSubstring(mConfirmation.mFailureReasons,
|
||||||
|
fails % ConfirmationContext::RESULTS_SIZE);
|
||||||
// Fire off a timer and start re-trying the NS domain again
|
// Fire off a timer and start re-trying the NS domain again
|
||||||
NS_NewTimerWithCallback(getter_AddRefs(mConfirmation.mTimer), this,
|
NS_NewTimerWithCallback(getter_AddRefs(mConfirmation.mTimer), this,
|
||||||
mConfirmation.mRetryInterval,
|
mConfirmation.mRetryInterval,
|
||||||
|
@ -985,6 +1031,7 @@ void TRRService::ConfirmationContext::RecordEvent(const char* aReason) {
|
||||||
mFirstRequestTime = TimeStamp();
|
mFirstRequestTime = TimeStamp();
|
||||||
mContextChangeReason.Assign(aReason);
|
mContextChangeReason.Assign(aReason);
|
||||||
mTrigger.Truncate();
|
mTrigger.Truncate();
|
||||||
|
mFailedLookups.Truncate();
|
||||||
|
|
||||||
mRetryInterval = StaticPrefs::network_trr_retry_timeout_ms();
|
mRetryInterval = StaticPrefs::network_trr_retry_timeout_ms();
|
||||||
};
|
};
|
||||||
|
@ -1038,6 +1085,11 @@ void TRRService::ConfirmationContext::RecordEvent(const char* aReason) {
|
||||||
nsPrintfCString("%i", mCaptivePortalStatus)},
|
nsPrintfCString("%i", mCaptivePortalStatus)},
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (mTrigger.Equals("failed-lookups"_ns)) {
|
||||||
|
extra.ref().AppendElement(
|
||||||
|
Telemetry::EventExtraEntry{"failedLookups"_ns, mFailedLookups});
|
||||||
|
}
|
||||||
|
|
||||||
ConfirmationState state = mState;
|
ConfirmationState state = mState;
|
||||||
Telemetry::RecordEvent(eventType, mozilla::Some(nsPrintfCString("%u", state)),
|
Telemetry::RecordEvent(eventType, mozilla::Some(nsPrintfCString("%u", state)),
|
||||||
extra);
|
extra);
|
||||||
|
@ -1047,33 +1099,8 @@ void TRRService::ConfirmationContext::RecordEvent(const char* aReason) {
|
||||||
|
|
||||||
void TRRService::ConfirmationContext::RequestCompleted(
|
void TRRService::ConfirmationContext::RequestCompleted(
|
||||||
nsresult aLookupStatus, nsresult aChannelStatus) {
|
nsresult aLookupStatus, nsresult aChannelStatus) {
|
||||||
auto statusToChar = [aLookupStatus, aChannelStatus]() -> char {
|
mResults[mAttemptCount % RESULTS_SIZE] =
|
||||||
if (aChannelStatus == NS_OK) {
|
StatusToChar(aLookupStatus, aChannelStatus);
|
||||||
// Return + if confirmation was OK, or - if confirmation failed
|
|
||||||
return aLookupStatus == NS_OK ? '+' : '-';
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (aChannelStatus) {
|
|
||||||
case NS_ERROR_NET_TIMEOUT_EXTERNAL:
|
|
||||||
// TRR timeout expired
|
|
||||||
return 't';
|
|
||||||
case NS_ERROR_UNKNOWN_HOST:
|
|
||||||
// TRRServiceChannel failed to due to unresolved host
|
|
||||||
return 'd';
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// The error is a network error
|
|
||||||
if (NS_ERROR_GET_MODULE(aChannelStatus) == NS_ERROR_MODULE_NETWORK) {
|
|
||||||
return 'n';
|
|
||||||
}
|
|
||||||
|
|
||||||
// Some other kind of failure.
|
|
||||||
return '?';
|
|
||||||
};
|
|
||||||
|
|
||||||
mResults[mAttemptCount % RESULTS_SIZE] = statusToChar();
|
|
||||||
mAttemptCount++;
|
mAttemptCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -62,8 +62,7 @@ class TRRService : public TRRServiceBase,
|
||||||
bool IsExcludedFromTRR(const nsACString& aHost);
|
bool IsExcludedFromTRR(const nsACString& aHost);
|
||||||
|
|
||||||
bool MaybeBootstrap(const nsACString& possible, nsACString& result);
|
bool MaybeBootstrap(const nsACString& possible, nsACString& result);
|
||||||
enum TrrOkay { OKAY_NORMAL = 0, OKAY_TIMEOUT = 1, OKAY_BAD = 2 };
|
void TRRIsOkay(nsresult aChannelStatus);
|
||||||
void TRRIsOkay(enum TrrOkay aReason);
|
|
||||||
bool ParentalControlEnabled() const { return mParentalControlEnabled; }
|
bool ParentalControlEnabled() const { return mParentalControlEnabled; }
|
||||||
|
|
||||||
nsresult DispatchTRRRequest(TRR* aTrrRequest);
|
nsresult DispatchTRRRequest(TRR* aTrrRequest);
|
||||||
|
@ -146,9 +145,9 @@ class TRRService : public TRRServiceBase,
|
||||||
};
|
};
|
||||||
|
|
||||||
class ConfirmationContext {
|
class ConfirmationContext {
|
||||||
|
public:
|
||||||
static const size_t RESULTS_SIZE = 32;
|
static const size_t RESULTS_SIZE = 32;
|
||||||
|
|
||||||
public:
|
|
||||||
Atomic<ConfirmationState, Relaxed> mState;
|
Atomic<ConfirmationState, Relaxed> mState;
|
||||||
RefPtr<TRR> mTask;
|
RefPtr<TRR> mTask;
|
||||||
nsCOMPtr<nsITimer> mTimer;
|
nsCOMPtr<nsITimer> mTimer;
|
||||||
|
@ -156,6 +155,10 @@ class TRRService : public TRRServiceBase,
|
||||||
// The number of TRR requests that failed in a row.
|
// The number of TRR requests that failed in a row.
|
||||||
Atomic<uint32_t, Relaxed> mTRRFailures;
|
Atomic<uint32_t, Relaxed> mTRRFailures;
|
||||||
|
|
||||||
|
// This buffer holds consecutive TRR failures reported by calling
|
||||||
|
// TRRIsOkay(). It is only meant for reporting event telemetry.
|
||||||
|
char mFailureReasons[RESULTS_SIZE] = {0};
|
||||||
|
|
||||||
// The number of confirmation retries.
|
// The number of confirmation retries.
|
||||||
uint32_t mAttemptCount = 0;
|
uint32_t mAttemptCount = 0;
|
||||||
|
|
||||||
|
@ -177,6 +180,10 @@ class TRRService : public TRRServiceBase,
|
||||||
// What triggered the confirmation
|
// What triggered the confirmation
|
||||||
nsCString mTrigger;
|
nsCString mTrigger;
|
||||||
|
|
||||||
|
// String representation of consecutive failed lookups that triggered
|
||||||
|
// confirmation.
|
||||||
|
nsCString mFailedLookups;
|
||||||
|
|
||||||
// Called when a confirmation completes successfully or when the
|
// Called when a confirmation completes successfully or when the
|
||||||
// confirmation context changes.
|
// confirmation context changes.
|
||||||
void RecordEvent(const char* aReason);
|
void RecordEvent(const char* aReason);
|
||||||
|
|
|
@ -2111,6 +2111,7 @@ network.dns:
|
||||||
objects: ["context"]
|
objects: ["context"]
|
||||||
bug_numbers:
|
bug_numbers:
|
||||||
- 1691408
|
- 1691408
|
||||||
|
- 1694949
|
||||||
description: >
|
description: >
|
||||||
This telemetry records the status of the TRR confirmation.
|
This telemetry records the status of the TRR confirmation.
|
||||||
The value of the event is one of:
|
The value of the event is one of:
|
||||||
|
@ -2138,6 +2139,9 @@ network.dns:
|
||||||
String representation of the last 32 confirmation results.
|
String representation of the last 32 confirmation results.
|
||||||
Example: nnnnnnttttttttt indicates a number of network (n) failures
|
Example: nnnnnnttttttttt indicates a number of network (n) failures
|
||||||
followed by timeouts (t).
|
followed by timeouts (t).
|
||||||
|
failedLookups: >
|
||||||
|
When the trigger is failed-lookups, this contains the string
|
||||||
|
representation of the failures that triggered the confirmation.
|
||||||
networkID: >
|
networkID: >
|
||||||
The network ID for the recorded confirmation attempts
|
The network ID for the recorded confirmation attempts
|
||||||
captivePortal: >
|
captivePortal: >
|
||||||
|
|
Загрузка…
Ссылка в новой задаче