зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1694949 - Add event telemetry containing failure reasons that lead to TRR confirmation failure r=nhnt11,dragana,necko-reviewers
Differential Revision: https://phabricator.services.mozilla.com/D106618
This commit is contained in:
Родитель
f3d447ac6a
Коммит
a13ad605e4
|
@ -930,8 +930,7 @@ void TRR::ReportStatus(nsresult aStatusCode) {
|
|||
// it as failed; otherwise it can cause the confirmation to fail.
|
||||
if (UseDefaultServer() && aStatusCode != NS_ERROR_ABORT) {
|
||||
// Bad content is still considered "okay" if the HTTP response is okay
|
||||
gTRRService->TRRIsOkay(NS_SUCCEEDED(aStatusCode) ? TRRService::OKAY_NORMAL
|
||||
: TRRService::OKAY_BAD);
|
||||
gTRRService->TRRIsOkay(aStatusCode);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -947,26 +947,72 @@ TRRService::Notify(nsITimer* aTimer) {
|
|||
return NS_OK;
|
||||
}
|
||||
|
||||
void TRRService::TRRIsOkay(enum TrrOkay aReason) {
|
||||
// Maps the outcome of a single TRR request to a one-character code.
//
// aLookupStatus  - result of parsing/validating the DNS response.
// aChannelStatus - result of the TRR channel itself.
//
// Returned codes, in the order they are checked:
//   '+'  channel status is NS_OK and lookup status is NS_OK
//   '-'  channel status is NS_OK but lookup status is not NS_OK
//   'o'  channel failed and the IO service reports no connectivity
//   't'  channel failed with NS_ERROR_NET_TIMEOUT_EXTERNAL
//   'd'  channel failed with NS_ERROR_UNKNOWN_HOST
//   'n'  channel failed with any other error in NS_ERROR_MODULE_NETWORK
//   '?'  any other failure
//
// Note that the connectivity check runs before the switch, so 'o' takes
// precedence over 't', 'd' and 'n' when there is no connectivity.
static char StatusToChar(nsresult aLookupStatus, nsresult aChannelStatus) {
|
||||
// If the resolution fails in the TRR channel then we'll have a failed
|
||||
// aChannelStatus. Otherwise, we parse the response - if it's not a valid DNS
|
||||
// packet or doesn't contain the correct responses aLookupStatus will be a
|
||||
// failure code.
|
||||
if (aChannelStatus == NS_OK) {
|
||||
// Return + if confirmation was OK, or - if confirmation failed
|
||||
return aLookupStatus == NS_OK ? '+' : '-';
|
||||
}
|
||||
|
||||
// The connectivity check is skipped entirely if the IO service is unavailable.
if (nsCOMPtr<nsIIOService> ios = do_GetIOService()) {
|
||||
// Initialized to true and the return value of GetConnectivity() is ignored,
// so if the call leaves the flag untouched we assume connectivity.
bool hasConnectiviy = true;
|
||||
ios->GetConnectivity(&hasConnectiviy);
|
||||
if (!hasConnectiviy) {
|
||||
// Browser has no active network interfaces = is offline.
|
||||
return 'o';
|
||||
}
|
||||
}
|
||||
|
||||
switch (aChannelStatus) {
|
||||
case NS_ERROR_NET_TIMEOUT_EXTERNAL:
|
||||
// TRR timeout expired
|
||||
return 't';
|
||||
case NS_ERROR_UNKNOWN_HOST:
|
||||
// TRRServiceChannel failed due to unresolved host
|
||||
return 'd';
|
||||
default:
|
||||
// Anything else is classified by the checks below the switch.
break;
|
||||
}
|
||||
|
||||
// The error is a network error
|
||||
if (NS_ERROR_GET_MODULE(aChannelStatus) == NS_ERROR_MODULE_NETWORK) {
|
||||
return 'n';
|
||||
}
|
||||
|
||||
// Some other kind of failure.
|
||||
return '?';
|
||||
}
|
||||
|
||||
void TRRService::TRRIsOkay(nsresult aChannelStatus) {
|
||||
MOZ_ASSERT_IF(XRE_IsParentProcess(), NS_IsMainThread() || IsOnTRRThread());
|
||||
MOZ_ASSERT_IF(XRE_IsSocketProcess(), NS_IsMainThread());
|
||||
|
||||
Telemetry::AccumulateCategoricalKeyed(
|
||||
ProviderKey(), aReason == OKAY_NORMAL
|
||||
ProviderKey(), NS_SUCCEEDED(aChannelStatus)
|
||||
? Telemetry::LABELS_DNS_TRR_SUCCESS3::Fine
|
||||
: (aReason == OKAY_TIMEOUT
|
||||
: (aChannelStatus == NS_ERROR_NET_TIMEOUT_EXTERNAL
|
||||
? Telemetry::LABELS_DNS_TRR_SUCCESS3::Timeout
|
||||
: Telemetry::LABELS_DNS_TRR_SUCCESS3::Bad));
|
||||
if (aReason == OKAY_NORMAL) {
|
||||
if (NS_SUCCEEDED(aChannelStatus)) {
|
||||
mConfirmation.mTRRFailures = 0;
|
||||
} else if ((mMode == nsIDNSService::MODE_TRRFIRST) &&
|
||||
(mConfirmation.mState == CONFIRM_OK)) {
|
||||
// only count failures while in OK state
|
||||
mConfirmation.mFailureReasons[mConfirmation.mTRRFailures %
|
||||
ConfirmationContext::RESULTS_SIZE] =
|
||||
StatusToChar(NS_OK, aChannelStatus);
|
||||
uint32_t fails = ++mConfirmation.mTRRFailures;
|
||||
|
||||
if (fails >= StaticPrefs::network_trr_max_fails()) {
|
||||
LOG(("TRRService goes FAILED after %u failures in a row\n", fails));
|
||||
mConfirmation.mState = CONFIRM_FAILED;
|
||||
mConfirmation.mTrigger.Assign("failed-lookups");
|
||||
mConfirmation.mFailedLookups =
|
||||
nsDependentCSubstring(mConfirmation.mFailureReasons,
|
||||
fails % ConfirmationContext::RESULTS_SIZE);
|
||||
// Fire off a timer and start re-trying the NS domain again
|
||||
NS_NewTimerWithCallback(getter_AddRefs(mConfirmation.mTimer), this,
|
||||
mConfirmation.mRetryInterval,
|
||||
|
@ -985,6 +1031,7 @@ void TRRService::ConfirmationContext::RecordEvent(const char* aReason) {
|
|||
mFirstRequestTime = TimeStamp();
|
||||
mContextChangeReason.Assign(aReason);
|
||||
mTrigger.Truncate();
|
||||
mFailedLookups.Truncate();
|
||||
|
||||
mRetryInterval = StaticPrefs::network_trr_retry_timeout_ms();
|
||||
};
|
||||
|
@ -1038,6 +1085,11 @@ void TRRService::ConfirmationContext::RecordEvent(const char* aReason) {
|
|||
nsPrintfCString("%i", mCaptivePortalStatus)},
|
||||
});
|
||||
|
||||
if (mTrigger.Equals("failed-lookups"_ns)) {
|
||||
extra.ref().AppendElement(
|
||||
Telemetry::EventExtraEntry{"failedLookups"_ns, mFailedLookups});
|
||||
}
|
||||
|
||||
ConfirmationState state = mState;
|
||||
Telemetry::RecordEvent(eventType, mozilla::Some(nsPrintfCString("%u", state)),
|
||||
extra);
|
||||
|
@ -1047,33 +1099,8 @@ void TRRService::ConfirmationContext::RecordEvent(const char* aReason) {
|
|||
|
||||
// Records the outcome of one completed confirmation request.
//
// The one-character status code derived from aLookupStatus and aChannelStatus
// is stored in mResults at index (mAttemptCount % RESULTS_SIZE), so older
// entries are overwritten once more than RESULTS_SIZE attempts have been
// recorded. mAttemptCount is then incremented.
//
// aLookupStatus  - result of parsing/validating the DNS response.
// aChannelStatus - result of the TRR channel itself.
void TRRService::ConfirmationContext::RequestCompleted(
|
||||
nsresult aLookupStatus, nsresult aChannelStatus) {
|
||||
// Local helper that turns the two status codes into a single character:
// '+' / '-' when the channel succeeded, otherwise 't', 'd', 'n' or '?'.
auto statusToChar = [aLookupStatus, aChannelStatus]() -> char {
|
||||
if (aChannelStatus == NS_OK) {
|
||||
// Return + if confirmation was OK, or - if confirmation failed
|
||||
return aLookupStatus == NS_OK ? '+' : '-';
|
||||
}
|
||||
|
||||
switch (aChannelStatus) {
|
||||
case NS_ERROR_NET_TIMEOUT_EXTERNAL:
|
||||
// TRR timeout expired
|
||||
return 't';
|
||||
case NS_ERROR_UNKNOWN_HOST:
|
||||
// TRRServiceChannel failed due to unresolved host
|
||||
return 'd';
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
// The error is a network error
|
||||
if (NS_ERROR_GET_MODULE(aChannelStatus) == NS_ERROR_MODULE_NETWORK) {
|
||||
return 'n';
|
||||
}
|
||||
|
||||
// Some other kind of failure.
|
||||
return '?';
|
||||
};
|
||||
|
||||
// NOTE(review): the next two statements write the same mResults slot, first
// via the local lambda and then via StatusToChar(). This looks like the old
// and new sides of a diff rendered together - confirm which one is current.
mResults[mAttemptCount % RESULTS_SIZE] = statusToChar();
|
||||
mResults[mAttemptCount % RESULTS_SIZE] =
|
||||
StatusToChar(aLookupStatus, aChannelStatus);
|
||||
mAttemptCount++;
|
||||
}
|
||||
|
||||
|
|
|
@ -62,8 +62,7 @@ class TRRService : public TRRServiceBase,
|
|||
bool IsExcludedFromTRR(const nsACString& aHost);
|
||||
|
||||
bool MaybeBootstrap(const nsACString& possible, nsACString& result);
|
||||
enum TrrOkay { OKAY_NORMAL = 0, OKAY_TIMEOUT = 1, OKAY_BAD = 2 };
|
||||
void TRRIsOkay(enum TrrOkay aReason);
|
||||
void TRRIsOkay(nsresult aChannelStatus);
|
||||
bool ParentalControlEnabled() const { return mParentalControlEnabled; }
|
||||
|
||||
nsresult DispatchTRRRequest(TRR* aTrrRequest);
|
||||
|
@ -146,9 +145,9 @@ class TRRService : public TRRServiceBase,
|
|||
};
|
||||
|
||||
class ConfirmationContext {
|
||||
public:
|
||||
static const size_t RESULTS_SIZE = 32;
|
||||
|
||||
public:
|
||||
Atomic<ConfirmationState, Relaxed> mState;
|
||||
RefPtr<TRR> mTask;
|
||||
nsCOMPtr<nsITimer> mTimer;
|
||||
|
@ -156,6 +155,10 @@ class TRRService : public TRRServiceBase,
|
|||
// The number of TRR requests that failed in a row.
|
||||
Atomic<uint32_t, Relaxed> mTRRFailures;
|
||||
|
||||
// This buffer holds consecutive TRR failures reported by calling
|
||||
// TRRIsOkay(). It is only meant for reporting event telemetry.
|
||||
char mFailureReasons[RESULTS_SIZE] = {0};
|
||||
|
||||
// The number of confirmation retries.
|
||||
uint32_t mAttemptCount = 0;
|
||||
|
||||
|
@ -177,6 +180,10 @@ class TRRService : public TRRServiceBase,
|
|||
// What triggered the confirmation
|
||||
nsCString mTrigger;
|
||||
|
||||
// String representation of consecutive failed lookups that triggered
|
||||
// confirmation.
|
||||
nsCString mFailedLookups;
|
||||
|
||||
// Called when a confirmation completes successfully or when the
|
||||
// confirmation context changes.
|
||||
void RecordEvent(const char* aReason);
|
||||
|
|
|
@ -2111,6 +2111,7 @@ network.dns:
|
|||
objects: ["context"]
|
||||
bug_numbers:
|
||||
- 1691408
|
||||
- 1694949
|
||||
description: >
|
||||
This telemetry records the status of the TRR confirmation.
|
||||
The value of the event is one of:
|
||||
|
@ -2138,6 +2139,9 @@ network.dns:
|
|||
String representation of the last 32 confirmation results.
|
||||
Example: nnnnnnttttttttt indicates a number of network (n) failures
|
||||
followed by timeouts (t).
|
||||
failedLookups: >
|
||||
When the trigger is failed-lookups, this contains the string
|
||||
representation of the failures that triggered the confirmation.
|
||||
networkID: >
|
||||
The network ID for the recorded confirmation attempts
|
||||
captivePortal: >
|
||||
|
|
Загрузка…
Ссылка в новой задаче