Improve logging, handling of empty metric values

This commit is contained in:
Steven Fairchild 2024-06-27 16:43:54 -04:00 коммит произвёл Caden Marchese
Родитель cf51f260ac
Коммит 3db436013f
3 изменённых файлов: 86 добавлений и 61 удалений

Просмотреть файл

@ -17,12 +17,12 @@ import (
func (ocb *openShiftClusterBackend) emitMetrics(log *logrus.Entry, doc *api.OpenShiftClusterDocument, operationType, provisioningState api.ProvisioningState, backendErr error) map[string]string {
dimensions := map[string]string{}
ocb.gatherOperationMetrics(operationType, provisioningState, backendErr, dimensions)
ocb.gatherCorrelationID(doc, dimensions)
ocb.gatherMiscMetrics(doc, dimensions)
ocb.gatherAuthMetrics(doc, dimensions)
ocb.gatherNetworkMetrics(doc, dimensions)
ocb.gatherNodeMetrics(doc, dimensions)
ocb.gatherOperationMetrics(log, operationType, provisioningState, backendErr, dimensions)
ocb.gatherCorrelationID(log, doc, dimensions)
ocb.gatherMiscMetrics(log, doc, dimensions)
ocb.gatherAuthMetrics(log, doc, dimensions)
ocb.gatherNetworkMetrics(log, doc, dimensions)
ocb.gatherNodeMetrics(log, doc, dimensions)
ocb.logMetricDimensions(log, operationType, dimensions)
ocb.m.EmitGauge(ocb.getMetricName(operationType), metricValue, dimensions)
@ -44,49 +44,64 @@ func (ocb *openShiftClusterBackend) getResultType(backendErr error) utillog.Resu
return resultType
}
// getStringMetricValue normalizes a string metric value before it is stored
// as a dimension. A non-empty value is returned unchanged. An empty value is
// reported with a warning that names metricName, and the `empty` placeholder
// constant is returned in its place.
func (ocb *openShiftClusterBackend) getStringMetricValue(log *logrus.Entry, metricName, value string) string {
	if value == "" {
		// Nothing was collected for this metric: warn and substitute the placeholder.
		log.Warnf("%s %s", metricFailToCollectErr, metricName)
		return empty
	}

	return value
}
// logMetricDimensions writes one info-level log line per collected dimension,
// in the form "<metricPackage>.<operationType>: <metric> = <value>".
//
// The dimensions map is iterated directly, so the order of the emitted lines
// is not deterministic (Go map iteration order is unspecified).
func (ocb *openShiftClusterBackend) logMetricDimensions(log *logrus.Entry, operationType api.ProvisioningState, dimensions map[string]string) {
	for metric, value := range dimensions {
		// Infof produces the same message as Info(fmt.Sprintf(...)) but lets
		// the logger skip formatting when the info level is disabled.
		log.Infof("%s.%s: %s = %s", metricPackage, operationType, metric, value)
	}
}
func (m *openShiftClusterBackend) gatherCorrelationID(doc *api.OpenShiftClusterDocument, dimensions map[string]string) {
func (ocb *openShiftClusterBackend) gatherCorrelationID(log *logrus.Entry, doc *api.OpenShiftClusterDocument, dimensions map[string]string) {
if doc.CorrelationData != nil {
dimensions[correlationDataIdMetricName] = doc.CorrelationData.CorrelationID
dimensions[correlationDataClientRequestIdMetricName] = doc.CorrelationData.ClientRequestID
dimensions[correlationDataRequestIdMetricName] = doc.CorrelationData.RequestID
dimensions[correlationDataIdMetricName] = ocb.getStringMetricValue(log, correlationDataIdMetricName, doc.CorrelationData.CorrelationID)
dimensions[correlationDataClientRequestIdMetricName] = ocb.getStringMetricValue(log, correlationDataClientRequestIdMetricName, doc.CorrelationData.ClientRequestID)
dimensions[correlationDataRequestIdMetricName] = ocb.getStringMetricValue(log, correlationDataRequestIdMetricName, doc.CorrelationData.RequestID)
} else {
log.Warnf("%s %s", metricFailToCollectErr, correlationDataMetricName)
dimensions[correlationDataIdMetricName] = empty
dimensions[correlationDataClientRequestIdMetricName] = empty
dimensions[correlationDataRequestIdMetricName] = empty
}
}
func (ocb *openShiftClusterBackend) gatherOperationMetrics(operationType, provisioningState api.ProvisioningState, backendErr error, dimensions map[string]string) {
func (ocb *openShiftClusterBackend) gatherOperationMetrics(log *logrus.Entry, operationType, provisioningState api.ProvisioningState, backendErr error, dimensions map[string]string) {
// These are provided internally by endLease, not expected to be ""
dimensions[operationTypeMetricName] = operationType.String()
dimensions[provisioningStateMetricName] = provisioningState.String()
dimensions[resultTypeMetricName] = string(ocb.getResultType(backendErr))
dimensions[resultTypeMetricName] = ocb.getStringMetricValue(log, resultTypeMetricName, string(ocb.getResultType(backendErr)))
}
func (ocb *openShiftClusterBackend) gatherMiscMetrics(doc *api.OpenShiftClusterDocument, dimensions map[string]string) {
dimensions[subscriptionIdMetricName] = ocb.env.SubscriptionID()
dimensions[resourceIdMetricName] = doc.ResourceID
if doc.OpenShiftCluster != nil {
dimensions[clusterNameMetricName] = doc.OpenShiftCluster.Name
dimensions[locationMetricName] = doc.OpenShiftCluster.Location
dimensions[ocpVersionMetricName] = doc.OpenShiftCluster.Properties.ClusterProfile.Version
dimensions[rpVersionMetricName] = doc.OpenShiftCluster.Properties.ProvisionedBy
dimensions[resourecGroupMetricName] = doc.OpenShiftCluster.Properties.ClusterProfile.ResourceGroupID
func (ocb *openShiftClusterBackend) gatherMiscMetrics(log *logrus.Entry, doc *api.OpenShiftClusterDocument, dimensions map[string]string) {
dimensions[subscriptionIdMetricName] = ocb.getStringMetricValue(log, subscriptionIdMetricName, ocb.env.SubscriptionID())
dimensions[resourceIdMetricName] = ocb.getStringMetricValue(log, resourceIdMetricName, doc.ResourceID)
for flag, feature := range doc.OpenShiftCluster.Properties.OperatorFlags {
dimensions[fmt.Sprintf("%s-%s", operatorFlagsMetricName, flag)] = feature
}
dimensions[clusterNameMetricName] = ocb.getStringMetricValue(log, clusterNameMetricName, doc.OpenShiftCluster.Name)
dimensions[clusterIdMetricName] = ocb.getStringMetricValue(log, clusterIdMetricName, doc.OpenShiftCluster.ID)
dimensions[locationMetricName] = ocb.getStringMetricValue(log, locationMetricName, doc.OpenShiftCluster.Location)
dimensions[ocpVersionMetricName] = ocb.getStringMetricValue(log, ocpVersionMetricName, doc.OpenShiftCluster.Properties.ClusterProfile.Version)
dimensions[rpVersionMetricName] = ocb.getStringMetricValue(log, rpVersionMetricName, doc.OpenShiftCluster.Properties.ProvisionedBy)
dimensions[resourecGroupMetricName] = ocb.getStringMetricValue(log, resourecGroupMetricName, doc.OpenShiftCluster.Properties.ClusterProfile.ResourceGroupID)
for flag, feature := range doc.OpenShiftCluster.Properties.OperatorFlags {
flagMetricName := fmt.Sprintf("%s-%s", operatorFlagsMetricName, flag)
dimensions[flagMetricName] = ocb.getStringMetricValue(log, flagMetricName, feature)
}
dimensions[asyncOperationsIdMetricName] = doc.AsyncOperationID
dimensions[asyncOperationsIdMetricName] = ocb.getStringMetricValue(log, asyncOperationsIdMetricName, doc.AsyncOperationID)
if doc.OpenShiftCluster.Properties.WorkerProfiles != nil {
dimensions[workerProfileCountMetricName] = strconv.FormatInt(int64(len(doc.OpenShiftCluster.Properties.WorkerProfiles)), 10)
} else {
dimensions[workerProfileCountMetricName] = ocb.getStringMetricValue(log, workerProfileCountMetricName, "")
}
if doc.OpenShiftCluster.Tags != nil {
@ -96,7 +111,7 @@ func (ocb *openShiftClusterBackend) gatherMiscMetrics(doc *api.OpenShiftClusterD
}
}
func (ocb *openShiftClusterBackend) gatherNodeMetrics(doc *api.OpenShiftClusterDocument, dimensions map[string]string) {
func (ocb *openShiftClusterBackend) gatherNodeMetrics(log *logrus.Entry, doc *api.OpenShiftClusterDocument, dimensions map[string]string) {
if doc.OpenShiftCluster.Properties.MasterProfile.DiskEncryptionSetID != "" {
dimensions[masterProfileEncryptionSetIdMetricName] = enabled
} else {
@ -104,22 +119,21 @@ func (ocb *openShiftClusterBackend) gatherNodeMetrics(doc *api.OpenShiftClusterD
}
mp := doc.OpenShiftCluster.Properties.MasterProfile
dimensions[masterProfileVmSizeMetricName] = string(mp.VMSize)
dimensions[masterProfileVmSizeMetricName] = ocb.getStringMetricValue(log, masterProfileVmSizeMetricName, string(mp.VMSize))
if doc.OpenShiftCluster.Properties.MasterProfile.EncryptionAtHost == api.EncryptionAtHostEnabled {
dimensions[masterEncryptionAtHostMetricName] = string(api.EncryptionAtHostEnabled)
} else if doc.OpenShiftCluster.Properties.MasterProfile.EncryptionAtHost == api.EncryptionAtHostDisabled {
dimensions[masterEncryptionAtHostMetricName] = string(api.EncryptionAtHostDisabled)
} else {
log.Warnf("%s %s", metricFailToCollectErr, masterEncryptionAtHostMetricName)
dimensions[masterEncryptionAtHostMetricName] = unknown
}
if len(doc.OpenShiftCluster.Properties.WorkerProfiles) > 0 {
wp := doc.OpenShiftCluster.Properties.WorkerProfiles[0]
dimensions[workerVmSizeMetricName] = string(wp.VMSize)
dimensions[workerVmDiskSizeMetricName] = strconv.FormatInt(int64(wp.DiskSizeGB), 10)
dimensions[workerVmSizeMetricName] = string(wp.VMSize)
dimensions[workerVmSizeMetricName] = ocb.getStringMetricValue(log, workerVmSizeMetricName, string(wp.VMSize))
dimensions[workerVmDiskSizeMetricName] = strconv.FormatInt(int64(wp.DiskSizeGB), 10)
if wp.EncryptionAtHost == api.EncryptionAtHostEnabled {
@ -127,6 +141,7 @@ func (ocb *openShiftClusterBackend) gatherNodeMetrics(doc *api.OpenShiftClusterD
} else if wp.EncryptionAtHost == api.EncryptionAtHostDisabled {
dimensions[workerEncryptionAtHostMetricName] = string(api.EncryptionAtHostDisabled)
} else {
log.Warnf("%s %s", metricFailToCollectErr, workerEncryptionAtHostMetricName)
dimensions[workerEncryptionAtHostMetricName] = unknown
}
}
@ -136,16 +151,18 @@ func (ocb *openShiftClusterBackend) gatherNodeMetrics(doc *api.OpenShiftClusterD
} else if doc.OpenShiftCluster.Properties.ClusterProfile.FipsValidatedModules == api.FipsValidatedModulesDisabled {
dimensions[fipsMetricName] = string(api.FipsValidatedModulesDisabled)
} else {
log.Warnf("%s %s", metricFailToCollectErr, fipsMetricName)
dimensions[fipsMetricName] = unknown
}
}
func (ocb *openShiftClusterBackend) gatherAuthMetrics(doc *api.OpenShiftClusterDocument, dimensions map[string]string) {
func (ocb *openShiftClusterBackend) gatherAuthMetrics(log *logrus.Entry, doc *api.OpenShiftClusterDocument, dimensions map[string]string) {
if doc.OpenShiftCluster.Properties.PlatformWorkloadIdentityProfile != nil {
dimensions[clusterIdentityMetricName] = clusterIdentityManagedIdMetricName
} else if doc.OpenShiftCluster.Properties.ServicePrincipalProfile != nil {
dimensions[clusterIdentityMetricName] = clusterIdentityServicePrincipalMetricName
} else {
log.Warnf("%s %s", metricFailToCollectErr, clusterIdentityMetricName)
dimensions[clusterIdentityMetricName] = unknown
}
@ -156,13 +173,14 @@ func (ocb *openShiftClusterBackend) gatherAuthMetrics(doc *api.OpenShiftClusterD
}
}
func (ocb *openShiftClusterBackend) gatherNetworkMetrics(doc *api.OpenShiftClusterDocument, dimensions map[string]string) {
func (ocb *openShiftClusterBackend) gatherNetworkMetrics(log *logrus.Entry, doc *api.OpenShiftClusterDocument, dimensions map[string]string) {
for _, p := range doc.OpenShiftCluster.Properties.IngressProfiles {
if p.Visibility == api.VisibilityPrivate {
dimensions[ingressProfileMetricName] = fmt.Sprintf("%s.%s", string(api.VisibilityPrivate), p.Name)
} else if p.Visibility == api.VisibilityPublic {
dimensions[ingressProfileMetricName] = fmt.Sprintf("%s.%s", string(api.VisibilityPublic), p.Name)
} else {
log.Warnf("%s %s", metricFailToCollectErr, ingressProfileMetricName)
dimensions[ingressProfileMetricName] = unknown
}
}
@ -172,6 +190,7 @@ func (ocb *openShiftClusterBackend) gatherNetworkMetrics(doc *api.OpenShiftClust
} else if doc.OpenShiftCluster.Properties.NetworkProfile.OutboundType == api.OutboundTypeLoadbalancer {
dimensions[networkProfileOutboundTypeMetricName] = string(api.OutboundTypeLoadbalancer)
} else {
log.Warnf("%s %s", metricFailToCollectErr, networkProfileManagedOutboundIpsMetricName)
dimensions[networkProfileOutboundTypeMetricName] = unknown
}
@ -188,7 +207,10 @@ func (ocb *openShiftClusterBackend) gatherNetworkMetrics(doc *api.OpenShiftClust
}
domain, err := dns.ManagedDomain(ocb.env, doc.OpenShiftCluster.Properties.ClusterProfile.Domain)
if err == nil {
if err != nil {
dimensions[clusterProfileDomainMetricName] = empty
log.Warnf("%s %s, due to %s", metricFailToCollectErr, clusterProfileDomainMetricName, err.Error())
} else {
if domain != "" {
dimensions[clusterProfileDomainMetricName] = custom
} else {
@ -198,6 +220,9 @@ func (ocb *openShiftClusterBackend) gatherNetworkMetrics(doc *api.OpenShiftClust
if doc.OpenShiftCluster.Properties.NetworkProfile.LoadBalancerProfile.ManagedOutboundIPs != nil {
dimensions[networkProfileManagedOutboundIpsMetricName] = strconv.FormatInt(int64(doc.OpenShiftCluster.Properties.NetworkProfile.LoadBalancerProfile.ManagedOutboundIPs.Count), 10)
} else {
log.Warnf("%s %s", metricFailToCollectErr, networkProfileManagedOutboundIpsMetricName)
dimensions[networkProfileManagedOutboundIpsMetricName] = unknown
}
if doc.OpenShiftCluster.Properties.NetworkProfile.PreconfiguredNSG == api.PreconfiguredNSGEnabled {
@ -205,6 +230,7 @@ func (ocb *openShiftClusterBackend) gatherNetworkMetrics(doc *api.OpenShiftClust
} else if doc.OpenShiftCluster.Properties.NetworkProfile.PreconfiguredNSG == api.PreconfiguredNSGDisabled {
dimensions[networkProfilePreConfiguredNSGMetricName] = string(api.PreconfiguredNSGDisabled)
} else {
log.Warnf("%s %s", metricFailToCollectErr, networkProfilePreConfiguredNSGMetricName)
dimensions[networkProfilePreConfiguredNSGMetricName] = unknown
}

Просмотреть файл

@ -4,15 +4,16 @@ package backend
// Licensed under the Apache License 2.0.
const (
metricPackage = "backend.openshiftcluster"
metricValue int64 = 1
enabled = "Enabled"
disabled = "Disabled"
custom = "Custom"
defaultSet = "Default"
unknown = "unknown"
empty = "empty"
managed = "managed"
metricPackage = "backend.openshiftcluster"
metricValue int64 = 1
enabled = "Enabled"
disabled = "Disabled"
custom = "Custom"
defaultSet = "Default"
unknown = "unknown"
empty = "empty"
managed = "managed"
metricFailToCollectErr = "failed to collect metric:"
encryptionAtHostMetricName = "encryptionathost"
diskSizeMetricName = "disksize"
@ -56,11 +57,13 @@ const (
operatorFlagsMetricName = "operatorflags"
asyncOperationsIdMetricName = "async_operationsid"
rpVersionMetricName = "rpversion"
ocpVersionMetricName = "ocpversion"
clusterNameMetricName = "clustername"
resourecGroupMetricName = "resourcegroup"
locationMetricName = "location"
openshiftClusterMetricName = "openshiftcluster"
rpVersionMetricName = openshiftClusterMetricName + "." + "rpversion"
ocpVersionMetricName = openshiftClusterMetricName + "." + "ocpversion"
clusterNameMetricName = openshiftClusterMetricName + "." + "clustername"
clusterIdMetricName = openshiftClusterMetricName + "." + "clusterid"
resourecGroupMetricName = openshiftClusterMetricName + "." + "resourcegroup"
locationMetricName = openshiftClusterMetricName + "." + "location"
resourceIdMetricName = "resourceid"
subscriptionIdMetricName = "subscriptionid"

Просмотреть файл

@ -104,14 +104,10 @@ func TestEmitMetrics(t *testing.T) {
FipsValidatedModules: api.FipsValidatedModulesEnabled,
},
NetworkProfile: api.NetworkProfile{
LoadBalancerProfile: &api.LoadBalancerProfile{
ManagedOutboundIPs: &api.ManagedOutboundIPs{
Count: 1,
},
},
PodCIDR: "10.128.0.1/14",
ServiceCIDR: "172.30.0.1/16",
PreconfiguredNSG: api.PreconfiguredNSGEnabled,
LoadBalancerProfile: &api.LoadBalancerProfile{},
PodCIDR: "10.128.0.1/14",
ServiceCIDR: "172.30.0.1/16",
PreconfiguredNSG: api.PreconfiguredNSGEnabled,
},
OperatorFlags: api.OperatorFlags{"testFlag": "true"},
WorkerProfiles: []api.WorkerProfile{
@ -204,12 +200,12 @@ func TestEmitMetrics(t *testing.T) {
}
dimensions := map[string]string{}
ocb.gatherOperationMetrics(tt.operationType, tt.provisioningState, tt.backendErr, dimensions)
ocb.gatherCorrelationID(tt.doc, dimensions)
ocb.gatherMiscMetrics(tt.doc, dimensions)
ocb.gatherAuthMetrics(tt.doc, dimensions)
ocb.gatherNetworkMetrics(tt.doc, dimensions)
ocb.gatherNodeMetrics(tt.doc, dimensions)
ocb.gatherOperationMetrics(log, tt.operationType, tt.provisioningState, tt.backendErr, dimensions)
ocb.gatherCorrelationID(log, tt.doc, dimensions)
ocb.gatherMiscMetrics(log, tt.doc, dimensions)
ocb.gatherAuthMetrics(log, tt.doc, dimensions)
ocb.gatherNetworkMetrics(log, tt.doc, dimensions)
ocb.gatherNodeMetrics(log, tt.doc, dimensions)
emitter.EXPECT().EmitGauge(ocb.getMetricName(tt.operationType), metricValue, dimensions).MaxTimes(1)