diff --git a/pkg/cluster/install.go b/pkg/cluster/install.go index b71e0b155..0290cd191 100644 --- a/pkg/cluster/install.go +++ b/pkg/cluster/install.go @@ -31,7 +31,7 @@ import ( // AdminUpdate performs an admin update of an ARO cluster func (m *manager) AdminUpdate(ctx context.Context) error { toRun := m.adminUpdate() - return m.runSteps(ctx, toRun, false) + return m.runSteps(ctx, toRun, "adminUpdate") } func (m *manager) adminUpdate() []steps.Step { @@ -189,7 +189,7 @@ func (m *manager) Update(ctx context.Context) error { ) } - return m.runSteps(ctx, s, false) + return m.runSteps(ctx, s, "update") } func (m *manager) runIntegratedInstaller(ctx context.Context) error { @@ -337,21 +337,21 @@ func (m *manager) Install(ctx context.Context) error { return fmt.Errorf("unrecognised phase %s", m.doc.OpenShiftCluster.Properties.Install.Phase) } m.log.Printf("starting phase %s", m.doc.OpenShiftCluster.Properties.Install.Phase) - return m.runSteps(ctx, steps[m.doc.OpenShiftCluster.Properties.Install.Phase], true) + return m.runSteps(ctx, steps[m.doc.OpenShiftCluster.Properties.Install.Phase], "install") } -func (m *manager) runSteps(ctx context.Context, s []steps.Step, emitMetrics bool) error { +func (m *manager) runSteps(ctx context.Context, s []steps.Step, metricsTopic string) error { var err error - if emitMetrics { + if metricsTopic != "" { var stepsTimeRun map[string]int64 stepsTimeRun, err = steps.Run(ctx, m.log, 10*time.Second, s, m.now) if err == nil { var totalInstallTime int64 - for topic, duration := range stepsTimeRun { - m.metricsEmitter.EmitGauge(fmt.Sprintf("backend.openshiftcluster.installtime.%s", topic), duration, nil) + for stepName, duration := range stepsTimeRun { + m.metricsEmitter.EmitGauge(fmt.Sprintf("backend.openshiftcluster.%s.%s.duration.seconds", metricsTopic, stepName), duration, nil) totalInstallTime += duration } - m.metricsEmitter.EmitGauge("backend.openshiftcluster.installtime.total", totalInstallTime, nil) + m.metricsEmitter.EmitGauge(fmt.Sprintf("backend.openshiftcluster.%s.duration.total.seconds", metricsTopic), totalInstallTime, nil) } } else { _, err = steps.Run(ctx, m.log, 10*time.Second, s, nil) diff --git a/pkg/cluster/install_test.go b/pkg/cluster/install_test.go index 355bde3c5..00414f664 100644 --- a/pkg/cluster/install_test.go +++ b/pkg/cluster/install_test.go @@ -176,7 +176,7 @@ func TestStepRunnerWithInstaller(t *testing.T) { now: func() time.Time { return time.Now() }, } - err := m.runSteps(ctx, tt.steps, false) + err := m.runSteps(ctx, tt.steps, "") if err != nil && err.Error() != tt.wantErr || err == nil && tt.wantErr != "" { t.Error(err) @@ -239,28 +239,49 @@ func TestInstallationTimeMetrics(t *testing.T) { for _, tt := range []struct { name string + metricsTopic string + timePerStep int64 steps []steps.Step wantedMetrics map[string]int64 }{ { - name: "Failed step run will not generate any install time metrics", + name: "Failed step run will not generate any install time metrics", + metricsTopic: "install", steps: []steps.Step{ steps.Action(successfulActionStep), steps.Action(failingFunc), }, }, { - name: "Succeeded step run will generate a valid install time metrics", + name: "Succeeded step run for cluster installation will generate a valid install time metrics", + metricsTopic: "install", + timePerStep: 2, steps: []steps.Step{ steps.Action(successfulActionStep), steps.Condition(successfulConditionStep, 30*time.Minute, true), steps.AuthorizationRefreshingAction(nil, steps.Action(successfulActionStep)), }, wantedMetrics: map[string]int64{ - "backend.openshiftcluster.installtime.total": 6, - "backend.openshiftcluster.installtime.action.successfulActionStep": 2, - "backend.openshiftcluster.installtime.condition.successfulConditionStep": 2, - "backend.openshiftcluster.installtime.refreshing.successfulActionStep": 2, + "backend.openshiftcluster.install.duration.total.seconds": 6, + "backend.openshiftcluster.install.action.successfulActionStep.duration.seconds": 2, + "backend.openshiftcluster.install.condition.successfulConditionStep.duration.seconds": 2, + "backend.openshiftcluster.install.refreshing.successfulActionStep.duration.seconds": 2, + }, + }, + { + name: "Succeeded step run for cluster update will generate a valid install time metrics", + metricsTopic: "update", + timePerStep: 3, + steps: []steps.Step{ + steps.Action(successfulActionStep), + steps.Condition(successfulConditionStep, 30*time.Minute, true), + steps.AuthorizationRefreshingAction(nil, steps.Action(successfulActionStep)), + }, + wantedMetrics: map[string]int64{ + "backend.openshiftcluster.update.duration.total.seconds": 9, + "backend.openshiftcluster.update.action.successfulActionStep.duration.seconds": 3, + "backend.openshiftcluster.update.condition.successfulConditionStep.duration.seconds": 3, + "backend.openshiftcluster.update.refreshing.successfulActionStep.duration.seconds": 3, }, }, } { @@ -269,10 +290,10 @@ func TestInstallationTimeMetrics(t *testing.T) { m := &manager{ log: log, metricsEmitter: fm, - now: func() time.Time { return time.Now().Add(2 * time.Second) }, + now: func() time.Time { return time.Now().Add(time.Duration(tt.timePerStep) * time.Second) }, } - err := m.runSteps(ctx, tt.steps, true) + err := m.runSteps(ctx, tt.steps, tt.metricsTopic) if err != nil { if len(fm.Metrics) != 0 { t.Error("fake metrics obj should be empty when run steps failed") @@ -282,10 +303,10 @@ func TestInstallationTimeMetrics(t *testing.T) { for k, v := range tt.wantedMetrics { time, ok := fm.Metrics[k] if !ok { - t.Errorf("unexpected metrics topic: %s", k) + t.Errorf("unexpected metrics key: %s", k) } if time != v { - t.Errorf("incorrect fake metrics obj, want: %d, got: %d", v, time) + t.Errorf("incorrect fake metrics value, want: %d, got: %d", v, time) } } }